Reputation: 947
I would like to build a new list of strings from two columns of a NumPy array. However, I can't seem to get this to work without looping through each row:
import numpy as np
# Sample data: the 3x2 block of (x, y) pairs tiled 100000 times along the
# first axis, giving a (300000, 2) integer array.
test_list = np.tile(np.array([[1,2],[3,4],[5,6]]),(100000,1))
# Show the first column (x values) and the second column (y values).
print(test_list[:,0])
print(test_list[:,1])
def dumbstring(points):
    """Format every (x, y) row of ``points`` by appending to a growing list.

    Args:
        points: Iterable of indexable rows. ``row[0]`` and ``row[1]`` must be
            single numbers accepted by the ``%g`` format.

    Returns:
        A list with one formatted string per row, in input order.
    """
    # Loop through and append to a list. The explicit loop is kept on
    # purpose: this is the baseline the other variants are timed against.
    string_pnts = []
    for x in points:
        string_pnts.append("X co-ordinate is %g and y is %g" % (x[0], x[1]))
    return string_pnts
def dumbstring2(points):
    """Format every (x, y) row of ``points`` into a pre-sized list.

    Args:
        points: Sized iterable of indexable rows. ``row[0]`` and ``row[1]``
            must be single numbers accepted by the ``%g`` format.

    Returns:
        A list with one formatted string per row, in input order.
    """
    # Prefill a list so each slot is assigned by index instead of appended.
    string_pnts = [""] * len(points)
    # enumerate supplies the slot index; no hand-maintained counter needed.
    for i, x in enumerate(points):
        string_pnts[i] = "X co-ordinate is %g and y is %g" % (x[0], x[1])
    return string_pnts
def numpystring(points):
    """Try to format two whole columns of ``points`` with a single ``%``.

    This is the non-working attempt from the question. Each ``%g`` formats
    exactly one number, so passing whole columns (``points[:, 0]`` and
    ``points[:, 1]``) that hold more than one element raises ``TypeError``
    instead of producing one string per row.

    Args:
        points: 2-D array supporting ``points[:, k]`` column indexing.

    Returns:
        A single formatted string, only if each column converts to one float.

    Raises:
        TypeError: when a column cannot be converted to a single number.
    """
    return "X co-ordinate is %g and y is %g" % (points[:, 0], points[:, 1])
def numpystring2(point_x, point_y):
    """Format one x value and one y value into the co-ordinate sentence.

    Each ``%g`` formats exactly one number, so this works for scalars. Passing
    multi-element arrays (as the question does) raises ``TypeError``.

    Args:
        point_x: Number accepted by the ``%g`` format.
        point_y: Number accepted by the ``%g`` format.

    Returns:
        The formatted string.
    """
    return "X co-ordinate is %g and y is %g" % (point_x, point_y)
The first two work (I would have thought pre-filling would be faster than appending but it seems the same):
%timeit tdumbstring = dumbstring(test_list) # 239ms
%timeit tdumbstring2 = dumbstring2(test_list) # 239ms
However, the last two do not work. Is there no way to vectorise this function?
tnumpystring = numpystring(test_list) # Error
tnumpystring2 = numpystring2(test_list[:,0],test_list[:,1]) # Error
Edit:
I tried Pandas as I don't actually need Numpy, however it was a bit slower:
import pandas as pd
df = pd.DataFrame(test_list)
df.columns = ['x','y']
% time pdtest = ("X co-ordinate is " + df.x.map(str) + " and y is " + df.y.map(str)).tolist()
print(test[:5])
I also tried mapping but that was also slower than looping through np:
def mappy(pt_x, pt_y):
    """Format a single (x, y) pair; intended to be applied per row via ``map``.

    Args:
        pt_x: Number accepted by the ``%g`` format.
        pt_y: Number accepted by the ``%g`` format.

    Returns:
        The formatted string.
    """
    return "X co-ordinate is %g and y is %g" % (pt_x, pt_y)
%time mtest1 = list(map(lambda x: mappy(x[0],x[1]),test_list))
print(mtest1[:5])
Timings:
Upvotes: 1
Views: 482
Reputation: 8207
Here's a solution using `numpy.core.defchararray.add`; first convert your array to `str`.
from numpy.core.defchararray import add
test_list = np.tile(np.array([[1,2],[3,4],[5,6]]),(100000,1)).astype(str)
def stringy_arr(points):
return add(add('X coordinate is ', points[:,0]),add(' and y coordinate is ', points[:,1]))
slightly faster timing:
%timeit stringy_arr(test_list)
1 loops, best of 3: 216 ms per loop
array(['X coordinate is 1 and y coordinate is 2',
'X coordinate is 3 and y coordinate is 4',
'X coordinate is 5 and y coordinate is 6', ...,
'X coordinate is 1 and y coordinate is 2',
'X coordinate is 3 and y coordinate is 4',
'X coordinate is 5 and y coordinate is 6'],
dtype='|S85')
# Previously tried functions
%time dumbstring(test_list)
1 loops, best of 3: 340 ms per loop
%timeit tdumbstring2 = dumbstring2(test_list)
1 loops, best of 3: 320 ms per loop
%time mtest1 = list(map(lambda x: mappy(x[0],x[1]),test_list))
1 loops, best of 3: 340 ms per loop
EDIT
You could also just use pure Python with a list comprehension, which is much faster than my first proposed solution:
# Tile the 3x2 block 10M times along the first axis (30M rows) and convert
# every value to a string.
test_list = np.tile(np.array([[1,2],[3,4],[5,6]]),(10000000,1)).astype(str) #10M
# Convert to a nested Python list so the functions below receive plain lists
# rather than a NumPy array.
test_list = test_list.tolist()
def comp(points):
    """Format every (x, y) pair with a list comprehension.

    Args:
        points: Iterable of two-item rows. Each row is unpacked into ``x`` and
            ``y`` and rendered with ``%s``.

    Returns:
        A list with one formatted string per row, in input order.
    """
    return ['X coordinate is %s Y coordinate is %s' % (x, y) for x, y in points]
%timeit comp(test_list)
1 loops, best of 3: 6.53 s per loop
['X coordinate is 1 Y coordinate is 2',
'X coordinate is 3 Y coordinate is 4',
'X coordinate is 5 Y coordinate is 6',
'X coordinate is 1 Y coordinate is 2',
'X coordinate is 3 Y coordinate is 4',
'X coordinate is 5 Y coordinate is 6',...
%timeit dumbstring(test_list)
1 loops, best of 3: 30.7 s per loop
Upvotes: 1