Reputation: 11743
I want to make a new column of type `boolean` based on the values of 4 other columns. I have a function `is_proximal` that takes two 2-tuples (the 4 values) and returns a boolean.
I am passing columns from a pandas DataFrame to this function. The `is_proximal` function in turn calls `geopy.distance.distance` with the arguments.
def is_proximal(p1, p2, exact=True):
    """Return whether two points are closer than the proximity threshold.

    Args:
        p1: First point as a 2-tuple, passed unchanged to ``distance``.
        p2: Second point as a 2-tuple, passed unchanged to ``distance``.
        exact: When true, compare against the 0.75-mile threshold;
            otherwise compare against the 100-meter threshold.

    Returns:
        The result of comparing the computed distance with the
        selected threshold.
    """
    dist = distance(p1, p2)
    if exact:
        return dist.miles < 0.75  # mile threshold
    return dist.m < 100  # meter threshold
# (latitude, longitude) column pairs for each listing source.
airbnb_coords = (df.loc[:, "lat_airbnb"], df.loc[:, "long_airbnb"])
homeaway_coords = (df.loc[:, "lat_homeaway"], df.loc[:, "long_homeaway"])
# Store the result of is_proximal in a new "proximal" column of the same frame.
df.loc[:, "proximal"] = is_proximal(airbnb_coords, homeaway_coords)
This results in the following error:
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
I'm having trouble understanding why this error is occurring. What changes would I need to make to accomplish what I'm trying to do?
The expected output is an additional column of type `boolean`. The input dataframe `df` contains integer values in all columns.
The full traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-663-435de26b3cfa> in <module>
----> 1 m = filter_geographic_proximity(beds)
~/src/exemption_project/src/match.py in filter_geographic_proximity(df)
53 airbnb_coords = (exacts.loc[:, "lat_airbnb"], exacts.loc[:, "long_airbnb"])
54 homeaway_coords = (exacts.loc[:, "lat_homeaway"], exacts.loc[:, "long_homeaway"])
---> 55 exacts.loc[:, "proximal"] = is_proximal(airbnb_coords, homeaway_coords)
56
57 airbnb_coords = (inexacts.loc[:, "lat_airbnb"], inexacts.loc[:, "long_airbnb"])
~/src/exemption_project/src/match.py in is_proximal(p1, p2, exact)
29 def filter_geographic_proximity(df):
30 def is_proximal(p1, p2, exact=True):
---> 31 dist = distance(p1, p2)
32
33 if exact:
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/distance.py in __init__(self, *args, **kwargs)
387 kwargs.pop('iterations', 0)
388 major, minor, f = self.ELLIPSOID
--> 389 super(geodesic, self).__init__(*args, **kwargs)
390
391 def set_ellipsoid(self, ellipsoid):
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/distance.py in __init__(self, *args, **kwargs)
162 elif len(args) > 1:
163 for a, b in util.pairwise(args):
--> 164 kilometers += self.measure(a, b)
165
166 kilometers += units.kilometers(**kwargs)
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/distance.py in measure(self, a, b)
408 # Call geographiclib routines for measure and destination
409 def measure(self, a, b):
--> 410 a, b = Point(a), Point(b)
411 lat1, lon1 = a.latitude, a.longitude
412 lat2, lon2 = b.latitude, b.longitude
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/point.py in __new__(cls, latitude, longitude, altitude)
163 )
164 else:
--> 165 return cls.from_sequence(seq)
166
167 if single_arg:
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/point.py in from_sequence(cls, seq)
403 raise ValueError('When creating a Point from sequence, it '
404 'must not have more than 3 items.')
--> 405 return cls(*args)
406
407 @classmethod
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/point.py in __new__(cls, latitude, longitude, altitude)
176
177 latitude, longitude, altitude = \
--> 178 _normalize_coordinates(latitude, longitude, altitude)
179
180 self = super(Point, cls).__new__(cls)
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/geopy/point.py in _normalize_coordinates(latitude, longitude, altitude)
57
58 def _normalize_coordinates(latitude, longitude, altitude):
---> 59 latitude = float(latitude or 0.0)
60 longitude = float(longitude or 0.0)
61 altitude = float(altitude or 0.0)
~/.local/share/virtualenvs/exemption_project-xI6bzvA1/lib/python3.7/site-packages/pandas/core/generic.py in __nonzero__(self)
1476 raise ValueError("The truth value of a {0} is ambiguous. "
1477 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
-> 1478 .format(self.__class__.__name__))
1479
1480 __bool__ = __nonzero__
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
Upvotes: 1
Views: 6730
Reputation: 402323
From the traceback, it is clear that the error is being raised in the `distance` function that `is_proximal` is calling internally. This leads me to believe you're passing Series objects when the function is meant to be working with scalar data.
See the discussion in Truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all() where the use of python logical operators on pandas Series causes the same error.
In your case, the solution is to iterate over your data, and pass each group of co-ordinates to your function one at a time.
# Call is_proximal once per row so that it receives scalar coordinates
# instead of whole columns.
df['proximal'] = [
    is_proximal((lat_a, long_a), (lat_h, long_h))
    for lat_a, long_a, lat_h, long_h in df[
        ['lat_airbnb', 'long_airbnb', 'lat_homeaway', 'long_homeaway']
    ].values
]
Upvotes: 1