Reputation: 2014
I am attempting to convert a DataFrame to a geopandas GeoDataFrame and set its geometry column.
Sample df:
state_fips_code tract_ce tract_geom
6 576700 POLYGON((-118.169788 33.74847, -118.167625 33.747597, -118.165187 33.747139, -118.165741 33.739433, -118.162769 33.739184, -118.162638 33.73965, -118.162508 33.740117, -118.161791 33.742436, -118.161766 33.742512, -118.160742 33.746281, -118.160714 33.746387, -118.159262 33.751741, -118.158781 33.753463, -118.157518 33.757987, -118.156954 33.760101, -118.156884 33.760362, -118.156531 33.761425, -118.15616 33.76175, -118.155959 33.762416, -118.15577 33.763042, -118.155603 33.763596, -118.155444 33.764124, -118.155268 33.764727, -118.154575 33.764584, -118.154574 33.765026, -118.15458 33.766035, -118.154581 33.766266, -118.154584 33.767057, -118.154589 33.767951, -118.15459 33.768073, -118.155696 33.768074, -118.155904 33.768074, -118.156762 33.768076, -118.157844 33.768077, -118.158646 33.768078, -118.158925 33.768079, -118.159534 33.76808, -118.159557 33.76808, -118.160268 33.76808, -118.160584 33.768081, -118.161666 33.768076, -118.161683 33.768076, -118.162772 33.768078, -118.162938 33.768078, -118.163854 33.768079, -118.16421 33.76808, -118.164738 33.768074, -118.165013 33.768097, -118.165136 33.768129, -118.165204 33.768161, -118.165299 33.768232, -118.165379 33.768324, -118.165389 33.768291, -118.165424 33.768173, -118.165457 33.768062, -118.165463 33.768042, -118.165621 33.767529, -118.16568 33.767337, -118.165806 33.766926, -118.16599 33.766328, -118.166149 33.765794, -118.166308 33.76528, -118.166632 33.764199, -118.166191 33.764109, -118.166348 33.763579, -118.16643 33.763468, -118.166608 33.762587, -118.166728 33.761999, -118.16676 33.761851, -118.166769 33.761802, -118.166792 33.761705, -118.16816 33.755431, -118.168649 33.753374, -118.169788 33.74847))
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely import wkt
# Find and drop rows with bad geometry data
# NOTE(review): indentation was lost in this paste; the `i = i + 1` line and
# the try/except presumably sit inside the `for` loop -- confirm.
i = 0
geom = []
dlst = []
for g in df['tract_geom']:
# `i` is incremented before it is used, so the values collected in `dlst`
# are 1-based counters, not the positions/labels produced by the iteration.
i = i + 1
try:
geom.append(wkt.loads(g))
except:
dlst.append(i)
# NOTE(review): the loop above reads `df`, but the drop is applied to
# `df_geo`, which is not defined anywhere in this snippet. `drop` also
# interprets `dlst` as index labels, so the 1-based counters only match the
# failing rows if the index happens to start at 1 -- verify.
df_geo.drop(dlst, inplace=True)
# Convert to GeoDataFrame
# NOTE(review): the parsed geometries are written to a new 'geometry' column,
# while the constructor below is told to use 'tract_geom', which still holds
# the raw WKT strings.
df['geometry'] = df['tract_geom'].apply(wkt.loads)
df = gpd.GeoDataFrame(df, geometry='tract_geom')
Traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/var/folders/d0/gnksqzwn2fn46fjgrkp6045c0000gn/T/ipykernel_50055/100886836.py in <module>
2 #df['geometry'] = df['tract_geom'].apply(wkt.loads)
----> 3 df = gpd.GeoDataFrame(df, geometry='tract_geom')
/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/geopandas/geodataframe.py in __init__(self, data, geometry, crs, *args, **kwargs)
182 _crs_mismatch_warning()
183 # TODO: raise error in 0.9 or 0.10.
--> 184 self.set_geometry(geometry, inplace=True)
185
186 if geometry is None and crs:
TypeError: Input must be valid geometry objects: POLYGON((-118.169788 33.74847, -118.167625 33.747597, -118.165187 33.747139, -118.165741 33.739433, -118.162769 33.739184, -118.162638 33.73965, -118.162508 33.740117, -118.161791 33.742436, -118.161766 33.742512, -118.160742 33.746281, -118.160714 33.746387, -118.159262 33.751741, -118.158781 33.753463, -118.157518 33.757987, -118.156954 33.760101, -118.156884 33.760362, -118.156531 33.761425, -118.15616 33.76175, -118.155959 33.762416, -118.15577 33.763042, -118.155603 33.763596, -118.155444 33.764124, -118.155268 33.764727, -118.154575 33.764584, -118.154574 33.765026, -118.15458 33.766035, -118.154581 33.766266, -118.154584 33.767057, -118.154589 33.767951, -118.15459 33.768073, -118.155696 33.768074, -118.155904 33.768074, -118.156762 33.768076, -118.157844 33.768077, -118.158646 33.768078, -118.158925 33.768079, -118.159534 33.76808, -118.159557 33.76808, -118.160268 33.76808, -118.160584 33.768081, -118.161666 33.768076, -118.161683 33.768076, -118.162772 33.768078, -118.162938 33.768078, -118.163854 33.768079, -118.16421 33.76808, -118.164738 33.768074, -118.165013 33.768097, -118.165136 33.768129, -118.165204 33.768161, -118.165299 33.768232, -118.165379 33.768324, -118.165389 33.768291, -118.165424 33.768173, -118.165457 33.768062, -118.165463 33.768042, -118.165621 33.767529, -118.16568 33.767337, -118.165806 33.766926, -118.16599 33.766328, -118.166149 33.765794, -118.166308 33.76528, -118.166632 33.764199, -118.166191 33.764109, -118.166348 33.763579, -118.16643 33.763468, -118.166608 33.762587, -118.166728 33.761999, -118.16676 33.761851, -118.166769 33.761802, -118.166792 33.761705, -118.16816 33.755431, -118.168649 33.753374, -118.169788 33.74847))
Upvotes: 2
Views: 6031
Reputation: 13242
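I did all this, and then realized you just have a typo: the parsed geometries are assigned to a new `geometry` column, but `GeoDataFrame(..., geometry='tract_geom')` points at the original column, which still contains WKT strings rather than geometry objects.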
df['geometry'] = df['tract_geom'].apply(wkt.loads)
Should be:
df['tract_geom'] = df['tract_geom'].apply(wkt.loads)
import pandas as pd
import geopandas as gp
import numpy as np
from shapely.wkt import loads
# Single-row sample data keyed as {column: {row_label: value}}; it is passed
# to pd.DataFrame below. 'tract_geom' holds the polygon as WKT text.
d = {'state_fips_code': {0: 6},
'tract_ce': {0: 576700},
'tract_geom': {0: 'POLYGON((-118.169788 33.74847, -118.167625 33.747597, -118.165187 33.747139, -118.165741 33.739433, -118.162769 33.739184, -118.162638 33.73965, -118.162508 33.740117, -118.161791 33.742436, -118.161766 33.742512, -118.160742 33.746281, -118.160714 33.746387, -118.159262 33.751741, -118.158781 33.753463, -118.157518 33.757987, -118.156954 33.760101, -118.156884 33.760362, -118.156531 33.761425, -118.15616 33.76175, -118.155959 33.762416, -118.15577 33.763042, -118.155603 33.763596, -118.155444 33.764124, -118.155268 33.764727, -118.154575 33.764584, -118.154574 33.765026, -118.15458 33.766035, -118.154581 33.766266, -118.154584 33.767057, -118.154589 33.767951, -118.15459 33.768073, -118.155696 33.768074, -118.155904 33.768074, -118.156762 33.768076, -118.157844 33.768077, -118.158646 33.768078, -118.158925 33.768079, -118.159534 33.76808, -118.159557 33.76808, -118.160268 33.76808, -118.160584 33.768081, -118.161666 33.768076, -118.161683 33.768076, -118.162772 33.768078, -118.162938 33.768078, -118.163854 33.768079, -118.16421 33.76808, -118.164738 33.768074, -118.165013 33.768097, -118.165136 33.768129, -118.165204 33.768161, -118.165299 33.768232, -118.165379 33.768324, -118.165389 33.768291, -118.165424 33.768173, -118.165457 33.768062, -118.165463 33.768042, -118.165621 33.767529, -118.16568 33.767337, -118.165806 33.766926, -118.16599 33.766328, -118.166149 33.765794, -118.166308 33.76528, -118.166632 33.764199, -118.166191 33.764109, -118.166348 33.763579, -118.16643 33.763468, -118.166608 33.762587, -118.166728 33.761999, -118.16676 33.761851, -118.166769 33.761802, -118.166792 33.761705, -118.16816 33.755431, -118.168649 33.753374, -118.169788 33.74847))'}}
def load_valid(geo):
    """Parse WKT text into a geometry, or return NaN if parsing fails.

    Parameters
    ----------
    geo
        Value handed to ``shapely.wkt.loads`` (expected to be WKT text).

    Returns
    -------
    The object returned by ``loads``, or ``np.nan`` when ``loads`` raises,
    so that unparseable rows can be removed afterwards with ``dropna``.
    """
    try:
        return loads(geo)
    except Exception:
        # Best-effort parsing: any error raised by ``loads`` marks the value
        # as missing. Catching ``Exception`` instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        return np.nan
# Build the frame and parse the WKT column; values that cannot be parsed
# come back from load_valid as NaN.
df = pd.DataFrame(d)
df['tract_geom'] = df['tract_geom'].apply(load_valid)
# Drop only the rows whose geometry failed to parse. A bare dropna() would
# also discard rows that merely have a missing value in some other column.
gdf = gp.GeoDataFrame(df.dropna(subset=['tract_geom']), geometry='tract_geom')
# `length` is computed from the geometries themselves, so printing it
# confirms the column now holds real geometry objects rather than strings.
print(gdf, gdf.length, sep='\n\n')
Output:
state_fips_code tract_ce \
0 6 576700
tract_geom
0 POLYGON ((-118.16979 33.74847, -118.16763 33.7...
0 0.07811
dtype: float64
Upvotes: 4