Reputation: 39
When I try to import a GeoJSON file and convert it into a DataFrame, I get the error shown below. I want a DataFrame with the columns Zipcode, Latitude, Longitude. Here is my code:
# Reproduction script for the error shown in the traceback below.
# NOTE(review): indentation appears to have been lost in this paste; the line
# after `with` and the lines after `for` presumably form the bodies of those
# statements -- confirm against the original post.
import urllib.request, json
import pandas as pd
# Download the GeoJSON document and parse the response body as JSON.
with urllib.request.urlopen("http://bostonopendata-boston.opendata.arcgis.com/datasets/53ea466a189b4f43b3dfb7b38fa7f3b6_1.geojson") as url:
wuppertal_data = json.loads(url.read().decode())
# The list of features; each one is read below for its 'properties' and
# 'geometry' entries.
neighborhoods_data = wuppertal_data['features']
# Accumulator that receives the rows built for every feature.
results = pd.DataFrame()
for data in neighborhoods_data:
zipcode = data['properties']['ZIP5']
# Build a frame straight from the nested coordinate lists, then transpose it.
temp_df = pd.DataFrame(data['geometry']['coordinates'])
temp_df = temp_df.T
# The traceback below points at this statement: two column names are passed,
# but for at least one feature the listed rows are 1170 entries long, which
# raises "ValueError: 2 columns passed, passed data had 1170 columns".
# NOTE(review): presumably that feature nests its coordinates one level deeper
# (e.g. a MultiPolygon), so each element here is a whole ring -- verify.
temp_df = pd.DataFrame(temp_df.iloc[:,0].tolist(), columns=['Latitude', 'Longitude'])
temp_df['Zipcode'] = zipcode
# Add this feature's rows to the accumulator and renumber the index from 0.
results = results.append(temp_df).reset_index(drop=True)
Result:
AssertionError Traceback (most recent call last)
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in _list_to_arrays(data, columns, coerce_float, dtype)
496 result = _convert_object_array(
--> 497 content, columns, dtype=dtype, coerce_float=coerce_float
498 )
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in _convert_object_array(content, columns, coerce_float, dtype)
580 raise AssertionError(
--> 581 f"{len(columns)} columns passed, passed data had "
582 f"{len(content)} columns"
AssertionError: 2 columns passed, passed data had 1170 columns
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-82-b1c5869e9ca3> in <module>
14 temp_df = pd.DataFrame(data['geometry']['coordinates'])
15 temp_df = temp_df.T
---> 16 temp_df = pd.DataFrame(temp_df.iloc[:,0].tolist(), columns=['Latitude', 'Longitude'])
17
18 temp_df['Neighborhood'] = neighborhood_name
D:\PYTHON3.7\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
472 if is_named_tuple(data[0]) and columns is None:
473 columns = data[0]._fields
--> 474 arrays, columns = to_arrays(data, columns, dtype=dtype)
475 columns = ensure_index(columns)
476
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in to_arrays(data, columns, coerce_float, dtype)
459 return [], [] # columns if columns is not None else []
460 if isinstance(data[0], (list, tuple)):
--> 461 return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
462 elif isinstance(data[0], abc.Mapping):
463 return _list_of_dict_to_arrays(
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in _list_to_arrays(data, columns, coerce_float, dtype)
498 )
499 except AssertionError as e:
--> 500 raise ValueError(e) from e
501 return result
502
ValueError: 2 columns passed, passed data had 1170 columns
I don't quite understand the error. Can anyone help me out? I don't know what part is wrong.
Upvotes: 0
Views: 822
Reputation: 13387
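The problem is that the elements of temp_df.iloc[:,0]
sometimes contain more than 2 entries, which raises the error because only 2 column names are passed. To limit every element of the pd.Series
to its first 2 entries,
use temp_df.iloc[:,0].str[:2]
instead. Note that this only silences the error: where an element is a whole ring of points (as in a MultiPolygon feature), the slice keeps the ring's first two points rather than one coordinate pair.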
Full code:
import json
import urllib.request

import pandas as pd

GEOJSON_URL = "http://bostonopendata-boston.opendata.arcgis.com/datasets/53ea466a189b4f43b3dfb7b38fa7f3b6_1.geojson"


def exterior_rings(geometry):
    """Return the exterior ring of every polygon in a GeoJSON geometry.

    A ``Polygon`` stores ``[ring, ...]`` and a ``MultiPolygon`` stores
    ``[[ring, ...], ...]``; in both, the first ring of a polygon is its
    exterior boundary and any further rings are holes.

    Args:
        geometry: A GeoJSON geometry object (a dict with ``'type'`` and
            ``'coordinates'``).

    Returns:
        A list of rings, each ring being a list of positions.

    Raises:
        ValueError: If the geometry is neither a Polygon nor a MultiPolygon.
    """
    kind = geometry['type']
    coordinates = geometry['coordinates']
    if kind == 'Polygon':
        polygons = [coordinates]
    elif kind == 'MultiPolygon':
        polygons = coordinates
    else:
        raise ValueError(f"unsupported geometry type: {kind!r}")
    # Skip empty polygons so that polygon[0] cannot raise IndexError.
    return [polygon[0] for polygon in polygons if polygon]


def zipcode_boundary_frame(features):
    """Build one row per boundary vertex from GeoJSON zip-code features.

    This replaces the ``.str[:2]`` slice suggested above. For a MultiPolygon
    feature that slice truncated a whole ring to its first two points instead
    of reading coordinate pairs, so the resulting cells held lists.

    Args:
        features: Iterable of GeoJSON feature dicts whose ``'properties'``
            contain ``'ZIP5'``. Features with a null geometry contribute no
            rows.

    Returns:
        A DataFrame with columns ``Latitude``, ``Longitude``, ``Zipcode`` and
        a default 0..n-1 index.

    Raises:
        ValueError: If a feature has a geometry other than Polygon or
            MultiPolygon.
    """
    rows = []
    for feature in features:
        geometry = feature['geometry']
        if geometry is None:
            continue
        zipcode = feature['properties']['ZIP5']
        for ring in exterior_rings(geometry):
            # GeoJSON positions are [longitude, latitude(, elevation)], so the
            # two leading values are swapped to match the column labels.
            rows.extend((position[1], position[0], zipcode) for position in ring)
    # Build the frame once: DataFrame.append in a loop copied the accumulated
    # data on every iteration and no longer exists in pandas 2.0.
    return pd.DataFrame(rows, columns=['Latitude', 'Longitude', 'Zipcode'])


# Guarded so that importing this file does not trigger the download; running
# it as a script or pasting it into a notebook behaves as before and leaves
# `results` defined at top level.
if __name__ == "__main__":
    with urllib.request.urlopen(GEOJSON_URL) as url:
        wuppertal_data = json.loads(url.read().decode())
    neighborhoods_data = wuppertal_data['features']
    results = zipcode_boundary_frame(neighborhoods_data)
Upvotes: 1