Reputation: 29
I have the following code, which should: take the census data; clean it (keep only the county rows, i.e. those with SUMLEV == 50, and keep only the columns needed); set the state column as the index; sort by state and then by county population; keep only the 3 most populous counties in each state; sum the population of those 3 counties; and return, as a list, the 3 states with the largest such totals.
The code works perfectly in the Mac Terminal and in VS Code, but throws an error in Coursera's Jupyter Notebooks. I tried restarting the kernel, but the same thing happens. Any idea why?
Thank you.
import pandas as pd
# Load the census data set from 'census.csv' (path is relative to the working directory).
census_df = pd.read_csv('census.csv')
# Preview the first rows of the loaded frame.
census_df.head()
# Return a list of the three states whose three most populous counties have
# the largest combined CENSUS2010POP.
def answer_six():
# Keep only the rows where SUMLEV equals 50 (the county rows, per the description above).
census = census_df[census_df['SUMLEV']==50]
# Restrict the frame to the state name, county name and 2010 census population.
colstokeep = ['STNAME', 'CTYNAME', 'CENSUS2010POP']
census = census[colstokeep]
# Move STNAME from the columns into the index; from here on it is no longer a column.
census = census.set_index(['STNAME'])
# Sort by state name ascending, then by population descending.
# NOTE(review): 'STNAME' is the index at this point, so whether `by` can resolve it
# depends on the pandas version; the traceback below shows this line raising
# KeyError: 'STNAME' in the Coursera environment.
census = census.sort_values(['STNAME', 'CENSUS2010POP'], ascending= (True, False))
# Keep the first three rows of each state group (the three largest counties after the sort).
census = census.groupby(level=0).head(3)
# Aggregate the remaining rows per state with sum().
final = census.groupby(['STNAME']).sum()
# Order the states by their summed population, largest first.
final = final.sort_values(['CENSUS2010POP'], ascending=False)
# The index holds the state names; convert it to a plain Python list.
final_indexes = final.index.values.tolist()
# Only the top three states are returned.
answ = final_indexes[:3]
return answ
# Run the function (in a notebook cell this displays the returned list).
answer_six()
The error I get in JN:
KeyError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'STNAME'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-12-5fdb76484a21> in <module>()
14 return answ
15
---> 16 answer_six()
<ipython-input-12-5fdb76484a21> in answer_six()
5 census = census[colstokeep]
6 census = census.set_index(['STNAME'])
----> 7 census = census.sort_values(['STNAME', 'CENSUS2010POP'], ascending= (True, False))
8 census = census.groupby(level=0).head(3)
9 final = census.groupby(['STNAME']).sum()
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in sort_values(self, by, axis, ascending, inplace, kind, na_position)
3216 keys = []
3217 for x in by:
-> 3218 k = self.xs(x, axis=other_axis).values
3219 if k.ndim == 2:
3220 raise ValueError('Cannot sort by duplicate column %s' %
/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
1768
1769 if axis == 1:
-> 1770 return self[key]
1771
1772 self._consolidate_inplace()
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'STNAME'
Upvotes: 0
Views: 255
Reputation: 12503
Here's the issue with your code:
colstokeep = ['STNAME', 'CTYNAME', 'CENSUS2010POP']
census = census[colstokeep] # Keep only some columns
census = census.set_index(['STNAME']) # turn STNAME into an index
# at this point, it's an
# index and no longer a column
census = census.sort_values(['STNAME', 'CENSUS2010POP'], # now try to sort on a column that
ascending= (True, False)) # no longer exists - and
# you get an error
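To solve it, switch the two lines so that the sort runs while STNAME is still a column. (Newer pandas releases also accept index level names in sort_values, which is presumably why the same code works on your local machine but not in the older pandas installed on Coursera.)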
# first sort
census = census.sort_values(['STNAME', 'CENSUS2010POP'], ascending= (True, False))
# then set the index
census = census.set_index(['STNAME'])
Upvotes: 1