dataadina
dataadina

Reputation: 29

error in Jupyter Notebooks but not in Mac Terminal or Visual Studio Code

I have the following code, which should do: take census data, clean it (keep only counties - column with SUMLEV==50, keep only the columns needed), set state column as index, sort states by county population, show only the first 3 counties by population in each state, add the population of those 3 counties, return as list the 3 most populous states per the population of the first 3 most populated counties.

The code works perfectly in Mac Terminal and VSC but throws an error in Coursera's Jupyter Notebooks. I tried restarting the Kernel, same thing. Any idea why?

Thank you.

import pandas as pd

census_df = pd.read_csv('census.csv')
census_df.head()

def answer_six():
    census = census_df[census_df['SUMLEV']==50] 
    colstokeep = ['STNAME', 'CTYNAME', 'CENSUS2010POP']
    census = census[colstokeep]
    census = census.set_index(['STNAME'])
    census = census.sort_values(['STNAME', 'CENSUS2010POP'], ascending= (True, False))
    census = census.groupby(level=0).head(3)
    final = census.groupby(['STNAME']).sum()
    final = final.sort_values(['CENSUS2010POP'], ascending=False)

    final_indexes = final.index.values.tolist()
    answ = final_indexes[:3]
    return answ

answer_six()
The error I get in JN:
KeyError                                  Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2133             try:
-> 2134                 return self._engine.get_loc(key)
   2135             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 'STNAME'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-12-5fdb76484a21> in <module>()
     14     return answ
     15 
---> 16 answer_six()

<ipython-input-12-5fdb76484a21> in answer_six()
      5     census = census[colstokeep]
      6     census = census.set_index(['STNAME'])
----> 7     census = census.sort_values(['STNAME', 'CENSUS2010POP'], ascending= (True, False))
      8     census = census.groupby(level=0).head(3)
      9     final = census.groupby(['STNAME']).sum()

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in sort_values(self, by, axis, ascending, inplace, kind, na_position)
   3216             keys = []
   3217             for x in by:
-> 3218                 k = self.xs(x, axis=other_axis).values
   3219                 if k.ndim == 2:
   3220                     raise ValueError('Cannot sort by duplicate column %s' %

/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   1768 
   1769         if axis == 1:
-> 1770             return self[key]
   1771 
   1772         self._consolidate_inplace()

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2057             return self._getitem_multilevel(key)
   2058         else:
-> 2059             return self._getitem_column(key)
   2060 
   2061     def _getitem_column(self, key):

/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2064         # get column
   2065         if self.columns.is_unique:
-> 2066             return self._get_item_cache(key)
   2067 
   2068         # duplicate columns & possible reduce dimensionality

/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3541 
   3542             if not isnull(item):
-> 3543                 loc = self.items.get_loc(item)
   3544             else:
   3545                 indexer = np.arange(len(self.items))[isnull(self.items)]

/opt/conda/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2134                 return self._engine.get_loc(key)
   2135             except KeyError:
-> 2136                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2137 
   2138         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 'STNAME'

Upvotes: 0

Views: 255

Answers (1)

Roy2012
Roy2012

Reputation: 12503

Here's the issue with your code:

colstokeep = ['STNAME', 'CTYNAME', 'CENSUS2010POP'] 
census = census[colstokeep]                              # Keep only some columns
census = census.set_index(['STNAME'])                    # turn STNAME into an index
                                                         # at this point, it's an 
                                                         # index and no longer a column

census = census.sort_values(['STNAME', 'CENSUS2010POP'], # now try to sort on a column that
                             ascending= (True, False))   # no longer exists - and 
                                                         # you get an error

To solve it, switch the two lines:

# first sort 
census = census.sort_values(['STNAME', 'CENSUS2010POP'], ascending= (True, False))
# then set the index
census = census.set_index(['STNAME'])

Upvotes: 1

Related Questions