Shriram
Shriram

Reputation: 4931

How to plot object type data in Pandas?

I want to plot 'Date Read' with respect to 'Original Publication Year' using Pandas in Python.

Pandas Version = '0.16.2'

Data

books_read.dtypes

Title                         object
My Rating                      int64
Original Publication Year    float64
Date Read                     object
Exclusive Shelf               object
dtype: object

When I try to plot this, I get the error "KeyError: 'Date Read'":

books_read.plot(kind='scatter',  x='Date Read', y='Original Publication Year')
plt.show()

KeyError                                  Traceback (most recent call last)
<ipython-input-31-169da65d723c> in <module>()
----> 1 books_read.plot(kind='scatter',  x='Date Read', y='Original Publication Year')
      2 plt.show()

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2486                  yerr=yerr, xerr=xerr,
   2487                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 2488                  **kwds)
   2489 
   2490 

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2322         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2323 
-> 2324     plot_obj.generate()
   2325     plot_obj.draw()
   2326     return plot_obj.result

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in generate(self)
    912         self._compute_plot_data()
    913         self._setup_subplots()
--> 914         self._make_plot()
    915         self._add_table()
    916         self._make_legend()

C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _make_plot(self)
   1455         else:
   1456             label = None
-> 1457         scatter = ax.scatter(data[x].values, data[y].values, c=c_values,
   1458                              label=label, cmap=cmap, **self.kwds)
   1459         if cb:

C:\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   1795             return self._getitem_multilevel(key)
   1796         else:
-> 1797             return self._getitem_column(key)
   1798 
   1799     def _getitem_column(self, key):

C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
   1802         # get column
   1803         if self.columns.is_unique:
-> 1804             return self._get_item_cache(key)
   1805 
   1806         # duplicate columns & possible reduce dimensionaility

C:\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
   1082         res = cache.get(item)
   1083         if res is None:
-> 1084             values = self._data.get(item)
   1085             res = self._box_item_values(item, values)
   1086             cache[item] = res

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
   2849 
   2850             if not isnull(item):
-> 2851                 loc = self.items.get_loc(item)
   2852             else:
   2853                 indexer = np.arange(len(self.items))[isnull(self.items)]

C:\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key, method)
   1570         """
   1571         if method is None:
-> 1572             return self._engine.get_loc(_values_from_object(key))
   1573 
   1574         indexer = self.get_indexer([key], method=method)

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3824)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3704)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12280)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12231)()

KeyError: 'Date Read'

Am I doing anything wrong here? Do I need to convert 'Date Read' to some other format?

[Edit1] I get the same error even after converting the 'Date Read' and 'Original Publication Year' to datetime.

books_read['Date Read'] = pd.to_datetime(books_read['Date Read'])
books_read['Original Publication Year'] = pd.to_datetime(books_read['Original Publication Year'])
books_read.plot(kind='scatter', x='Date Read', y='Original Publication Year')

KeyError: 'Date Read'

Upvotes: 1

Views: 12345

Answers (1)

Anton Protopopov
Anton Protopopov

Reputation: 31672

I'm not sure why it's not working with scatter, because it works fine with line. Alternatively, you could plot it with matplotlib.pyplot:

import pandas as pd
import matplotlib.pyplot as plt

df = pd.DataFrame({"Original Publication Year": [2010, 1941, 2012, 2008, 2012, 2009, 2009, 1931, 2005, 2007], "Data Read" : ['12/1/2015', '11/23/2015', '10/26/2015', '10/24/2015', '9/26/2015', '9/24/2015', '8/26/2015', '8/9/2015','10/2/2015', '7/12/2015']})

df['Data Read'] = pd.to_datetime(df['Data Read'])
df.plot(kind='line', x='Data Read', y='Original Publication Year')

enter image description here

plt.scatter(df['Data Read'].values, df['Original Publication Year'])

enter image description here

Upvotes: 1

Related Questions