Reputation: 4931
I want to plot 'Date Read' with respect to 'Original Publication Year' using Pandas in Python.
Pandas Version = '0.16.2'
books_read.dtypes
Title object
My Rating int64
Original Publication Year float64
Date Read object
Exclusive Shelf object
dtype: object
When I try to plot them, I get the error "KeyError: 'Date Read'":
books_read.plot(kind='scatter', x='Date Read', y='Original Publication Year')
plt.show()
KeyError Traceback (most recent call last)
<ipython-input-31-169da65d723c> in <module>()
----> 1 books_read.plot(kind='scatter', x='Date Read', y='Original Publication Year')
2 plt.show()
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2486 yerr=yerr, xerr=xerr,
2487 secondary_y=secondary_y, sort_columns=sort_columns,
-> 2488 **kwds)
2489
2490
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
2322 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
2323
-> 2324 plot_obj.generate()
2325 plot_obj.draw()
2326 return plot_obj.result
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in generate(self)
912 self._compute_plot_data()
913 self._setup_subplots()
--> 914 self._make_plot()
915 self._add_table()
916 self._make_legend()
C:\Anaconda3\lib\site-packages\pandas\tools\plotting.py in _make_plot(self)
1455 else:
1456 label = None
-> 1457 scatter = ax.scatter(data[x].values, data[y].values, c=c_values,
1458 label=label, cmap=cmap, **self.kwds)
1459 if cb:
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1795 return self._getitem_multilevel(key)
1796 else:
-> 1797 return self._getitem_column(key)
1798
1799 def _getitem_column(self, key):
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1802 # get column
1803 if self.columns.is_unique:
-> 1804 return self._get_item_cache(key)
1805
1806 # duplicate columns & possible reduce dimensionaility
C:\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1082 res = cache.get(item)
1083 if res is None:
-> 1084 values = self._data.get(item)
1085 res = self._box_item_values(item, values)
1086 cache[item] = res
C:\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
2849
2850 if not isnull(item):
-> 2851 loc = self.items.get_loc(item)
2852 else:
2853 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Anaconda3\lib\site-packages\pandas\core\index.py in get_loc(self, key, method)
1570 """
1571 if method is None:
-> 1572 return self._engine.get_loc(_values_from_object(key))
1573
1574 indexer = self.get_indexer([key], method=method)
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3824)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3704)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12280)()
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12231)()
KeyError: 'Date Read'
Am I doing anything wrong here? Do I need to convert 'Date Read' to some other format?
[Edit1] I get the same error even after converting the 'Date Read' and 'Original Publication Year' to datetime.
books_read['Date Read'] = pd.to_datetime(books_read['Date Read'])
books_read['Original Publication Year'] = pd.to_datetime(books_read['Original Publication Year'])
books_read.plot(kind='scatter', x='Date Read', y='Original Publication Year')
KeyError: 'Date Read'
Upvotes: 1
Views: 12345
Reputation: 31672
I'm not sure why it's not working with kind='scatter', because it works fine with kind='line'. Alternatively, you could plot it with matplotlib.pyplot:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"Original Publication Year": [2010, 1941, 2012, 2008, 2012, 2009, 2009, 1931, 2005, 2007], "Data Read" : ['12/1/2015', '11/23/2015', '10/26/2015', '10/24/2015', '9/26/2015', '9/24/2015', '8/26/2015', '8/9/2015','10/2/2015', '7/12/2015']})
df['Data Read'] = pd.to_datetime(df['Data Read'])
df.plot(kind='line', x='Data Read', y='Original Publication Year')
plt.scatter(df['Data Read'].values, df['Original Publication Year'])
Upvotes: 1