Reputation: 2693
I'm trying to merge two dataframes (call them df1 and df2) of different lengths which are both indexed by their dates. The longer of the dfs (df1) has all the dates listed in the shorter of the two (df2). I've tried to combine them using the following command: merged = df2.merge(df1, on='Date')
, however I get the following errors which I don't understand when I try to do so.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-47-e8d3e1ec920d> in <module>()
----> 1 merged = df2.merge(df1, on='Date')
/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
3630 left_on=left_on, right_on=right_on,
3631 left_index=left_index, right_index=right_index, sort=sort,
-> 3632 suffixes=suffixes, copy=copy)
3633
3634 #----------------------------------------------------------------------
/usr/lib/python2.7/dist-packages/pandas/tools/merge.pyc in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy)
37 right_on=right_on, left_index=left_index,
38 right_index=right_index, sort=sort, suffixes=suffixes,
---> 39 copy=copy)
40 return op.get_result()
41 if __debug__:
/usr/lib/python2.7/dist-packages/pandas/tools/merge.pyc in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy)
181 (self.left_join_keys,
182 self.right_join_keys,
--> 183 self.join_names) = self._get_merge_keys()
184
185 def get_result(self):
/usr/lib/python2.7/dist-packages/pandas/tools/merge.pyc in _get_merge_keys(self)
324 else:
325 if not is_rkey(rk):
--> 326 right_keys.append(right[rk].values)
327 if lk == rk:
328 # avoid key upcast in corner case (length-0)
/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in __getitem__(self, key)
1656 return self._getitem_multilevel(key)
1657 else:
-> 1658 return self._getitem_column(key)
1659
1660 def _getitem_column(self, key):
/usr/lib/python2.7/dist-packages/pandas/core/frame.pyc in _getitem_column(self, key)
1663 # get column
1664 if self.columns.is_unique:
-> 1665 return self._get_item_cache(key)
1666
1667 # duplicate columns & possible reduce dimensionaility
/usr/lib/python2.7/dist-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
1003 res = cache.get(item)
1004 if res is None:
-> 1005 values = self._data.get(item)
1006 res = self._box_item_values(item, values)
1007 cache[item] = res
/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in get(self, item)
2872 return self.get_for_nan_indexer(indexer)
2873
-> 2874 _, block = self._find_block(item)
2875 return block.get(item)
2876 else:
/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in _find_block(self, item)
3184
3185 def _find_block(self, item):
-> 3186 self._check_have(item)
3187 for i, block in enumerate(self.blocks):
3188 if item in block:
/usr/lib/python2.7/dist-packages/pandas/core/internals.pyc in _check_have(self, item)
3191 def _check_have(self, item):
3192 if item not in self.items:
-> 3193 raise KeyError('no item named %s' % com.pprint_thing(item))
3194
3195 def reindex_axis(self, new_axis, indexer=None, method=None, axis=0,
KeyError: u'no item named Date'
I've also tried dropping the on='Date'
as both are already indexed by the date but the result seems the same. Any idea as to where I might be going wrong?
Upvotes: 4
Views: 7262
Reputation: 13747
I think it is most naturally to use join
because it merges on indexes by default. So something like the following:
merged = df2.join(df1,rsuffix='_y')
Add the rsuffix='_y'
because you have common column names in both dataframes.
Upvotes: 4