fill row value in column A with previous row value in column B in pandas

Question

I have data like this and I want to fill the Na with previous row groupby the reg value. eg. the second row could be SHE and forth row be XMN, and since last two row got the same schedule_from, wo chose not to fill the Na in fifth row with SHA

reg	schedule_from	schedule_to
X-346	CAN	SHE
X-346	NaN	ZUH
A-583	CTU	NaN
A-583	XMN	SZX
T-777	SHA	NaN
T-777	SHA	CVG

I tired some coding, which also shown inside the error

    KeyError                                  Traceback (most recent call last)
 in 
      4         if asub['schedule_from'][ind] != asub['schedule_from'][ind-1]:
      5             if (pd.isnull(asub['schedule_from'][ind])== True)&(pd.isnull(asub['schedule_to'][ind-1])==False):
----> 6                 asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_from'][ind] == x['schedule_to'][ind-1])
      7            #     asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_to'][ind] == x['schedule_from'][ind+1])

~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
    857         with option_context("mode.chained_assignment", None):
    858             try:
--> 859                 result = self._python_apply_general(f, self._selected_obj)
    860             except TypeError:
    861                 # gh-20949

~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
    890             data after applying f
    891         """
--> 892         keys, values, mutated = self.grouper.apply(f, data, self.axis)
    893 
    894         return self._wrap_applied_output(

~\anaconda3\lib\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
    211             # group might be modified
    212             group_axes = group.axes
--> 213             res = f(group)
    214             if not _is_indexed_like(res, group_axes):
    215                 mutated = True

 in (x)
      4         if asub['schedule_from'][ind] != asub['schedule_from'][ind-1]:
      5             if (pd.isnull(asub['schedule_from'][ind])== True)&(pd.isnull(asub['schedule_to'][ind-1])==False):
----> 6                 asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_from'][ind] == x['schedule_to'][ind-1])
      7            #     asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_to'][ind] == x['schedule_from'][ind+1])

~\anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
    880 
    881         elif key_is_scalar:
--> 882             return self._get_value(key)
    883 
    884         if is_hashable(key):

~\anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
    987 
    988         # Similar to Index.get_value, but we do not fall back to positional
--> 989         loc = self.index.get_loc(label)
    990         return self.index._get_values_for_loc(self, loc, label)
    991 

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2895                 return self._engine.get_loc(casted_key)
   2896             except KeyError as err:
-> 2897                 raise KeyError(key) from err
   2898 
   2899         if tolerance is not None:

KeyError: 7

Ferris · Accepted Answer

import numpy as np
def handle_group_reg(group):
    # find the column is null to handle schedule_from and schedule_to is same
    cond_is_from_null = group['schedule_from'].isnull()
    cond_is_to_null   = group['schedule_to'  ].isnull()
    
    # fill schedule_from with previous schedule_to
    group['schedule_from'] = group['schedule_from'].combine_first(group['schedule_to'].shift(1))
    
    # fill schedule_to with next schedule_from
    group['schedule_to'] = group['schedule_to'].combine_first(group['schedule_from'].shift(-1))
    
    # handle schedule_from and schedule_to is same
    cond_is_from_to_same = group['schedule_from'] == group['schedule_to']
    group.loc[(cond_is_from_null & cond_is_from_to_same), 'schedule_from'] = np.nan
    group.loc[(cond_is_to_null   & cond_is_from_to_same), 'schedule_to'  ] = np.nan    
    
    return group


nan = np.nan
df = pd.DataFrame([{'reg': 'X-346', 'schedule_from': 'CAN', 'schedule_to': 'SHE'},
 {'reg': 'X-346', 'schedule_from': nan, 'schedule_to': 'ZUH'},
 {'reg': 'X-346', 'schedule_from': nan, 'schedule_to': 'SHA'},
 {'reg': 'X-346', 'schedule_from': 'SHA', 'schedule_to': 'PEK'},
 {'reg': 'X-346', 'schedule_from': 'PEK', 'schedule_to': nan},
 {'reg': 'X-346', 'schedule_from': 'XMN', 'schedule_to': 'SHA'},
 {'reg': 'A-583', 'schedule_from': 'CTU', 'schedule_to': nan},
 {'reg': 'A-583', 'schedule_from': 'XMN', 'schedule_to': 'SZX'},
 {'reg': 'T-777', 'schedule_from': 'SHA', 'schedule_to': nan},
 {'reg': 'T-777', 'schedule_from': 'SHA', 'schedule_to': 'CVG'}])
dfn = df.groupby('reg').apply(handle_group_reg)
print(dfn.fillna(0))

    #      reg schedule_from schedule_to
    # 0  X-346           CAN         SHE
    # 1  X-346           SHE         ZUH
    # 2  X-346           ZUH         SHA
    # 3  X-346           SHA         PEK
    # 4  X-346           PEK         XMN
    # 5  X-346           XMN         SHA
    # 6  A-583           CTU         XMN
    # 7  A-583           XMN         SZX
    # 8  T-777           SHA           0
    # 9  T-777           SHA         CVG

fill row value in column A with previous row value in column B in pandas

Answers (1)

Related Questions