user16286965
user16286965

Reputation:

fill row value in column A with previous row value in column B in pandas

I have data like this and I want to fill the Na with previous row groupby the reg value. eg. the second row could be SHE and forth row be XMN, and since last two row got the same schedule_from, wo chose not to fill the Na in fifth row with SHA

reg schedule_from schedule_to
X-346 CAN SHE
X-346 NaN ZUH
A-583 CTU NaN
A-583 XMN SZX
T-777 SHA NaN
T-777 SHA CVG

I tired some coding, which also shown inside the error

    KeyError                                  Traceback (most recent call last)
<ipython-input-20-19798ba7d29d> in <module>
      4         if asub['schedule_from'][ind] != asub['schedule_from'][ind-1]:
      5             if (pd.isnull(asub['schedule_from'][ind])== True)&(pd.isnull(asub['schedule_to'][ind-1])==False):
----> 6                 asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_from'][ind] == x['schedule_to'][ind-1])
      7            #     asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_to'][ind] == x['schedule_from'][ind+1])

~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
    857         with option_context("mode.chained_assignment", None):
    858             try:
--> 859                 result = self._python_apply_general(f, self._selected_obj)
    860             except TypeError:
    861                 # gh-20949

~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
    890             data after applying f
    891         """
--> 892         keys, values, mutated = self.grouper.apply(f, data, self.axis)
    893 
    894         return self._wrap_applied_output(

~\anaconda3\lib\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
    211             # group might be modified
    212             group_axes = group.axes
--> 213             res = f(group)
    214             if not _is_indexed_like(res, group_axes):
    215                 mutated = True

<ipython-input-20-19798ba7d29d> in <lambda>(x)
      4         if asub['schedule_from'][ind] != asub['schedule_from'][ind-1]:
      5             if (pd.isnull(asub['schedule_from'][ind])== True)&(pd.isnull(asub['schedule_to'][ind-1])==False):
----> 6                 asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_from'][ind] == x['schedule_to'][ind-1])
      7            #     asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_to'][ind] == x['schedule_from'][ind+1])

~\anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
    880 
    881         elif key_is_scalar:
--> 882             return self._get_value(key)
    883 
    884         if is_hashable(key):

~\anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
    987 
    988         # Similar to Index.get_value, but we do not fall back to positional
--> 989         loc = self.index.get_loc(label)
    990         return self.index._get_values_for_loc(self, loc, label)
    991 

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2895                 return self._engine.get_loc(casted_key)
   2896             except KeyError as err:
-> 2897                 raise KeyError(key) from err
   2898 
   2899         if tolerance is not None:

KeyError: 7

Upvotes: 2

Views: 717

Answers (1)

Ferris
Ferris

Reputation: 5601

import numpy as np
def handle_group_reg(group):
    # find the column is null to handle schedule_from and schedule_to is same
    cond_is_from_null = group['schedule_from'].isnull()
    cond_is_to_null   = group['schedule_to'  ].isnull()
    
    # fill schedule_from with previous schedule_to
    group['schedule_from'] = group['schedule_from'].combine_first(group['schedule_to'].shift(1))
    
    # fill schedule_to with next schedule_from
    group['schedule_to'] = group['schedule_to'].combine_first(group['schedule_from'].shift(-1))
    
    # handle schedule_from and schedule_to is same
    cond_is_from_to_same = group['schedule_from'] == group['schedule_to']
    group.loc[(cond_is_from_null & cond_is_from_to_same), 'schedule_from'] = np.nan
    group.loc[(cond_is_to_null   & cond_is_from_to_same), 'schedule_to'  ] = np.nan    
    
    return group


nan = np.nan
df = pd.DataFrame([{'reg': 'X-346', 'schedule_from': 'CAN', 'schedule_to': 'SHE'},
 {'reg': 'X-346', 'schedule_from': nan, 'schedule_to': 'ZUH'},
 {'reg': 'X-346', 'schedule_from': nan, 'schedule_to': 'SHA'},
 {'reg': 'X-346', 'schedule_from': 'SHA', 'schedule_to': 'PEK'},
 {'reg': 'X-346', 'schedule_from': 'PEK', 'schedule_to': nan},
 {'reg': 'X-346', 'schedule_from': 'XMN', 'schedule_to': 'SHA'},
 {'reg': 'A-583', 'schedule_from': 'CTU', 'schedule_to': nan},
 {'reg': 'A-583', 'schedule_from': 'XMN', 'schedule_to': 'SZX'},
 {'reg': 'T-777', 'schedule_from': 'SHA', 'schedule_to': nan},
 {'reg': 'T-777', 'schedule_from': 'SHA', 'schedule_to': 'CVG'}])
dfn = df.groupby('reg').apply(handle_group_reg)
print(dfn.fillna(0))

    #      reg schedule_from schedule_to
    # 0  X-346           CAN         SHE
    # 1  X-346           SHE         ZUH
    # 2  X-346           ZUH         SHA
    # 3  X-346           SHA         PEK
    # 4  X-346           PEK         XMN
    # 5  X-346           XMN         SHA
    # 6  A-583           CTU         XMN
    # 7  A-583           XMN         SZX
    # 8  T-777           SHA           0
    # 9  T-777           SHA         CVG

Upvotes: 1

Related Questions