Reputation:
I have data like this and I want to fill the Na with previous row groupby the reg value. eg. the second row could be SHE and forth row be XMN, and since last two row got the same schedule_from, wo chose not to fill the Na in fifth row with SHA
reg | schedule_from | schedule_to |
---|---|---|
X-346 | CAN | SHE |
X-346 | NaN | ZUH |
A-583 | CTU | NaN |
A-583 | XMN | SZX |
T-777 | SHA | NaN |
T-777 | SHA | CVG |
I tired some coding, which also shown inside the error
KeyError Traceback (most recent call last)
<ipython-input-20-19798ba7d29d> in <module>
4 if asub['schedule_from'][ind] != asub['schedule_from'][ind-1]:
5 if (pd.isnull(asub['schedule_from'][ind])== True)&(pd.isnull(asub['schedule_to'][ind-1])==False):
----> 6 asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_from'][ind] == x['schedule_to'][ind-1])
7 # asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_to'][ind] == x['schedule_from'][ind+1])
~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
857 with option_context("mode.chained_assignment", None):
858 try:
--> 859 result = self._python_apply_general(f, self._selected_obj)
860 except TypeError:
861 # gh-20949
~\anaconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
890 data after applying f
891 """
--> 892 keys, values, mutated = self.grouper.apply(f, data, self.axis)
893
894 return self._wrap_applied_output(
~\anaconda3\lib\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
211 # group might be modified
212 group_axes = group.axes
--> 213 res = f(group)
214 if not _is_indexed_like(res, group_axes):
215 mutated = True
<ipython-input-20-19798ba7d29d> in <lambda>(x)
4 if asub['schedule_from'][ind] != asub['schedule_from'][ind-1]:
5 if (pd.isnull(asub['schedule_from'][ind])== True)&(pd.isnull(asub['schedule_to'][ind-1])==False):
----> 6 asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_from'][ind] == x['schedule_to'][ind-1])
7 # asub.groupby(['aircraft_registration_no']).apply(lambda x: x['schedule_to'][ind] == x['schedule_from'][ind+1])
~\anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
880
881 elif key_is_scalar:
--> 882 return self._get_value(key)
883
884 if is_hashable(key):
~\anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
987
988 # Similar to Index.get_value, but we do not fall back to positional
--> 989 loc = self.index.get_loc(label)
990 return self.index._get_values_for_loc(self, loc, label)
991
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
-> 2897 raise KeyError(key) from err
2898
2899 if tolerance is not None:
KeyError: 7
Upvotes: 2
Views: 717
Reputation: 5601
import numpy as np
def handle_group_reg(group):
# find the column is null to handle schedule_from and schedule_to is same
cond_is_from_null = group['schedule_from'].isnull()
cond_is_to_null = group['schedule_to' ].isnull()
# fill schedule_from with previous schedule_to
group['schedule_from'] = group['schedule_from'].combine_first(group['schedule_to'].shift(1))
# fill schedule_to with next schedule_from
group['schedule_to'] = group['schedule_to'].combine_first(group['schedule_from'].shift(-1))
# handle schedule_from and schedule_to is same
cond_is_from_to_same = group['schedule_from'] == group['schedule_to']
group.loc[(cond_is_from_null & cond_is_from_to_same), 'schedule_from'] = np.nan
group.loc[(cond_is_to_null & cond_is_from_to_same), 'schedule_to' ] = np.nan
return group
nan = np.nan
df = pd.DataFrame([{'reg': 'X-346', 'schedule_from': 'CAN', 'schedule_to': 'SHE'},
{'reg': 'X-346', 'schedule_from': nan, 'schedule_to': 'ZUH'},
{'reg': 'X-346', 'schedule_from': nan, 'schedule_to': 'SHA'},
{'reg': 'X-346', 'schedule_from': 'SHA', 'schedule_to': 'PEK'},
{'reg': 'X-346', 'schedule_from': 'PEK', 'schedule_to': nan},
{'reg': 'X-346', 'schedule_from': 'XMN', 'schedule_to': 'SHA'},
{'reg': 'A-583', 'schedule_from': 'CTU', 'schedule_to': nan},
{'reg': 'A-583', 'schedule_from': 'XMN', 'schedule_to': 'SZX'},
{'reg': 'T-777', 'schedule_from': 'SHA', 'schedule_to': nan},
{'reg': 'T-777', 'schedule_from': 'SHA', 'schedule_to': 'CVG'}])
dfn = df.groupby('reg').apply(handle_group_reg)
print(dfn.fillna(0))
# reg schedule_from schedule_to
# 0 X-346 CAN SHE
# 1 X-346 SHE ZUH
# 2 X-346 ZUH SHA
# 3 X-346 SHA PEK
# 4 X-346 PEK XMN
# 5 X-346 XMN SHA
# 6 A-583 CTU XMN
# 7 A-583 XMN SZX
# 8 T-777 SHA 0
# 9 T-777 SHA CVG
Upvotes: 1