Reputation: 1
I am trying to make a definition to calculate the difference between two dates. For the the date columns there are null values. The definition should be able to ignore these null values and state that these null values don't meet the required format, using the e.args funtion.
I have created the below code to see if this is possible.
def date_difference(dataframe):
dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].astype(str)
dataframe['Issued Date'] = dataframe['Issued Date'].astype(str)
dataframe['Issued Date'] = dataframe['Issued Date'].fillna(0)
dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].fillna(0)
for x, y in zip(dataframe['Issued Date'], dataframe['Permit Creation Date']):
try:
d1 = datetime.datetime.strptime(x, "%Y-%m-%d")
d2 = datetime.datetime.strptime(y, "%Y-%m-%d")
print (abs(d1-d2).days)
except Exception as e:
#pass
#print(e.args)
pass
print(e.args)
I used apply funtion to see if the definition I made can be used to make a new column (date difference) in my dataframe.
type1['Date difference'] = type1.apply(date_difference(type1))
type1
output
525
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
Error output:
TypeError Traceback (most recent call last)
Cell In[30], line 1
----> 1 type1['Permit Type'] = type1.apply(date_difference(type1))
2 type1
File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:10362, in DataFrame.apply(self, func, axis, raw, result_type, args, by_row, engine, engine_kwargs, **kwargs)
10182 """
10183 Apply a function along an axis of the DataFrame.
10184
(...)
10358 2 1 2
10359 """
10360 from pandas.core.apply import frame_apply
> 10362 op = frame_apply(
10363 self,
10364 func=func,
10365 axis=axis,
10366 raw=raw,
10367 result_type=result_type,
10368 by_row=by_row,
10369 engine=engine,
10370 engine_kwargs=engine_kwargs,
10371 args=args,
10372 kwargs=kwargs,
10373 )
10374 return op.apply().__finalize__(self, method="apply")
File ~\anaconda3\Lib\site-packages\pandas\core\apply.py:102, in frame_apply(obj, func, axis, raw, result_type, by_row, engine, engine_kwargs, args, kwargs)
99 elif axis == 1:
100 klass = FrameColumnApply
--> 102 _, func, _, _ = reconstruct_func(func, **kwargs)
103 assert func is not None
105 return klass(
106 obj,
107 func,
(...)
114 kwargs=kwargs,
115 )
File ~\anaconda3\Lib\site-packages\pandas\core\apply.py:1696, in reconstruct_func(func, **kwargs)
1690 raise SpecificationError(
1691 "Function names must be unique if there is no new column names "
1692 "assigned"
1693 )
1694 if func is None:
1695 # nicer error message
-> 1696 raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
1698 if relabeling:
1699 # error: Incompatible types in assignment (expression has type
1700 # "MutableMapping[Hashable, list[Callable[..., Any] | str]]", variable has type
1701 # "Callable[..., Any] | str | list[Callable[..., Any] | str] |
1702 # MutableMapping[Hashable, Callable[..., Any] | str | list[Callable[..., Any] |
1703 # str]] | None")
1704 func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment]
1705 kwargs
1706 )
TypeError: Must provide 'func' or tuples of '(column, aggfunc).
I understand that return should be used instead of print() when making a definition but when I use return, the output, I only receive the date differnce for one row.
def date_difference(dataframe):
dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].astype(str)
dataframe['Issued Date'] = dataframe['Issued Date'].astype(str)
dataframe['Issued Date'] = dataframe['Issued Date'].fillna(0)
dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].fillna(0)
for x, y in zip(dataframe['Issued Date'], dataframe['Permit Creation Date']):
d1 = datetime.datetime.strptime(x, "%Y-%m-%d")
d2 = datetime.datetime.strptime(y, "%Y-%m-%d")
return abs(d1-d2).days
else:
#pass
#print(e.args)
pass
date_difference(type4)
output:
187
Upvotes: -8
Views: 61