user24902761
user24902761

Reputation: 1

python definition that use try and except loop to create a column using the apply funtion

I am trying to make a definition to calculate the difference between two dates. For the the date columns there are null values. The definition should be able to ignore these null values and state that these null values don't meet the required format, using the e.args funtion.

I have created the below code to see if this is possible.

def date_difference(dataframe):
    dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].astype(str)
    dataframe['Issued Date'] = dataframe['Issued Date'].astype(str)
    dataframe['Issued Date'] = dataframe['Issued Date'].fillna(0)
    dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].fillna(0)
    for x, y in zip(dataframe['Issued Date'], dataframe['Permit Creation Date']):
        try:
            d1 = datetime.datetime.strptime(x, "%Y-%m-%d")
            d2 = datetime.datetime.strptime(y, "%Y-%m-%d")
            print (abs(d1-d2).days)
        except Exception as e:
            #pass
            #print(e.args)
            pass
            print(e.args)

I used apply funtion to see if the definition I made can be used to make a new column (date difference) in my dataframe.

type1['Date difference'] = type1.apply(date_difference(type1))

type1

output


525
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)
("time data 'NaT' does not match format '%Y-%m-%d'",)

Error output:

TypeError                                 Traceback (most recent call last)
Cell In[30], line 1
----> 1 type1['Permit Type'] = type1.apply(date_difference(type1))
      2 type1

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:10362, in DataFrame.apply(self, func, axis, raw, result_type, args, by_row, engine, engine_kwargs, **kwargs)
  10182 """
  10183 Apply a function along an axis of the DataFrame.
  10184 
   (...)
  10358 2  1  2
  10359 """
  10360 from pandas.core.apply import frame_apply
> 10362 op = frame_apply(
  10363     self,
  10364     func=func,
  10365     axis=axis,
  10366     raw=raw,
  10367     result_type=result_type,
  10368     by_row=by_row,
  10369     engine=engine,
  10370     engine_kwargs=engine_kwargs,
  10371     args=args,
  10372     kwargs=kwargs,
  10373 )
  10374 return op.apply().__finalize__(self, method="apply")

File ~\anaconda3\Lib\site-packages\pandas\core\apply.py:102, in frame_apply(obj, func, axis, raw, result_type, by_row, engine, engine_kwargs, args, kwargs)
     99 elif axis == 1:
    100     klass = FrameColumnApply
--> 102 _, func, _, _ = reconstruct_func(func, **kwargs)
    103 assert func is not None
    105 return klass(
    106     obj,
    107     func,
   (...)
    114     kwargs=kwargs,
    115 )

File ~\anaconda3\Lib\site-packages\pandas\core\apply.py:1696, in reconstruct_func(func, **kwargs)
   1690         raise SpecificationError(
   1691             "Function names must be unique if there is no new column names "
   1692             "assigned"
   1693         )
   1694     if func is None:
   1695         # nicer error message
-> 1696         raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
   1698 if relabeling:
   1699     # error: Incompatible types in assignment (expression has type
   1700     # "MutableMapping[Hashable, list[Callable[..., Any] | str]]", variable has type
   1701     # "Callable[..., Any] | str | list[Callable[..., Any] | str] |
   1702     # MutableMapping[Hashable, Callable[..., Any] | str | list[Callable[..., Any] |
   1703     # str]] | None")
   1704     func, columns, order = normalize_keyword_aggregation(  # type: ignore[assignment]
   1705         kwargs
   1706     )

TypeError: Must provide 'func' or tuples of '(column, aggfunc).

I understand that return should be used instead of print() when making a definition but when I use return, the output, I only receive the date differnce for one row.

def date_difference(dataframe):
    dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].astype(str)
    dataframe['Issued Date'] = dataframe['Issued Date'].astype(str)
    dataframe['Issued Date'] = dataframe['Issued Date'].fillna(0)
    dataframe['Permit Creation Date'] = dataframe['Permit Creation Date'].fillna(0)
    for x, y in zip(dataframe['Issued Date'], dataframe['Permit Creation Date']):
            d1 = datetime.datetime.strptime(x, "%Y-%m-%d")
            d2 = datetime.datetime.strptime(y, "%Y-%m-%d")
            return abs(d1-d2).days
    else:
            #pass
            #print(e.args)
            pass
date_difference(type4)

output:

187

Upvotes: -8

Views: 61

Answers (0)

Related Questions