Reputation: 31
Data:
# Input series keyed by 'YYYY-MM' month labels: a value is present on every
# third month (01, 04, 07, 10) and the months in between are NaN.
data = pd.Series(
    [
        14.80, np.nan, np.nan,
        14.83, np.nan, np.nan,
        15.14, np.nan, np.nan,
        15.03, np.nan, np.nan,
    ],
    index=[
        '2005-01', '2005-02', '2005-03',
        '2005-04', '2005-05', '2005-06',
        '2005-07', '2005-08', '2005-09',
        '2005-10', '2005-11', '2005-12',
    ],
)
Target Data:
# Expected monthly values for the same 'YYYY-MM' labels as `data`; every
# month carries a value (no NaN entries).
target_data = pd.Series(
    [
        14.85, 14.76, 14.79,
        14.71, 14.83, 14.95,
        15.16, 15.16, 15.11,
        15.05, 15.02, 15.01,
    ],
    index=[
        '2005-01', '2005-02', '2005-03',
        '2005-04', '2005-05', '2005-06',
        '2005-07', '2005-08', '2005-09',
        '2005-10', '2005-11', '2005-12',
    ],
)
I'm using the function below for interpolation, but its output does not match the target. Please help me make it match; the maximum acceptable error is a mismatch after the fourth decimal place.
def interpolate(row, bc_type="natural", decimals=3):
    """Fill the gaps of a sparse series with a mean-preserving cubic spline.

    Everything from the first non-null entry onwards is replaced by a cubic
    spline fitted through the non-null entries. The spline is then shifted,
    block by block, so that the mean of each block equals the observation
    that opens it. A block runs from one observation up to (not including)
    the next one; the last block runs to the end of the series, using the
    spline's extrapolation for any trailing gap.

    Knots and blocks are taken from the actual positions of the non-null
    entries, so irregular gaps or a long trailing run of NaN no longer place
    knots at the wrong abscissa or raise a shape-mismatch error. For regularly
    spaced input the result is the same as with fixed 3-row blocks.

    Args:
        row: pandas Series of numbers; missing entries may be NaN or the
            string "nan". The input object is not modified.
        bc_type: boundary condition forwarded to ``CubicSpline``.
        decimals: number of decimals the filled values are rounded to, or
            ``None`` to skip rounding. Rounding can move a block mean by up
            to half a unit of the last kept decimal, so use ``None`` or a
            larger value when a tighter tolerance is required.

    Returns:
        A float Series with the same index. Entries before the first
        observation stay NaN. If there are fewer than two observations the
        series is returned unfilled.
    """
    row = row.replace("nan", np.nan).astype(float)

    observed = row.notna().to_numpy()
    if not observed.any():
        return row

    # Work positionally from the first observation; index labels are not
    # needed and positions stay valid for any index type.
    first_pos = int(observed.argmax())
    knot_positions = np.flatnonzero(observed[first_pos:])
    knot_values = row.to_numpy()[first_pos:][knot_positions]

    if len(knot_positions) < 2:
        print('Only one non-null value found - cannot interpolate')
        return row

    tail_length = len(row) - first_pos
    spline = CubicSpline(knot_positions, knot_values, bc_type=bc_type)
    fitted = spline(np.arange(tail_length))

    # One block per observation, so shifts and observations always pair up.
    block_ends = np.append(knot_positions[1:], tail_length)
    adjusted = fitted.copy()
    for start, end, observed_value in zip(knot_positions, block_ends, knot_values):
        # A constant shift keeps the spline's shape inside the block while
        # forcing the block mean onto the observed value.
        adjusted[start:end] += observed_value - fitted[start:end].mean()

    if decimals is not None:
        adjusted = np.round(adjusted, decimals)

    row.iloc[first_pos:] = adjusted
    return row
I tried a cubic spline for interpolation but am not getting the expected output. Do I have to change the adjustment step or bc_type, or even use a different interpolation technique?
Upvotes: 1
Views: 29