Reputation: 21
This is my code:
def mask(mask_cols):
"""
This function masks specific columns of the pupil dataframe.
"""
masked_df = pd.read_csv(con.CLEANED_PUPIL_GAZE)
masked_df = mask_pupil_first_derivative(masked_df, threshold=3.0,
mask_cols=['diameter', 'diameter_3d'])
return masked_df
def smooth(columns_to_smooth):
"""
This function smooths two columns of the interpolated pupil dataframe.
:param columns_to_smooth: diameter and diamete 3d columns.
"""
original_df = mask(mask_cols=['diameter', 'diameter_3d'])
smooth_df = original_df
logging.basicConfig(filename='filtering_errors.log', level=logging.ERROR)
print(smooth_df)
print("Data will be now be smoothed with a low-pass BW filter.")
nyquist = 0.5 * con.sample_rate_ET
cutoff_high = 4.0 # Example: Upper bound for smoothing
cutoff_low = 0.01 # Example: Lower bound for smoothing
# Calculate normalized cutoff frequencies
normalized_cutoff_low = cutoff_low / nyquist
normalized_cutoff_high = cutoff_high / nyquist
for column in columns_to_smooth:
try:
# Check for NaNs before filtering
if smooth_df[column].isnull().any():
raise ValueError(f"NaN values detected in column {column} before filtering")
# Apply Butterworth filter to the selected column
b, a = butter(N=3, Wn=[normalized_cutoff_low, normalized_cutoff_high],
btype='bandpass', fs=con.sample_rate_ET)
smooth_values = filtfilt(b, a, smooth_df[column])
# Check for NaNs after filtering
if np.isnan(smooth_values).any():
raise ValueError(f"NaN values detected after filtering column {column}")
# Update the column in smooth_df with the filtered values
smooth_df[column] = smooth_values
print(smooth_values)
except ValueError as e:
print(f"Error: {e}")
# Handle the error, e.g., log the error, skip this column, or set NaNs as appropriate
# Save the smoothed data to a new CSV file
smooth_df.to_csv(con.SMOOTHED_PUPIL, index=False)
# Plot the frequency response of the filter for the current column
w, h = freqs(b, a)
plt.semilogx(w, 20 * np.log10(abs(h)))
plt.title(f'Butterworth filter frequency response for column {column}')
plt.xlabel('Frequency [radians / second]')
plt.ylabel('Amplitude [dB]')
plt.margins(0, 0.1)
plt.grid(which='both', axis='both')
plt.axvline(100, color='green') # cutoff frequency
plt.show()
# Assuming 'original_df' contains the original data before smoothing and 'smooth_df' contains the smoothed data
# Plot original and smoothed data for comparison
plt.plot(original_df['pupil_timestamp'], original_df['diameter'], label='Original')
plt.plot(smooth_df['pupil_timestamp'], smooth_df['diameter'], label='Smoothed')
plt.xlabel('Timestamp')
plt.ylabel('Diameter')
plt.title('Comparison of Original and Smoothed Data')
plt.legend()
plt.show()
# Calculate summary statistics
original_mean = original_df['diameter'].mean()
smoothed_mean = smooth_df['diameter'].mean()
original_std = original_df['diameter'].std()
smoothed_std = smooth_df['diameter'].std()
# Print summary statistics
print(f"Original Mean: {original_mean}, Smoothed Mean: {smoothed_mean}")
print(f"Original Standard Deviation: {original_std}, Smoothed Standard Deviation: {smoothed_std}")
I am working with eye-tracking data with an average sampling rate of 120 Hz (but it could be different for each dataset). The original and smoothed dataset have the same mean and standard deviation and the graphs are overlaid.
There is no smoothing going on. What should I do?
Upvotes: 0
Views: 34
Reputation: 86
As @mkrieger1 pointed out you are not making a deep copy of the pandas data frame. Which is why you are facing this issue. If you refer to this post you know that your logic should have been
smoothed_df = original_df.copy(deep=True)
This should ensure that your answers are different. Else you are calculating statistics from the same data frame.
Upvotes: 0