Reputation: 95
I have nonuniformly sampled data that I am trying to apply a Gaussian filter to. I am using Python's numpy library to solve this. The data is of XY type; here is what it looks like:
[[ -0.96 390.63523024]
[ -1.085 390.68523024]
[ -1.21 390.44023023]
...
[-76.695 390.86023024]
[-77.105 392.51023024]
[-77.155 392.10023024]]
And here is a link to the whole *.npz file.
Here is my approach; this is what the code looks like:
import numpy as np
import matplotlib.pyplot as plt

xy = np.load('1D_data.npz')['arr_0']

def g_func(xx, w=1.0):
    a = 0.47 * w
    return (1 / a) * np.exp((xx / a) ** 2 * (-np.pi))

x, y, x_, y_ = xy[:, 0], xy[:, 1], [], []
counter, xi, ww = 0, x[0], 1.0
while xi > np.amin(x):
    curr_x = x[(x < xi) & (x >= xi - 2 * ww)]
    g, ysel = [], []
    for i, els in enumerate(curr_x):
        xil = els - curr_x[0] + abs(curr_x[0] - curr_x[-1]) / 2
        g.append(g_func(xil, ww))
        ysel.append(y[counter + i])
    y_.append(np.sum(np.multiply(g, ysel)) / len(g))
    x_.append(xi)
    counter += 1
    xi = x[counter]

plt.plot(x, y, '-k')
plt.plot(x_, y_, '-r')
plt.show()
The output doesn't look right, though (see the figure below). Even when discarding the edges, the convolution is very noisy and the values do not seem to correspond to the data. What am I possibly doing wrong?
Upvotes: 2
Views: 3112
Reputation: 7676
You made one mistake in your code: before multiplying g with y_sel, y_sel is not centered.

The reason why y_sel should be centered is that we want to add the relative differences, weighted by the Gaussian, to the entry at the center. If you multiply g with y_sel directly, not just the values of the neighboring entries within the window but also the value of the center entry will be weighted by the Gaussian, which changes the function values dramatically.
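To see the effect on toy numbers (the weights and window values below are made up purely for illustration, not taken from your data), compare weighting the raw window against weighting the centered window:

import numpy as np

g = np.array([0.1, 0.2, 0.4, 0.2, 0.1])                # hypothetical Gaussian weights
y_sel = np.array([390.4, 390.6, 390.5, 390.7, 390.6])  # hypothetical y window

direct = np.sum(g * y_sel) / len(g)                                    # ~78, nowhere near the data
centered = np.sum(g * (y_sel - y_sel.mean())) / len(g) + y_sel.mean()  # ~390.56, stays on the curve

print(direct, centered)

Without centering, the large absolute y values themselves get scaled by the Gaussian, which is why the convolution drifts far away from the data.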
Below is my solution using numpy:
import numpy as np
import matplotlib.pyplot as plt

def g_func(xx, w=1.0):
    mean = np.mean(xx)
    a = 0.47 * w
    return (1 / a) * np.exp(((xx - mean) / a) ** 2 * (-np.pi))

def get_convolution(array, half_window_size):
    # pad the edges by repeating the first and last samples,
    # then build one window of indices per original sample
    array = np.concatenate((np.repeat(array[0], half_window_size),
                            array,
                            np.repeat(array[-1], half_window_size)))
    window_inds = [list(range(ind - half_window_size, ind + half_window_size + 1))
                   for ind in range(half_window_size, len(array) - half_window_size)]
    return np.take(array, window_inds)

xy = np.load('1D_data.npz')['arr_0']
x, y = xy[:, 0], xy[:, 1]

half_window_size = 4
x_conv = np.apply_along_axis(g_func, axis=1, arr=get_convolution(x, half_window_size=half_window_size))
y_conv = get_convolution(y, half_window_size=half_window_size)
# center the y window, weight the deviations, then add the mean back
y_mean = np.mean(y_conv, axis=1)
y_centered = y_conv - y_mean[:, None]
smoothed = np.sum(x_conv * y_centered, axis=1) / (half_window_size * 2) + y_mean

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, '-k')
ax.plot(x, smoothed, '-r')
plt.show()
Running the code, the output is shown below.
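As a side note, a quick check of what get_convolution returns may help: it pads the edges by repeating the first and last samples and gives one row of length 2*half_window_size + 1 per original sample. The 5-element array here is only an illustration, assuming the definitions above are already in scope:

print(get_convolution(np.array([1, 2, 3, 4, 5]), half_window_size=2))
# [[1 1 1 2 3]
#  [1 1 2 3 4]
#  [1 2 3 4 5]
#  [2 3 4 5 5]
#  [3 4 5 5 5]]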
UPDATE

In order to unify w with half_window_size, here is one possibility: the idea is to set the standard deviation of the Gaussian to the window length, 2*half_window_size + 1.
import numpy as np
import matplotlib.pyplot as plt

def g_func(xx):
    # the standard deviation is set to the window length
    std = len(xx)
    mean = np.mean(xx)
    return 1 / (std * np.sqrt(2 * np.pi)) * np.exp(-1 / 2 * ((xx - mean) / std) ** 2)

def get_convolution(array, half_window_size):
    array = np.concatenate((np.repeat(array[0], half_window_size),
                            array,
                            np.repeat(array[-1], half_window_size)))
    window_inds = [list(range(ind - half_window_size, ind + half_window_size + 1))
                   for ind in range(half_window_size, len(array) - half_window_size)]
    return np.take(array, window_inds)

xy = np.load('1D_data.npz')['arr_0']
x, y = xy[:, 0], xy[:, 1]

half_window_size = 4
x_conv = np.apply_along_axis(g_func, axis=1, arr=get_convolution(x, half_window_size=half_window_size))
y_conv = get_convolution(y, half_window_size=half_window_size)
y_mean = np.mean(y_conv, axis=1)
y_centered = y_conv - y_mean[:, None]
smoothed = np.sum(x_conv * y_centered, axis=1) / (half_window_size * 2) + y_mean

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, '-k')
ax.plot(x, smoothed, '-r')
plt.show()
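As a rough sanity check of the updated g_func (the evenly spaced toy window below is only an illustration, assuming the definitions above are in scope), the weights it produces within one window are nearly flat, since the standard deviation spans the whole window:

print(g_func(np.arange(2 * half_window_size + 1)).round(4))
# roughly 0.040 at the edges, rising to about 0.044 at the center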
Upvotes: 1