Reputation: 301
The scipy cross correlation function is simply not working for a specific 1d array and I cant figure out why. The code below demonstrate the problem, just try it with one trace and than the other.
This question is a bit related to cross correlation and Python cross-correlation not returning correct shift
#!/usr/bin/python3
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
def _main():
"""
trace = np.array([0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, # down the step
0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, # up the step
0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002]) # down the step
"""
trace = np.array([0.51231204949426460, 0.47472182808002383, 0.48806029762272723, 0.51352464310119930, 0.58506742537603330, 0.62993314829830390, 0.57657927012749040, 0.55369158834668990, 0.56255864527226200, 0.61576098682569510,
0.62955418648769630, 0.64236215760241170, 0.69063835641941580, 0.75073729780384960, 0.86896478361172370, 0.92216712516515690, 0.91329988783884970, 0.92807831604813670, 0.99113300320800610, 0.99999999999999990, 0.91527040506699960,
0.80098377331469030, 0.71723934679539750, 0.68275634764039450, 0.65812563395824950, 0.63250963159524040, 0.59999708953480900, 0.55172083058422660, 0.54975037348965490, 0.57011178351142090, 0.52807534544936740])
left_padded_trace = np.pad(trace, (10, 0), mode='constant', constant_values=trace.min())
center_padded_trace = np.pad(trace, (5, 5), mode='constant', constant_values=trace.min())
right_padded_trace = np.pad(trace, (0, 10), mode='constant', constant_values=trace.min())
correlation1 = signal.correlate(center_padded_trace, left_padded_trace, mode='full', method='fft')
correlation2 = signal.correlate(center_padded_trace, center_padded_trace, mode='full', method='fft')
correlation3 = signal.correlate(center_padded_trace, right_padded_trace, mode='full', method='fft')
corr_peak_index1 = np.argmax(correlation1)
corr_max1 = np.max(correlation1)
corr_peak_index2 = np.argmax(correlation2)
corr_max2 = np.max(correlation2)
corr_peak_index3 = np.argmax(correlation3)
corr_max3 = np.max(correlation3)
offset1 = corr_peak_index1-(center_padded_trace.size-1)
offset2 = corr_peak_index2-(center_padded_trace.size-1)
offset3 = corr_peak_index3-(center_padded_trace.size-1)
print("Corr1: {}, Corr2: {}, Corr3: {}".format(corr_peak_index1, corr_peak_index2, corr_peak_index3))
print("Offset1: {}, Offset2: {}, Offset3: {}".format(offset1, offset2, offset3))
plt.figure(1)
plt.subplot(311)
plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
range(offset1, left_padded_trace.size+offset1), left_padded_trace, 'b--',
range(0, correlation1.size), correlation1/corr_max1, 'g-',
[corr_peak_index1], [1], 'k+')
plt.subplot(312)
plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
range(offset2, center_padded_trace.size+offset2), center_padded_trace, 'b--',
range(0, correlation2.size), correlation2/corr_max2, 'g-',
[corr_peak_index2], [1], 'k+')
plt.subplot(313)
plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
range(offset3, right_padded_trace.size+offset3), right_padded_trace, 'b--',
range(0, correlation3.size), correlation3/corr_max3, 'g-',
[corr_peak_index3], [1], 'k+')
plt.show()
Since the shift added by padding is the same and the only difference is the change on the input trace, the results in terms of shift and alignment from the correlation should be the same but they are not.
For the first trace (more synthetic step) the correlations and offsets are: (1 is the left padded, 2 is the centered and 3 is the right padded)
For the second trace (more natural), the
Follows the plots:
See answer and comments from Paul Panzer below.
The problem is with the original code is with the non zero padding.
When shifting the array with non zero values, the cross correlation value gets increasingly higher and the peak gets affected. The following code and images demonstrates this effect:
trace = np.array([0.51231204949426460, 0.47472182808002383, 0.48806029762272723, 0.51352464310119930, 0.58506742537603330, 0.62993314829830390, 0.57657927012749040, 0.55369158834668990, 0.56255864527226200, 0.61576098682569510, 0.62955418648769630, 0.64236215760241170, 0.69063835641941580, 0.75073729780384960, 0.86896478361172370, 0.92216712516515690, 0.91329988783884970, 0.92807831604813670, 0.99113300320800610, 0.99999999999999990, 0.91527040506699960, 0.80098377331469030, 0.71723934679539750, 0.68275634764039450, 0.65812563395824950, 0.63250963159524040, 0.59999708953480900, 0.55172083058422660, 0.54975037348965490, 0.57011178351142090, 0.52807534544936740])
for padding_value in np.arange(0, trace.min(), trace.min()/10):
left_padded_trace = np.pad(trace, (10, 0), mode='constant', constant_values=padding_value)
center_padded_trace = np.pad(trace, (5, 5), mode='constant', constant_values=padding_value)
correlation = signal.correlate(center_padded_trace, left_padded_trace, mode='full', method='fft')
corr_peak_index = np.argmax(correlation)
plt.figure(2)
plt.subplot(211)
plt.title('Left Padded Trace')
plt.xticks([])
plt.plot(left_padded_trace)
plt.subplot(212)
plt.title('Centered Padded Trace')
plt.plot(center_padded_trace)
plt.figure(3)
plt.plot(range(0, correlation.size), correlation)
plt.plot([corr_peak_index], [correlation[corr_peak_index]], 'k+')
plt.show()
The results are presented below. One can see that, as the padding value increases, the correlation peak moves to the center.
Upvotes: 1
Views: 2219
Reputation: 53029
The difference is explained by the fact that you are padding with the minimum which is not zero in case of the second trace. As a consequence you cannot expect the peak just to shift with the offset. Instead you get the shifted peak curve plus a triangle that scales with the minimum.
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
def _main(offset=0, trace_idx=0):
trace = [np.array([0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, # down the step
0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, # up the step
0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002]), # down the step
np.array([0.51231204949426460, 0.47472182808002383, 0.48806029762272723, 0.51352464310119930, 0.58506742537603330, 0.62993314829830390, 0.57657927012749040, 0.55369158834668990, 0.56255864527226200, 0.61576098682569510,
0.62955418648769630, 0.64236215760241170, 0.69063835641941580, 0.75073729780384960, 0.86896478361172370, 0.92216712516515690, 0.91329988783884970, 0.92807831604813670, 0.99113300320800610, 0.99999999999999990, 0.91527040506699960,
0.80098377331469030, 0.71723934679539750, 0.68275634764039450, 0.65812563395824950, 0.63250963159524040, 0.59999708953480900, 0.55172083058422660, 0.54975037348965490, 0.57011178351142090, 0.52807534544936740])][trace_idx]
trace += offset - trace.min()
left_padded_trace = np.pad(trace, (10, 0), mode='constant', constant_values=trace.min())
center_padded_trace = np.pad(trace, (5, 5), mode='constant', constant_values=trace.min())
right_padded_trace = np.pad(trace, (0, 10), mode='constant', constant_values=trace.min())
correlation1 = signal.correlate(center_padded_trace, left_padded_trace, mode='full', method='fft')
correlation2 = signal.correlate(center_padded_trace, center_padded_trace, mode='full', method='fft')
correlation3 = signal.correlate(center_padded_trace, right_padded_trace, mode='full', method='fft')
corr_peak_index1 = np.argmax(correlation1)
corr_max1 = np.max(correlation1)
corr_peak_index2 = np.argmax(correlation2)
corr_max2 = np.max(correlation2)
corr_peak_index3 = np.argmax(correlation3)
corr_max3 = np.max(correlation3)
offset1 = corr_peak_index1-(center_padded_trace.size-1)
offset2 = corr_peak_index2-(center_padded_trace.size-1)
offset3 = corr_peak_index3-(center_padded_trace.size-1)
return offset1, offset2, offset3
print("Corr1: {}, Corr2: {}, Corr3: {}".format(corr_peak_index1, corr_peak_index2, corr_peak_index3))
print("Offset1: {}, Offset2: {}, Offset3: {}".format(offset1, offset2, offset3))
plt.figure(1)
plt.subplot(311)
plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
range(offset1, left_padded_trace.size+offset1), left_padded_trace, 'b--',
range(0, correlation1.size), correlation1/corr_max1, 'g-',
[corr_peak_index1], [1], 'k+')
plt.subplot(312)
plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
range(offset2, center_padded_trace.size+offset2), center_padded_trace, 'b--',
range(0, correlation2.size), correlation2/corr_max2, 'g-',
[corr_peak_index2], [1], 'k+')
plt.subplot(313)
plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
range(offset3, right_padded_trace.size+offset3), right_padded_trace, 'b--',
range(0, correlation3.size), correlation3/corr_max3, 'g-',
[corr_peak_index3], [1], 'k+')
plt.show()
x = np.arange(200)*0.01
y1 = np.array([*map(_main, x)])
y2 = np.array([*map(_main, x, np.ones(x.size,int))])
plt.figure(1)
plt.subplot(211)
plt.title('synthetic')
plt.plot(x,y1)
plt.legend(('left-shifted input', 'centered input', 'right-shifted input'))
plt.subplot(212)
plt.title('natural')
plt.plot(x,y2)
plt.ylabel('x-offset of result')
plt.xlabel('y-offset')
plt.savefig("summary.png")
Upvotes: 2
Reputation: 2139
Use mode=valid
scipy.signal.correlate(in1, in2, mode='valid', method='auto')
modestr {‘full’, ‘valid’, ‘same’}, optional
A string indicating the size of the output:
full The output is the full discrete linear cross-correlation of the inputs. (Default)
valid The output consists only of those elements that do not rely on the zero-padding. In >‘valid’ mode, either in1 or in2 must be at least as large as the other in every >dimension.
same The output is the same size as in1, centered with respect to the ‘full’ output.
Signal processing (scipy.signal.correlate)
Upvotes: 1