Reputation: 301

Why python scipy cross correlation is not working when padding one of the inputs

The scipy cross correlation function is simply not working for a specific 1d array and I cant figure out why. The code below demonstrate the problem, just try it with one trace and than the other.

This question is a bit related to cross correlation and Python cross-correlation not returning correct shift

#!/usr/bin/python3

import numpy as np
from scipy import signal
import matplotlib.pyplot as plt

def _main():
    """
    trace = np.array([0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, # down the step
                      0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, # up the step
                      0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002]) # down the step
    """
    trace = np.array([0.51231204949426460, 0.47472182808002383, 0.48806029762272723, 0.51352464310119930, 0.58506742537603330, 0.62993314829830390, 0.57657927012749040, 0.55369158834668990, 0.56255864527226200, 0.61576098682569510,
                      0.62955418648769630, 0.64236215760241170, 0.69063835641941580, 0.75073729780384960, 0.86896478361172370, 0.92216712516515690, 0.91329988783884970, 0.92807831604813670, 0.99113300320800610, 0.99999999999999990, 0.91527040506699960, 
                      0.80098377331469030, 0.71723934679539750, 0.68275634764039450, 0.65812563395824950, 0.63250963159524040, 0.59999708953480900, 0.55172083058422660, 0.54975037348965490, 0.57011178351142090, 0.52807534544936740])


    left_padded_trace = np.pad(trace, (10, 0), mode='constant', constant_values=trace.min())
    center_padded_trace = np.pad(trace, (5, 5), mode='constant', constant_values=trace.min())
    right_padded_trace = np.pad(trace, (0, 10), mode='constant', constant_values=trace.min())

    correlation1 = signal.correlate(center_padded_trace, left_padded_trace, mode='full', method='fft')
    correlation2 = signal.correlate(center_padded_trace, center_padded_trace, mode='full', method='fft')
    correlation3 = signal.correlate(center_padded_trace, right_padded_trace, mode='full', method='fft')

    corr_peak_index1 = np.argmax(correlation1)
    corr_max1 = np.max(correlation1)

    corr_peak_index2 = np.argmax(correlation2)
    corr_max2 = np.max(correlation2)

    corr_peak_index3 = np.argmax(correlation3)
    corr_max3 = np.max(correlation3)

    offset1 = corr_peak_index1-(center_padded_trace.size-1)
    offset2 = corr_peak_index2-(center_padded_trace.size-1)
    offset3 = corr_peak_index3-(center_padded_trace.size-1)

    print("Corr1: {}, Corr2: {}, Corr3: {}".format(corr_peak_index1, corr_peak_index2, corr_peak_index3))
    print("Offset1: {}, Offset2: {}, Offset3: {}".format(offset1, offset2, offset3))

    plt.figure(1)

    plt.subplot(311)
    plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
            range(offset1, left_padded_trace.size+offset1), left_padded_trace, 'b--',
            range(0, correlation1.size), correlation1/corr_max1, 'g-',
            [corr_peak_index1], [1], 'k+')

    plt.subplot(312)
    plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
            range(offset2, center_padded_trace.size+offset2), center_padded_trace, 'b--',
            range(0, correlation2.size), correlation2/corr_max2, 'g-',
            [corr_peak_index2], [1], 'k+')

    plt.subplot(313)
    plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
            range(offset3, right_padded_trace.size+offset3), right_padded_trace, 'b--',
            range(0, correlation3.size), correlation3/corr_max3, 'g-',
            [corr_peak_index3], [1], 'k+')

    plt.show()

Since the shift added by padding is the same and the only difference is the change on the input trace, the results in terms of shift and alignment from the correlation should be the same but they are not.

For the first trace (more synthetic step) the correlations and offsets are: (1 is the left padded, 2 is the centered and 3 is the right padded)

Corr1: 35, Corr2: 40, Corr3: 45
Offset1: -5, Offset2: 0, Offset3: 5

For the second trace (more natural), the

Corr1: 40, Corr2: 40, Corr3: 40
Offset1: 0, Offset2: 0, Offset3: 0

Follows the plots:

Plot for the synthetic trace (first in the code)
Plot for the natural trace (second in the code)

Solution

See answer and comments from Paul Panzer below.

The problem is with the original code is with the non zero padding.

When shifting the array with non zero values, the cross correlation value gets increasingly higher and the peak gets affected. The following code and images demonstrates this effect:

    trace = np.array([0.51231204949426460, 0.47472182808002383, 0.48806029762272723, 0.51352464310119930, 0.58506742537603330, 0.62993314829830390, 0.57657927012749040, 0.55369158834668990, 0.56255864527226200, 0.61576098682569510, 0.62955418648769630, 0.64236215760241170, 0.69063835641941580, 0.75073729780384960, 0.86896478361172370, 0.92216712516515690, 0.91329988783884970, 0.92807831604813670, 0.99113300320800610, 0.99999999999999990, 0.91527040506699960, 0.80098377331469030, 0.71723934679539750, 0.68275634764039450, 0.65812563395824950, 0.63250963159524040, 0.59999708953480900, 0.55172083058422660, 0.54975037348965490, 0.57011178351142090, 0.52807534544936740])

    for padding_value in np.arange(0, trace.min(), trace.min()/10):
        left_padded_trace = np.pad(trace, (10, 0), mode='constant', constant_values=padding_value)
        center_padded_trace = np.pad(trace, (5, 5), mode='constant', constant_values=padding_value)

        correlation = signal.correlate(center_padded_trace, left_padded_trace, mode='full', method='fft')

        corr_peak_index = np.argmax(correlation)

        plt.figure(2)
        plt.subplot(211)
        plt.title('Left Padded Trace')
        plt.xticks([])
        plt.plot(left_padded_trace)
        plt.subplot(212)
        plt.title('Centered Padded Trace')
        plt.plot(center_padded_trace)

        plt.figure(3)
        plt.plot(range(0, correlation.size), correlation)
        plt.plot([corr_peak_index], [correlation[corr_peak_index]], 'k+')

    plt.show()

The results are presented below. One can see that, as the padding value increases, the correlation peak moves to the center.

Traces with different padding levels, from 0 up to the minimum
- Correlation values and peak

Upvotes: 1

Answers (2)

Paul Panzer

Reputation: 53029

The difference is explained by the fact that you are padding with the minimum which is not zero in case of the second trace. As a consequence you cannot expect the peak just to shift with the offset. Instead you get the shifted peak curve plus a triangle that scales with the minimum.

import numpy as np
from scipy import signal
import matplotlib.pyplot as plt

def _main(offset=0, trace_idx=0):
    trace = [np.array([0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, # down the step
                      0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, 0.99999999999999999, 0.99999999999999998, # up the step
                       0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002, 0.00000000000000001, 0.00000000000000002]), # down the step
    np.array([0.51231204949426460, 0.47472182808002383, 0.48806029762272723, 0.51352464310119930, 0.58506742537603330, 0.62993314829830390, 0.57657927012749040, 0.55369158834668990, 0.56255864527226200, 0.61576098682569510,
                      0.62955418648769630, 0.64236215760241170, 0.69063835641941580, 0.75073729780384960, 0.86896478361172370, 0.92216712516515690, 0.91329988783884970, 0.92807831604813670, 0.99113300320800610, 0.99999999999999990, 0.91527040506699960, 
                      0.80098377331469030, 0.71723934679539750, 0.68275634764039450, 0.65812563395824950, 0.63250963159524040, 0.59999708953480900, 0.55172083058422660, 0.54975037348965490, 0.57011178351142090, 0.52807534544936740])][trace_idx]

    trace += offset - trace.min()

    left_padded_trace = np.pad(trace, (10, 0), mode='constant', constant_values=trace.min())
    center_padded_trace = np.pad(trace, (5, 5), mode='constant', constant_values=trace.min())
    right_padded_trace = np.pad(trace, (0, 10), mode='constant', constant_values=trace.min())

    correlation1 = signal.correlate(center_padded_trace, left_padded_trace, mode='full', method='fft')
    correlation2 = signal.correlate(center_padded_trace, center_padded_trace, mode='full', method='fft')
    correlation3 = signal.correlate(center_padded_trace, right_padded_trace, mode='full', method='fft')

    corr_peak_index1 = np.argmax(correlation1)
    corr_max1 = np.max(correlation1)

    corr_peak_index2 = np.argmax(correlation2)
    corr_max2 = np.max(correlation2)

    corr_peak_index3 = np.argmax(correlation3)
    corr_max3 = np.max(correlation3)

    offset1 = corr_peak_index1-(center_padded_trace.size-1)
    offset2 = corr_peak_index2-(center_padded_trace.size-1)
    offset3 = corr_peak_index3-(center_padded_trace.size-1)

    return offset1, offset2, offset3

    print("Corr1: {}, Corr2: {}, Corr3: {}".format(corr_peak_index1, corr_peak_index2, corr_peak_index3))
    print("Offset1: {}, Offset2: {}, Offset3: {}".format(offset1, offset2, offset3))

    plt.figure(1)

    plt.subplot(311)
    plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
            range(offset1, left_padded_trace.size+offset1), left_padded_trace, 'b--',
            range(0, correlation1.size), correlation1/corr_max1, 'g-',
            [corr_peak_index1], [1], 'k+')

    plt.subplot(312)
    plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
            range(offset2, center_padded_trace.size+offset2), center_padded_trace, 'b--',
            range(0, correlation2.size), correlation2/corr_max2, 'g-',
            [corr_peak_index2], [1], 'k+')

    plt.subplot(313)
    plt.plot(range(0, center_padded_trace.size), center_padded_trace, 'r-',
            range(offset3, right_padded_trace.size+offset3), right_padded_trace, 'b--',
            range(0, correlation3.size), correlation3/corr_max3, 'g-',
            [corr_peak_index3], [1], 'k+')

    plt.show()


x = np.arange(200)*0.01
y1 = np.array([*map(_main, x)])

y2 = np.array([*map(_main, x, np.ones(x.size,int))])

plt.figure(1)
plt.subplot(211)
plt.title('synthetic')
plt.plot(x,y1)
plt.legend(('left-shifted input', 'centered input', 'right-shifted input'))
plt.subplot(212)
plt.title('natural')
plt.plot(x,y2)
plt.ylabel('x-offset of result')
plt.xlabel('y-offset')
plt.savefig("summary.png")

Upvotes: 2

Mahsa Hassankashi

Reputation: 2139

Use mode=valid

scipy.signal.correlate(in1, in2, mode='valid', method='auto')
modestr {‘full’, ‘valid’, ‘same’}, optional

A string indicating the size of the output:

full The output is the full discrete linear cross-correlation of the inputs. (Default)

valid The output consists only of those elements that do not rely on the zero-padding. In >‘valid’ mode, either in1 or in2 must be at least as large as the other in every >dimension.

same The output is the same size as in1, centered with respect to the ‘full’ output.

Signal processing (scipy.signal.correlate)

Upvotes: 1

Why python scipy cross correlation is not working when padding one of the inputs

Solution

Answers (2)

Related Questions