ticofiz
ticofiz

Reputation: 107

gaussian fitting not working using Python

I am trying to fit on columns this matrix data using this Python code:

#!/usr/local/bin/env python
import numpy as np
import Tkinter #Used for file import
import tkFileDialog #Used for file import
import os
import scipy
import scipy.optimize as optimize


root = Tkinter.Tk()
root.withdraw() #use to hide tkinter window

filename = os.getcwd()
background = os.getcwd()
filename = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a file') 
background = tkFileDialog.askopenfile(parent=root,mode='rb',title='Choose a background') 
filename = filename.name
#filename = r'bb1e03'
#background = r'bb1e03_background'


T0 = np.loadtxt(filename, unpack=False)
bg = np.loadtxt(background, unpack=False)

T = T0-bg # background subtraction?
#T = T.clip(min=0)
T[T<0]=0
T = np.flipud(T)
N, M = T.shape
datax = np.arange(N)


def gaussian(x, height, center, width, offset):
    return height*np.exp(-(x - center)**2/(2*width**2)) + offset

def three_gaussians(x, h1, c1, w1, h2, c2, w2, h3, c3, w3, offset):
    return (gaussian(x, h1, c1, w1, offset=0) +
        gaussian(x, h2, c2, w2, offset=0) +
        gaussian(x, h3, c3, w3, offset=0) + offset)

def two_gaussians(x, h1, c1, w1, h2, c2, w2, offset):
    return three_gaussians(x, h1, c1, w1, h2, c2, w2, 0,0,1, offset)

def one_gaussian(x,h1,c1,w1, offset):
    return (gaussian(x, h1, c1, w1, offset=0)+offset)

#errfunc3 = lambda p, x, y: (three_gaussians(x, *p) - y)**2
#errfunc2 = lambda p, x, y: (two_gaussians(x, *p) - y)**2
#errfunc1 = lambda p, x, y: (one_gaussian(x, *p) - y)**2
 #output files for fit parameters
outfile1 = open('results_1gau.txt', 'w')
outfile2 = open('results_2gau.txt', 'w')
outfile3 = open('results_3gau.txt', 'w')
outfile1.write('column\th1\tc1\tw1\toffset\n')
outfile2.write('column\th1\tc1\tw1\th2\tc2\tw2\toffset\n')
outfile3.write('column\th1\tc1\tw1\th2\tc2\tw2\th3\tc3\tw3\toffset\n')

# new matrices for fitted data
datafit1 = np.empty_like(T)
datafit2 = np.empty_like(T)
datafit3 = np.empty_like(T)

for n in xrange(M):

    Mmax = T[:,n].max()
    guess1 = [0.5*Mmax, N/10., 10., 0.]
    guess2 = [0.5*Mmax, N/10., 10., 0.5*Mmax, N/10., 10., 0.]
    guess3 = [0.5*Mmax, N/10., 10., 0.5*Mmax, N/10., 10.,
              0.5*Mmax, N/10., 10., 0]
    #optim3, success = optimize.leastsq(errfunc3, guess3[:],
    #                                   args=(datax, data[:,n]))
    #optim2, success = optimize.leastsq(errfunc2, guess2[:],
    #                                   args=(datax, data[:,n]))

    try:
        optim1, pcov = optimize.curve_fit(one_gaussian, datax, T[:,n], guess1)
    except:
        optim1 = [0, 0, 1, 0]

    try:
        optim2, pcov = optimize.curve_fit(two_gaussians, datax, T[:,n], guess2)
    except:
        optim2 = [0, 0, 1, 0, 0, 1, 0]

    try:
        optim3, pcov = optimize.curve_fit(three_gaussians, datax, T[:,n], guess3)
    except:
        optim3 = [0, 0, 1, 0, 0, 1, 0, 0, 1, 0]

   # write parameters to file (1 gau)
    s = '{}'.format(n)
    for x in guess1:
        s += '\t{:g}'.format(x)
    outfile1.write(s + '\n')

   # write parameters to file (2 gau)
    s = '{}'.format(n)
    for x in guess2:
        s += '\t{:g}'.format(x)
    outfile2.write(s + '\n')

    # write parameters to file (3 gau)
    s = '{}'.format(n)
    for x in guess3:
        s += '\t{:g}'.format(x)
    outfile3.write(s + '\n')
    # fill new matrices with fitted data
    datafit1[:,n] = one_gaussian(datax, *optim1)
    datafit2[:,n] = two_gaussians(datax, *optim2)
    datafit3[:,n] = three_gaussians(datax, *optim3)

T = datafit1 

I have read most of the relevant posts related to fitting but I could not find what is wrong with my code. It should work but the final matrix, "T" shows only columns with constant numbers and not a nice smooth gaussian shape-like curve. Please take a look and tell me what am I doing wrong. I have tried in other programs, such as OriginLab and the fitting works well.

Thank you.

Upvotes: 4

Views: 741

Answers (1)

Oliver W.
Oliver W.

Reputation: 13459

You're experiencing the classical problem of supplying an incorrect guess to the curve fitting algorithm. That is entirely due to your unnecessary upside down flipping of the matrix T and then not taking into account the new locations of the gaussians (the parameter called center, passed to gaussian() - I remember this code).

You see, here's what happens when I do fitting on your original data:

T = T0-bg # background subtraction?
fitparams_me, fitparams_you = [], []

for colind in xrange(16,19):
    column = T[:,colind]
    guess = column.max(), column.argmax(), 3, 0  # Good guess for a SINGLE gaussian

    popt, pcov = optimize.curve_fit(one_gaussian, datax, column, p0=guess)
    fitparams_me.append(popt)
print(fitparams_me)

Which shows:

[array([ 365.40098996,   91.24095009,    1.11390434,   -0.99632476]),
 array([ 348.4327168 ,   92.0262556 ,    1.26650618,   -1.08018819]),
 array([ 413.21526868,   90.8569241 ,    1.0445618 ,   -1.0565371 ])]

And these lead to very good fits.

Now here's what you're doing: you first flip the matrix upside down, but you keep assuming the peaks are located in the first rows. However, that is no longer the case, which this code highlights:

T = np.flipud(T)

for colind in xrange(16,19):
    column = T[:,colind]
    guess = column.max(), column.argmax(), 3, 0  # Good guess for a SINGLE gaussian
    your_guess = [0.5*Mmax, N/10., 10., 0.]
    print guess[1], your_guess[1]
    popt, pcov = optimize.curve_fit(one_gaussian, datax, column, p0=your_guess)
    fitparams_you.append(popt)

# printed results:
932 102.4
931 102.4
932 102.4

So, each time I'm still guessing correctly where the maximum is appearing, but you are assuming it is always around row 102 of your data (of shape 1024, 1024).

It shall come as no surprise that the results from your curve fitting differ a lot from mine:

>>> print(fitparams_you)  

[array([ -1.640e-07,   1.024e+02,   1.000e+01,   2.046e-10]),
 array([ -1.640e-07,   1.024e+02,   1.000e+01,   2.046e-10]),
 array([ -1.640e-07,   1.024e+02,   1.000e+01,   2.046e-10])]

You could solve it easily by just flipping your column:

popt, pcov = optimize.curve_fit(one_gaussian, datax, column[::-1], p0=your_guess) 

Or you could attempt to make your algorithm more robust by using tricks like argmax.

Upvotes: 3

Related Questions