Py-ser
Py-ser

Reputation: 2098

Python: How to fit curves

I have the following code to overplot three sets of data, count rate vs time, for three different sets of time ranges:

#!/usr/bin/env python

from pylab import rc, array, subplot, zeros, savefig, ylim, xlabel, ylabel, errorbar, FormatStrFormatter, gca, axis
from scipy import optimize, stats
import numpy as np
import pyfits, os, re, glob, sys

rc('font',**{'family':'serif','serif':['Helvetica']})
rc('ps',usedistiller='xpdf')
rc('text', usetex=True)
#------------------------------------------------------

tmin=56200
tmax=56249

data=pyfits.open('http://heasarc.gsfc.nasa.gov/docs/swift/results/transients/weak/GX304-1.orbit.lc.fits')

time  = data[1].data.field(0)/86400. + data[1].header['MJDREFF'] + data[1].header['MJDREFI']
rate  = data[1].data.field(1)
error = data[1].data.field(2)
data.close()

cond = ((time > tmin-5) & (time < tmax))
time=time[cond]
rate=rate[cond]
error=error[cond]

errorbar(time, rate, error, fmt='r.', capsize=0)
gca().xaxis.set_major_formatter(FormatStrFormatter('%5.1f'))

axis([tmin-10,tmax,-0.00,0.45])
xlabel('Time, MJD')
savefig("sync.eps",orientation='portrait',papertype='a4',format='eps')

As, in this way, the plot is too much confusing, I thought to fit the curves. I tried with UnivariateSpline, but this completely messes up my data. Any advice please? Should I first define a function to fit those data? I also looked for "least-squared": is this the best solution to this problem?

Upvotes: 1

Views: 1063

Answers (2)

Py-ser
Py-ser

Reputation: 2098

This is how I solved:

#!/usr/bin/env python

import pyfits, os, re, glob, sys
from scipy.optimize import leastsq
from numpy import *
from pylab import *
from scipy import *
rc('font',**{'family':'serif','serif':['Helvetica']})
rc('ps',usedistiller='xpdf')
rc('text', usetex=True)
#------------------------------------------------------

tmin = 56200
tmax = 56249
pi = 3.14
data=pyfits.open('http://heasarc.gsfc.nasa.gov/docs/swift/results/transients/weak/GX304-1.orbit.lc.fits')

time  = data[1].data.field(0)/86400. + data[1].header['MJDREFF'] + data[1].header['MJDREFI']
rate  = data[1].data.field(1)
error = data[1].data.field(2)
data.close()

cond = ((time > tmin-5) & (time < tmax))
time=time[cond]
rate=rate[cond]
error=error[cond]

gauss_fit = lambda p, x: p[0]*(1/(2*pi*(p[2]**2))**(1/2))*exp(-(x-p[1])**2/(2*p[2]**2))+p[3]*(1/sqrt(2*pi*(p[5]**2)))*exp(-(x-p[4])**2/(2*p[5]**2)) #1d Gaussian func
e_gauss_fit = lambda p, x, y: (gauss_fit(p, x) -y) #1d Gaussian fit
v0= [0.20, 56210.0, 1, 0.40, 56234.0, 1] #inital guesses for Gaussian Fit, just do it around the peaks
out = leastsq(e_gauss_fit, v0[:], args=(time, rate), maxfev=100000, full_output=1) #Gauss Fit
v = out[0] #fit parameters out
xxx = arange(min(time), max(time), time[1] - time[0])
ccc = gauss_fit(v, xxx) # this will only work if the units are pixel and not wavelength
fig = figure(figsize=(9, 9)) #make a plot
ax1 = fig.add_subplot(111)
ax1.plot(time, rate, 'g.') #spectrum
ax1.plot(xxx, ccc, 'b-') #fitted spectrum
savefig("plotfitting.png")

axis([tmin-10,tmax,-0.00,0.45])

From here.

What about if I would like to fit with different functions the raising and the decaying part of the curves?

Upvotes: 1

Robbert
Robbert

Reputation: 2724

I use this for fitting. It is adapted from somewhere on the internet, but I forgot where.

from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import numpy

from scipy.optimize.minpack import leastsq

### functions ###

def eq_cos(A, t):
    """
    4 parameters
    function: A[0] + A[1] * numpy.cos(2 * numpy.pi * A[2] * t + A[3])
    A[0]: offset
    A[1]: amplitude
    A[2]: frequency
    A[3]: phase
    """
    return A[0] + A[1] * numpy.cos(2 * numpy.pi * A[2] * t + numpy.pi*A[3])

def linear(A, t):
    """
    A[0]: y-offset
    A[1]: slope
    """
    return A[0] + A[1] * t  

### fitting routines ###

def minimize(A, t, y0, function):
    """
    Needed for fit
    """
    return y0 - function(A, t)

def fit(x_array, y_array, function, A_start):
    """
    Fit data

    20101209/RB: started
    20130131/RB: added example to doc-string

    INPUT:
    x_array: the array with time or something
    y-array: the array with the values that have to be fitted
    function: one of the functions, in the format as in the file "Equations"
    A_start: a starting point for the fitting

    OUTPUT:
    A_final: the final parameters of the fitting

    EXAMPLE:
    Fit some data to this function above
    def linear(A, t):
        return A[0] + A[1] * t  

    ### 
    x = x-axis
    y = some data
    A = [0,1] # initial guess
    A_final = fit(x, y, linear, A)
    ###

    WARNING:
    Always check the result, it might sometimes be sensitive to a good starting point.

    """
    param = (x_array, y_array, function)

    A_final, cov_x, infodict, mesg, ier = leastsq(minimize, A_start, args=param, full_output = True)

    return A_final



if __name__ == '__main__':

    # data
    x = numpy.arange(10)
    y = x + numpy.random.rand(10) # values between 0 and 1

    # initial guesss
    A = [0,0.5]

    # fit 
    A_final = fit(x, y, linear, A)

    # result is linear with a little offset
    print(A_final)

Upvotes: 0

Related Questions