masher
masher

Reputation: 4116

How do I use scipy curve_fit with a custom objective function?

I wish to do a curve fit to some tabulated data using my own objective function, not the in-built normal least squares.

I can make the normal curve_fit work, but I can't understand how to properly formulate my objective function to feed it into the method.

I am interested in knowing the values of my fitted curve at each tabulated x value.


x = np.array([-5.0,-4.5,-4.0,-3.5,-3.0,-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0])
y = np.array([300,300,1000,350,340,1230,500,360,360,920,365,365,350,1000,375,1050,380,385,385,390,400,395,780,410,420,420,415,435,440,435,455])
e = np.array([math.sqrt(i) for i in y]) #uncertainty in y values

def test_func(x, a0, a1):
    """ This is the function I want to fit to my data """
    return a0 + a1*x

def norm_residual(test_func, x, y, e, params):
    """ This calculates the normalised residuals, given the tabulated data and function parameters"""
    yhat = test_func(x,*params)
    z = (y-yhat)/e
    return z
        
def f(z):
    """ This modifies the normalised residual value, depending on it's sign."""
    if z <= 0:
        return z**2
    else:
       return 6*np.log(2*z/(np.sqrt(math.pi) * sp.special.erf(z/np.sqrt(2))))-3*np.log(2) 
    
def objective(test_func, x, y, e, params):
    """This returns the sum of the modified normalised residuals. Smaller is better"""
    z = norm_residual(test_func, x, y, e, params)
    return np.sum(np.array([f(i) for i in z]))


#normal scipy curve fit
params, params_covariance = sp.optimize.curve_fit(test_func, x, y, p0=[0,0])
plt.scatter(x, y, label='Data')
plt.plot(x, test_func(x, params[0], params[1]), label='Fitted function', color="orange")
plt.legend(loc='best')
plt.show()

#how do I use my objective function to do my curve fit?

Upvotes: 1

Views: 2093

Answers (1)

masher
masher

Reputation: 4116

This is what I came up with, for my slightly more realistic requirements.

Lesson: vectorise everything! Don't just wrap it in a np.vectorize function call. I got a speed-up of ~100x by doing so.

Some guidance taken from https://hernandis.me/2020/04/05/three-examples-of-nonlinear-least-squares-fitting-in-python-with-scipy.html

import numpy as np
import scipy as sp
from scipy import optimize
import matplotlib.pyplot as plt
import math


x_data = np.array([-5.0,-4.5,-4.0,-3.5,-3.0,-2.5,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0])
y_data = np.array([300,300,1000,350,340,1230,500,360,360,920,365,365,350,1000,375,1050,380,385,385,390,400,395,780,410,420,420,415,435,440,435,455])
e_data = np.array([math.sqrt(i) for i in y_data]) #uncertainty in y values


# https://hernandis.me/2020/04/05/three-examples-of-nonlinear-least-squares-fitting-in-python-with-scipy.html

def model(params, x):
    """Calculates the model, given params and x. Is vectorised; can be used with numpy.arrays"""
    a0, a1 = params
    return a0 + a1 * x

def v_f(z):
    """Modifies the residual. Used when you need to calc your own chi2 value. Is vectorised; can be used with numpy.arrays"""
    return np.where(z <= 0, np.square(z), 6*np.log(z/sp.special.erf(z*0.7071067811865475)) - 1.3547481158683645)

def v_f_2(z):
    """Modifies the residual. Used when chi2 is calc'd for you. Is vectorised; can be used with numpy.arrays"""
    return np.where(z <= 0, z, np.sqrt(6*np.log(z/sp.special.erf(z*0.7071067811865475)) - 1.3547481158683645))

def objective(params, model_func, data, v_modify_residuals_func = None):   
    """ Calculates the residuals given a model and data. Is vectorised; can be used with numpy.arrays """
    if len(data) == 3:
        xd, yd, ed = data
    elif len(data) == 2:
        xd, yd = data
        ed = np.ones(len(xd))     
            
    r = (yd - model_func(params, xd)) / ed # r is an array of residuals
    if v_modify_residuals_func is not None:
        r = v_modify_residuals_func(r)
    return r

def objective_sum(params, model_func, data, modify_residuals_func = None):  
    """ Calculates the sum of the residuals given a model and data. Used when you need to calc your own chi2 value. Is vectorised; can be used with numpy.arrays """
    r = objective(params, model_func, data, modify_residuals_func)
    return np.sum(r)

def v_cheb(n, x):
    """ Calculate a chebyshev polynomial. -1.0 <= x <= 1.0, n >= 0, int.  Is vectorised; can be used with numpy.arrays """
    return np.cos(n * np.arccos(x))

def bkg_model(params, x):
    """ Calculate a bkg curve from a number of chebyshev polynomials. Polynomial degree given by len(params).  Is vectorised; can be used with numpy.arrays """
    r = 0
    for i, p in enumerate(params):
        r += p * v_cheb(i, x)
    return r

def v_normaliseList(nparray):
    """ Given a monotonically increasing x-ordinate array, normalise values in the range -1 <= x <= 1. Is vectorised; can be used with numpy.arrays """
    min_ = nparray[0]
    max_ = nparray[-1]
    r = (2*(nparray - min_)/(max_ - min_)) - 1
    return r



initial_params = [0,0]

""" least_squares takes an array of residuals, r, and minimises Sum(r**2) """
results1 = sp.optimize.least_squares(objective,
                                     initial_params,
                                     method = 'lm',
                                     args = [bkg_model,
                                             [v_normaliseList(x_data), y_data, e_data],
                                             v_f_2])

""" minimize takes a scalar, r, and minimises r """
results2 = sp.optimize.minimize(objective_sum,
                                initial_params,
                                #method = 'SLSQP',
                                args = (bkg_model,
                                        (v_normaliseList(x_data), y_data, e_data),
                                        v_f))


print(results1.x)
print(results2.x)



Upvotes: 1

Related Questions