Billy Matlock

Reputation: 350

Fitting Data to a Curve without a Known Function

I want to find a function that fits these curves without guessing their basic form, while also adding boundary conditions for θ → 0 (asymptotic behavior).

scipy.optimize.curve_fit does not work without giving a basic function as the fitting form.
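
For context, a minimal sketch (with hypothetical example data) of what curve_fit expects; it always needs an explicit model function:

    import numpy
    from scipy.optimize import curve_fit

    # curve_fit always requires an explicit model function such as this one
    def model(x, a, b):
        return a * numpy.exp(-b * x)

    # hypothetical example data, for illustration only
    x = numpy.linspace(0.0, 5.0, 20)
    y = 2.0 * numpy.exp(-0.7 * x)

    fittedParams, covariance = curve_fit(model, x, y, p0=[1.0, 1.0])
    print(fittedParams)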

Upvotes: 1

Views: 4707

Answers (2)

James Phillips

Reputation: 4657

In the comments you reference parameter bounds. While numpy's linear fitter polyfit, used in my previous example, does not directly support parameter bounds, scipy's non-linear fitter curve_fit does; the non-linear fitter, however, requires initial parameter estimates. This example uses parameter bounds and estimates the initial parameter values with scipy's differential_evolution genetic algorithm module. The scipy implementation of that module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, which requires ranges within which to search. Here those ranges are taken from the data max and min values, with one parameter minimum hard-coded to -0.185 and the offset minimum set to zero. It is much easier to supply ranges within which to search than to supply specific values for the initial parameter estimates. A short sketch of passing bounds to curve_fit directly follows the full example below.

    import numpy, scipy, matplotlib
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    from scipy.optimize import differential_evolution
    import warnings

    xData = numpy.array([19.1647, 18.0189, 16.9550, 15.7683, 14.7044, 13.6269, 12.6040, 11.4309, 10.2987, 9.23465, 8.18440, 7.89789, 7.62498, 7.36571, 7.01106, 6.71094, 6.46548, 6.27436, 6.16543, 6.05569, 5.91904, 5.78247, 5.53661, 4.85425, 4.29468, 3.74888, 3.16206, 2.58882, 1.93371, 1.52426, 1.14211, 0.719035, 0.377708, 0.0226971, -0.223181, -0.537231, -0.878491, -1.27484, -1.45266, -1.57583, -1.61717])
    yData = numpy.array([0.644557, 0.641059, 0.637555, 0.634059, 0.634135, 0.631825, 0.631899, 0.627209, 0.622516, 0.617818, 0.616103, 0.613736, 0.610175, 0.606613, 0.605445, 0.603676, 0.604887, 0.600127, 0.604909, 0.588207, 0.581056, 0.576292, 0.566761, 0.555472, 0.545367, 0.538842, 0.529336, 0.518635, 0.506747, 0.499018, 0.491885, 0.484754, 0.475230, 0.464514, 0.454387, 0.444861, 0.437128, 0.415076, 0.401363, 0.390034, 0.378698])


    def func(x, a, b, offset): #exponential curve fitting function
        return a * numpy.exp(-b*x) + offset


    # function for genetic algorithm to minimize (sum of squared error)
    def sumOfSquaredError(parameterTuple):
        warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)


    def generate_Initial_Parameters():
        # min and max used for bounds
        maxX = max(xData)
        minX = min(xData)
        maxY = max(yData)
        minY = min(yData)

        parameterBounds = []
        parameterBounds.append([-0.185, maxX]) # search bounds for a
        parameterBounds.append([minX, maxX]) # search bounds for b
        parameterBounds.append([0.0, maxY]) # search bounds for Offset

        # "seed" the numpy random number generator for repeatable results
        result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
        return result.x

    # differential_evolution performs a bounded global search and
    # returns initial parameter estimates for curve_fit
    geneticParameters = generate_Initial_Parameters()
    print('fit with parameter bounds (note the -0.185)')
    print(geneticParameters)
    print()

    # second fitting step: curve_fit refines the genetic algorithm estimates, with no bounds, for comparison
    fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)

    print('re-fit with no parameter bounds')
    print(fittedParameters)
    print()

    modelPredictions = func(xData, *fittedParameters) 

    absError = modelPredictions - yData

    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

    print()
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

    print()


    ##########################################################
    # graphics output section
    def ModelAndScatterPlot(graphWidth, graphHeight):
        f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
        axes = f.add_subplot(111)

        # first the raw data as a scatter plot
        axes.plot(xData, yData,  'D')

        # create data for the fitted equation plot
        xModel = numpy.linspace(min(xData), max(xData))
        yModel = func(xModel, *fittedParameters)

        # now the model as a line plot
        axes.plot(xModel, yModel)

        axes.set_xlabel('X Data') # X axis data label
        axes.set_ylabel('Y Data') # Y axis data label

        plt.show()
        plt.close('all') # clean up after using pyplot

    graphWidth = 800
    graphHeight = 600
    ModelAndScatterPlot(graphWidth, graphHeight)
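
As noted above, curve_fit also accepts bounds directly via its bounds keyword; a minimal sketch, reusing func, xData, yData and geneticParameters from the example above, with the same search ranges:

    # minimal sketch: pass the same search bounds to curve_fit directly
    # (reuses func, xData, yData and geneticParameters defined above)
    lowerBounds = [-0.185, min(xData), 0.0]
    upperBounds = [max(xData), max(xData), max(yData)]
    boundedParameters, boundedCovariance = curve_fit(func, xData, yData,
                                                     p0=geneticParameters,
                                                     bounds=(lowerBounds, upperBounds))
    print('fit with bounds passed to curve_fit:')
    print(boundedParameters)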

Upvotes: 1

James Phillips

Reputation: 4657

Here is a graphing polynomial fitter; you can use your own data and specify different polynomial orders to see if the fit is sufficient for your modeling requirements. A quick way to compare orders is sketched after the code.

    import numpy, matplotlib
    import matplotlib.pyplot as plt

    polynomialOrder = 2 # example quadratic

    xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
    yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])

    # curve fit the test data
    fittedParameters = numpy.polyfit(xData, yData, polynomialOrder)
    print('Fitted Parameters:', fittedParameters)

    modelPredictions = numpy.polyval(fittedParameters, xData)
    absError = modelPredictions - yData

    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

    print()


    ##########################################################
    # graphics output section
    def ModelAndScatterPlot(graphWidth, graphHeight):
        f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
        axes = f.add_subplot(111)

        # first the raw data as a scatter plot
        axes.plot(xData, yData, 'D')

        # create data for the fitted equation plot
        xModel = numpy.linspace(min(xData), max(xData))
        yModel = numpy.polyval(fittedParameters, xModel)

        # now the model as a line plot
        axes.plot(xModel, yModel)

        axes.set_xlabel('X Data') # X axis data label
        axes.set_ylabel('Y Data') # Y axis data label

        plt.show()
        plt.close('all') # clean up after using pyplot

    graphWidth = 800
    graphHeight = 600
    ModelAndScatterPlot(graphWidth, graphHeight)
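
To check whether a different order fits better, here is a minimal sketch, reusing xData and yData from above with a hypothetical set of orders, that prints the RMSE for each:

    # minimal sketch: compare RMSE across a few polynomial orders
    # (reuses xData and yData from the fitter above)
    for order in (1, 2, 3):
        coefficients = numpy.polyfit(xData, yData, order)
        residuals = numpy.polyval(coefficients, xData) - yData
        rmse = numpy.sqrt(numpy.mean(numpy.square(residuals)))
        print('order', order, 'RMSE:', rmse)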

Upvotes: 0
