genjong
genjong

Reputation: 137

Segmented regression in python using differential evolution

I have the long-term aim of creating a module that for a specific data set, fits segmented regressions up to an arbitrary number of breakpoints, as well as a standard polynomial and linear curve fit, and then evaluates which of the fits are the most appropriate for the data (likely using AIC or BIC).

I have a function that uses differential evolution to use segmented regression on an x and y dataset assuming 1 breakpoint:

def segReg_one(xData,yData):

    def func(xVals,model_break,slopeA,slopeB,offsetA,offsetB): #Initialization of the piecewise function
        returnArray=[]
        for x in xVals:
            if x > model_break:
                returnArray.append(slopeA * x + offsetA)
            else:
                returnArray.append(slopeB * x + offsetB)


        return returnArray

    def sumSquaredError(parametersTuple): #Definition of an error function to minimize
        modely=func(xData,*parametersTuple)
        warnings.filterwarnings("ignore") # Ignore warnings by genetic algorithm

        return np.sum((yData-modely)**2.0)

    def generate_genetic_Parameters():
        initial_parameters=[]
        x_max=np.max(xData)
        x_min=np.min(xData)
        y_max=np.max(yData)
        y_min=np.min(yData)
        slope=10*(y_max-y_min)/(x_max-x_min)

        initial_parameters.append([x_max,x_min]) #Bounds for model break point
        initial_parameters.append([-slope,slope]) #Bounds for slopeA
        initial_parameters.append([-slope,slope]) #Bounds for slopeB
        initial_parameters.append([y_max,y_min]) #Bounds for offset A
        initial_parameters.append([y_max,y_min]) #Bounds for offset B

        result=differential_evolution(sumSquaredError,initial_parameters,seed=3)

        return result.x

    geneticParameters = generate_genetic_Parameters() #Generates genetic parameters



    fittedParameters, pcov= curve_fit(func, xData, yData, geneticParameters) #Fits the data 
    print('Parameters:', fittedParameters)
    print('Model break at: ', fittedParameters[0])
    print('Slope of line where x < model break: ', fittedParameters[1])
    print('Slope of line where x > model break: ', fittedParameters[2])
    print('Offset of line where x < model break: ', fittedParameters[3])
    print('Offset of line where x > model break: ', fittedParameters[4])





    model=func(xData,*fittedParameters)

    absError = model - yData

    SE = np.square(absError) 
    MSE = np.mean(SE) 
    RMSE = np.sqrt(MSE) 
    Rsquared = 1.0 - (np.var(absError) / np.var(yData))

    print()
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)



    def ModelAndScatterPlot(graphWidth, graphHeight):
            f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
            axes = f.add_subplot(111)


            axes.plot(xData, yData,  'D')

            xModel = np.linspace(min(xData), max(xData))
            yModel = func(xModel, *fittedParameters)

            axes.plot(xModel, yModel)

            axes.set_xlabel('X Data') # X axis data label
            axes.set_ylabel('Y Data') # Y axis data label

            plt.show()
            plt.close('all') 


    graphWidth = 800
    graphHeight = 600

    return ModelAndScatterPlot(800,600)

Which runs fine. However, I tried to expand the model to allow for more than 1 breakpoint:

def segReg_two(xData,yData):

    def func(xData,break1,break2,slope1,slope_mid,slope2,offset1,offset_mid,offset2):
        returnArray=[]
        for x in xData:
            if x < break1:
                returnArray.append(slope1 * x + offset1)

            if (x < break2 and x > break1):
                returnArray.append(slope_mid * x + offset_mid)

            else:
                returnArray.append(slope2 * x + offset2)


    def sumSquaredError(parametersTuple): #Definition of an error function to minimize
        modely=func(xData,*parametersTuple)
        warnings.filterwarnings("ignore") # Ignore warnings by genetic algorithm

        return np.sum((yData-modely)**2.0)

    def generate_genetic_Parameters():
        initial_parameters=[]
        x_max=np.max(xData)
        x_min=np.min(xData)
        y_max=np.max(yData)
        y_min=np.min(yData)
        slope=10*(y_max-y_min)/(x_max-x_min)

        initial_parameters.append([x_max,x_min]) #Bounds for model break point
        initial_parameters.append([x_max,x_min])
        initial_parameters.append([-slope,slope]) 
        initial_parameters.append([-slope,slope]) 
        initial_parameters.append([-slope,slope]) 
        initial_parameters.append([y_max,y_min])
        initial_parameters.append([y_max,y_min]) 
        initial_parameters.append([y_max,y_min]) 


        result=differential_evolution(sumSquaredError,initial_parameters,seed=3)

        return result.x

    geneticParameters = generate_genetic_Parameters() #Generates genetic parameters



    fittedParameters, pcov= curve_fit(func, xData, yData, geneticParameters) #Fits the data 
    print('Parameters:', fittedParameters)
    print('Model break at: ', fittedParameters[0])
    print('Slope of line where x < model break: ', fittedParameters[1])
    print('Slope of line where x > model break: ', fittedParameters[2])
    print('Offset of line where x < model break: ', fittedParameters[3])
    print('Offset of line where x > model break: ', fittedParameters[4])





    model=func(xData,*fittedParameters)

    absError = model - yData

    SE = np.square(absError) 
    MSE = np.mean(SE) 
    RMSE = np.sqrt(MSE) 
    Rsquared = 1.0 - (np.var(absError) / np.var(yData))

    print()
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

    def ModelAndScatterPlot(graphWidth, graphHeight):
            f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
            axes = f.add_subplot(111)


            axes.plot(xData, yData,  'D')

            xModel = np.linspace(min(xData), max(xData))
            yModel = func(xModel, *fittedParameters)

            axes.plot(xModel, yModel)

            axes.set_xlabel('X Data') # X axis data label
            axes.set_ylabel('Y Data') # Y axis data label

            plt.show()
            plt.close('all') 


    graphWidth = 800
    graphHeight = 600

    return ModelAndScatterPlot(800,600)

And this code runs into problems when I run segReg_two(x,y), stopping at the differential_evolution bit:

TypeError: unsupported operand type(s) for -: 'float' and 'NoneType' During handling of the above exception, another exception occurred:

RuntimeError: The map-like callable must be of the form f(func, iterable), returning a sequence of numbers the same length as 'iterable'

I didn't have this problem with segReg_one, so I don't see why I'm having it happen here. I am assuming (and I may be incorrect with this assumption) that the argument iterable must have compatible dimensions with my error function. However, I'm not exactly sure on how those two arguments exactly relate other than the fact that I'm finding the breakpoints, slopes and offsets that minimize the breakpoints given the bounds I have.

Also, my plan of attack seems extremely long-winded and brutish. Is there a better way to tackle this?

I think perhaps it is considering my piecewise function as None-type. Printing the function with some random values returned simply "None". However, my piecewise function prints the same thing and it still worked out fine.

Upvotes: 2

Views: 1133

Answers (1)

chasmani
chasmani

Reputation: 2510

If you are not tied to using differential evolution then the piecewise-regression package fits segmented models to data, using an iterative algorithm. And it has a model comparison tool based on the Bayesian Information Criterion.

Here is some data generated from 2 breakpoints

x = [0.0, 0.3, 0.5, 0.8, 1.0, 1.3, 1.5, 1.8, 2.0, 2.3, 2.5, 2.8, 3.0, 3.3, 3.5, 3.8, 4.0, 4.3, 4.5, 4.8, 5.1, 5.3, 5.6, 5.8, 6.1, 6.3, 6.6, 6.8, 7.1, 7.3, 7.6, 7.8, 8.1, 8.3, 8.6, 8.8, 9.1, 9.3, 9.6, 9.8, 10.1, 10.4, 10.6, 10.9, 11.1, 11.4, 11.6, 11.9, 12.1, 12.4, 12.6, 12.9, 13.1, 13.4, 13.6, 13.9, 14.1, 14.4, 14.6, 14.9, 15.2, 15.4, 15.7, 15.9, 16.2, 16.4, 16.7, 16.9, 17.2, 17.4, 17.7, 17.9, 18.2, 18.4, 18.7, 18.9, 19.2, 19.4, 19.7, 19.9, 20.2, 20.5, 20.7, 21.0, 21.2, 21.5, 21.7, 22.0, 22.2, 22.5, 22.7, 23.0, 23.2, 23.5, 23.7, 24.0, 24.2, 24.5, 24.7, 25.0]

y = [16.2, -5.5, -4.0, -8.8, 11.2, -19.9, 21.2, -3.2, 8.2, 3.2, 20.9, -13.7, 4.4, 4.4, 20.2, -1.5, 8.4, 2.0, 11.8, 17.8, 1.6, 24.7, 22.9, 19.5, 24.7, 11.9, 20.6, 15.5, 25.2, 36.2, 27.0, 33.0, 33.1, 34.5, 39.3, 48.9, 40.9, 57.5, 74.7, 68.6, 62.3, 58.4, 62.8, 90.2, 76.8, 73.0, 84.3, 106.4, 89.7, 97.7, 97.5, 94.0, 89.2, 100.1, 104.5, 115.5, 121.1, 125.0, 121.6, 130.6, 115.8, 136.3, 129.4, 121.8, 130.2, 125.1, 137.6, 142.0, 149.2, 113.9, 113.9, 123.8, 131.0, 138.6, 133.5, 110.7, 128.3, 140.2, 134.7, 140.5, 131.2, 131.9, 136.3, 139.0, 137.4, 137.1, 129.7, 140.7, 138.7, 149.2, 150.4, 140.8, 135.7, 133.6, 144.7, 141.8, 138.0, 142.4, 136.3, 150.0]

using piecewise-regression's model comparison tool:

import piecewise_regression

piecewise_regression.ModelSelection(xx, yy)

enter image description here

That suggests a model with 2 breakpoints, based on the BIC.

We can also plot a fit with 2 breakpoints:

pw_fit = piecewise_regression.Fit(xx, yy, n_breakpoints=2)

pw_fit.plot()

enter image description here

Upvotes: 0

Related Questions