Justin
Justin

Reputation: 6559

Simple linear regression for data set

I am looking to create a trend function in C# for a set of data and it seems like using a big math library is a bit overkill for my needs.

Given a list of values such as 6, 13, 7, 9, 12, 4, 2, 2, 1, I would like to get the slope of the simple linear regression (to see whether the data is decreasing or increasing) and the next estimated value. I know that there are massive libraries out there that can do that and more, but I wanted a simpler approach.

I'm not big into statistics, so if someone could lead me in a way to do this, it would be appreciated.

Upvotes: 5

Views: 25665

Answers (2)

duffymo
duffymo

Reputation: 308988

You don't need massive libraries. The formulas are relatively simple.

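Given a pair of arrays for your x and y data, you calculate the least-squares fit coefficients for the line $y = a + b x$ like this: $b = \dfrac{\sum_i x_i y_i - n\,\bar{x}\,\bar{y}}{\sum_i x_i^2 - n\,\bar{x}^2}$ and $a = \bar{y} - b\,\bar{x}$, where $n$ is the number of points and $\bar{x}$, $\bar{y}$ are the means of the x and y values.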

Formulas (27) and (28) are the two you want. The coding involves little more than sums and sums of squares of the input array values.

Here is a Java class and its JUnit test class for those who want more detail:

import java.util.Arrays;

/**
 * Simple linear regression example using Wolfram Alpha formulas.
 *
 * <p>Fits the line {@code y = intercept + slope * x} to paired samples by least squares.
 * All methods are static and tolerate bad input: a {@code null} array, an empty array, or
 * (for the two-array methods) arrays of different lengths produce {@code 0.0} rather than
 * an exception.
 *
 * <p>The centered sums are computed in two passes (mean first, then deviations from the
 * mean) instead of the algebraically equivalent {@code sum(v*v) - n*mean*mean}; the latter
 * subtracts two nearly equal large numbers and loses precision when the values are large
 * compared to their spread.
 *
 * User: mduffy
 * Date: 10/22/2018
 * Time: 10:56 AM
 * @see <a href="https://stackoverflow.com/questions/15623129/simple-linear-regression-for-data-set/15623183?noredirect=1#comment92773017_15623183">Stack Overflow discussion</a>
 */
public class SimpleLinearRegressionExample {

    /**
     * Computes the least-squares slope of {@code y} against {@code x}.
     *
     * <p>The result is {@code correlation(x, y) / sumOfSquares(x)}. When the x values have no
     * spread (for example a single point), that denominator is zero or nearly zero and the
     * result is not a meaningful slope (typically {@code NaN}); callers that may pass such
     * data should check the result.
     *
     * @param x independent-variable samples
     * @param y dependent-variable samples, same length as {@code x}
     * @return the fitted slope, or {@code 0.0} if the inputs are null, empty or of different lengths
     */
    public static double slope(double [] x, double [] y) {
        if (!isUsablePair(x, y)) {
            return 0.0;
        }
        return correlation(x, y) / sumOfSquares(x);
    }

    /**
     * Computes the least-squares intercept of {@code y} against {@code x}, i.e.
     * {@code average(y) - slope(x, y) * average(x)}.
     *
     * @param x independent-variable samples
     * @param y dependent-variable samples, same length as {@code x}
     * @return the fitted intercept, or {@code 0.0} if the inputs are null, empty or of different lengths
     */
    public static double intercept(double [] x, double [] y) {
        if (!isUsablePair(x, y)) {
            return 0.0;
        }
        return average(y) - slope(x, y) * average(x);
    }

    /**
     * Computes the arithmetic mean of the given values.
     *
     * @param values samples to average
     * @return the mean, or {@code 0.0} if {@code values} is null or empty
     */
    public static double average(double [] values) {
        if ((values == null) || (values.length == 0)) {
            return 0.0;
        }
        return Arrays.stream(values).average().orElse(0.0);
    }

    /**
     * Computes the sum of squared deviations from the mean: {@code sum((v - mean)^2)}.
     *
     * @param values samples
     * @return the centered sum of squares, or {@code 0.0} if {@code values} is null or empty
     */
    public static double sumOfSquares(double [] values) {
        if ((values == null) || (values.length == 0)) {
            return 0.0;
        }
        final double mean = average(values);
        double total = 0.0;
        for (double value : values) {
            // Subtract the mean before squaring so large offsets cancel exactly
            // instead of being squared and then subtracted.
            final double deviation = value - mean;
            total += deviation * deviation;
        }
        return total;
    }

    /**
     * Computes the sum of products of deviations from the means:
     * {@code sum((x[i] - mean(x)) * (y[i] - mean(y)))}. Despite the name this is the
     * un-normalized co-deviation sum, not a correlation coefficient.
     *
     * @param x first sample array
     * @param y second sample array, same length as {@code x}
     * @return the centered cross-product sum, or {@code 0.0} if the inputs are null, empty or
     *         of different lengths
     */
    public static double correlation(double [] x, double [] y) {
        if (!isUsablePair(x, y)) {
            return 0.0;
        }
        final double xMean = average(x);
        final double yMean = average(y);
        double total = 0.0;
        for (int i = 0; i < x.length; ++i) {
            total += (x[i] - xMean) * (y[i] - yMean);
        }
        return total;
    }

    /** Returns true when both arrays are non-null, non-empty and of equal length. */
    private static boolean isUsablePair(double [] x, double [] y) {
        return (x != null) && (y != null) && (x.length == y.length) && (x.length > 0);
    }
}

JUnit test class:

import org.junit.Assert;
import org.junit.Test;

/**
 * JUnit tests for simple linear regression example.
 *
 * <p>Each test follows the same setup / exercise / assert layout and compares doubles
 * using the shared {@link #tolerance}.
 *
 * User: mduffy
 * Date: 10/22/2018
 * Time: 11:53 AM
 * @see <a href="https://stackoverflow.com/questions/15623129/simple-linear-regression-for-data-set/15623183?noredirect=1#comment92773017_15623183">Stack Overflow discussion</a>
 */
public class SimpleLinearRegressionExampleTest {

    /** Absolute tolerance for all floating-point comparisons; final so no test can loosen it for the others. */
    public static final double tolerance = 1.0e-6;

    /** A null array averages to 0.0 instead of throwing. */
    @Test
    public void testAverage_NullArray() {
        // setup
        double [] x = null;
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.average(x);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** An empty array averages to 0.0. */
    @Test
    public void testAverage_EmptyArray() {
        // setup
        double [] x = {};
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.average(x);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** 28 / 7 = 4. */
    @Test
    public void testAverage_Success() {
        // setup
        double [] x = { 1.0, 2.0, 2.0, 3.0, 4.0, 7.0, 9.0 };
        double expected = 4.0;
        // exercise
        double actual = SimpleLinearRegressionExample.average(x);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** A null array has a sum of squares of 0.0 instead of throwing. */
    @Test
    public void testSumOfSquares_NullArray() {
        // setup
        double [] x = null;
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.sumOfSquares(x);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** An empty array has a sum of squares of 0.0. */
    @Test
    public void testSumOfSquares_EmptyArray() {
        // setup
        double [] x = {};
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.sumOfSquares(x);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Sum of squares 164 minus 7 * 4^2 = 52. */
    @Test
    public void testSumOfSquares_Success() {
        // setup
        double [] x = { 1.0, 2.0, 2.0, 3.0, 4.0, 7.0, 9.0 };
        double expected = 52.0;
        // exercise
        double actual = SimpleLinearRegressionExample.sumOfSquares(x);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Two null arrays yield 0.0 instead of throwing. */
    @Test
    public void testCorrelation_NullX_NullY() {
        // setup
        double [] x = null;
        double [] y = null;
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.correlation(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Arrays of different lengths (5 vs 6) yield 0.0. */
    @Test
    public void testCorrelation_DifferentLengths() {
        // setup
        double [] x = { 1.0, 2.0, 3.0, 5.0, 8.0 };
        double [] y = { 0.11, 0.12, 0.13, 0.15, 0.18, 0.20 };
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.correlation(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Sum of x*y is 2.93; minus 5 * 3.8 * 0.138 = 2.622 leaves 0.308. */
    @Test
    public void testCorrelation_Success() {
        // setup
        double [] x = { 1.0, 2.0, 3.0, 5.0, 8.0 };
        double [] y = { 0.11, 0.12, 0.13, 0.15, 0.18 };
        double expected = 0.308;
        // exercise
        double actual = SimpleLinearRegressionExample.correlation(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Centered cross-product 7 over centered x sum of squares 5 gives 1.4. */
    @Test
    public void testSlope() {
        // setup
        double [] x = { 1.0, 2.0, 3.0, 4.0 };
        double [] y = { 6.0, 5.0, 7.0, 10.0 };
        double expected = 1.4;
        // exercise
        double actual = SimpleLinearRegressionExample.slope(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Two null arrays yield a slope of 0.0 instead of throwing. */
    @Test
    public void testSlope_NullX_NullY() {
        // setup
        double [] x = null;
        double [] y = null;
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.slope(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Mean of y (7) minus slope (1.4) times mean of x (2.5) gives 3.5. */
    @Test
    public void testIntercept() {
        // setup
        double [] x = { 1.0, 2.0, 3.0, 4.0 };
        double [] y = { 6.0, 5.0, 7.0, 10.0 };
        double expected = 3.5;
        // exercise
        double actual = SimpleLinearRegressionExample.intercept(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }

    /** Arrays of different lengths (4 vs 3) yield an intercept of 0.0. */
    @Test
    public void testIntercept_DifferentLengths() {
        // setup
        double [] x = { 1.0, 2.0, 3.0, 4.0 };
        double [] y = { 6.0, 5.0, 7.0 };
        double expected = 0.0;
        // exercise
        double actual = SimpleLinearRegressionExample.intercept(x, y);
        // assert
        Assert.assertEquals(expected, actual, tolerance);
    }
}

Upvotes: 8

VISHMAY
VISHMAY

Reputation: 709

My own code for predicting a future value (example: the value for the 15th day, counting from the first day):

  /// <summary>
  /// Demo entry point: fits a line to nine (x, y) samples with LinearRegression
  /// and prints the value the fitted line gives at x = 15.
  /// </summary>
  static void Main(string[] args)
    {
        // Nine x samples; the actual values are elided ("...") in this listing.
        double[] xVal = new double[9]
        {

    ...


           };
        // Nine y samples paired index-by-index with xVal; values also elided.
        double[] yVal = new double[9]  {
     ...

        };
        // Receivers for the three out parameters of LinearRegression.
        double rsquared;
        double yintercept;
        double slope;
        // Fit over the index range [0, 9), i.e. all nine samples.
        LinearRegression(xVal, yVal,0,9, out rsquared, out yintercept, out slope);
        Console.WriteLine( yintercept + (slope*15));//15 is the x value to predict for (day number, counting from day 1)

        // Wait for a key press before returning.
        Console.ReadKey();
    }
    /// <summary>
    /// Fits a least-squares line to the points (xVals[i], yVals[i]) for
    /// inclusiveStart &lt;= i &lt; exclusiveEnd and reports the fit through the
    /// three out parameters.
    /// </summary>
    /// <param name="xVals">x samples; asserted (debug builds) to be as long as yVals.</param>
    /// <param name="yVals">y samples.</param>
    /// <param name="inclusiveStart">Index of the first sample used.</param>
    /// <param name="exclusiveEnd">Index one past the last sample used.</param>
    /// <param name="rsquared">Receives the square of the correlation coefficient r.</param>
    /// <param name="yintercept">Receives mean(y) - slope * mean(x).</param>
    /// <param name="slope">Receives the centered xy sum divided by the centered xx sum.</param>
    /// <remarks>
    /// The index range itself is not validated; all sums are accumulated in a single pass.
    /// </remarks>
    public static void LinearRegression(double[] xVals, double[] yVals,
                                        int inclusiveStart, int exclusiveEnd,
                                        out double rsquared, out double yintercept,
                                        out double slope)
    {
        Debug.Assert(xVals.Length == yVals.Length);

        // Number of samples, kept as a double so the divisions below are floating-point.
        double n = exclusiveEnd - inclusiveStart;

        // Raw running sums: x, y, x*x, y*y and x*y.
        double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;

        int i = inclusiveStart;
        while (i < exclusiveEnd)
        {
            double xi = xVals[i];
            double yi = yVals[i];
            sxy += xi * yi;
            sx += xi;
            sy += yi;
            sxx += xi * xi;
            syy += yi * yi;
            i++;
        }

        // Sums centered on the means, derived from the raw sums.
        double centeredXX = sxx - ((sx * sx) / n);
        double centeredXY = sxy - ((sx * sy) / n);

        // Correlation coefficient r from the raw sums, then squared.
        double rTop = (n * sxy) - (sx * sy);
        double rBottomSquared = (n * sxx - (sx * sx))
         * (n * syy - (sy * sy));
        double r = rTop / Math.Sqrt(rBottomSquared);

        rsquared = r * r;
        slope = centeredXY / centeredXX;
        yintercept = (sy / n) - (slope * (sx / n));
    }

Upvotes: 8

Related Questions