Qwynes
Qwynes

Reputation: 87

linear regression with a forced non-zero y-intercept

I would like to run a linear regression with the y-intercept forced to 0.115. This is the code I tried. I set to fit_intercept=True to get a non-zero y-intercept, but can I set it to a value?

Also, how can I get the best fit line to be plotted rather than a line connecting each point?

Thanks in advance.

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
x=np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1,1)
y=np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])
regression=LinearRegression(fit_intercept=True).fit(x,y)
r_sq=round(regression.score(x,y),4)
m=round(regression.coef_[0],4)
b=round(regression.intercept_,4)
print("r_sq:", r_sq,"m:",m,"b:",b)
plt.figure()
plt.scatter(x,y)
plt.title('A')
plt.ylabel('X')
plt.xlabel('Y')
plt.plot(x,y,'r--',label='measured')
plt.legend(loc='best')

Upvotes: 4

Views: 3041

Answers (4)

ToddP
ToddP

Reputation: 712

I suggest using NumPy's linear algebra library and specifically the pseudo-inverse function np.linalg.pinv. This approach allows you use only the NumPy package as shown below.

import numpy as np
import matplotlib.pyplot as plt

x = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1,1)
y = np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])

b    =  0.115        # fixed intercept
a    =  np.linalg.pinv( x ) @ (y - b)  # estimated slope
yfit = (a * x) + b   # fitted y values


plt.figure()
plt.scatter(x, y, label='data')
plt.plot(x, yfit, label='fit')
plt.legend(loc='best')
plt.show()

regression with forced intercept

Upvotes: 0

Qwynes
Qwynes

Reputation: 87

I found a general solution which gave me the same answer, but also allows me to fit equations that are not linear by simply modifying the function.

import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np

#set y-intercept
b=0.115

#Fitting function
def func(x, m):
    return (x*m)+b

#Experimental x and y data points    
x_A1 = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
y_A1 = np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])

#Plot experimental data points
plt.plot(x_A1, y_A1, 'bo', label='experimental')

#Perform the curve-fit
popt, pcov = curve_fit(func, x_A1, y_A1) #, initialGuess)
#print(popt)

#x values for the fitted function
x_A1_Fit = np.arange(x_A1[0], x_A1[-1], 0.1)

residuals = y_A1- func(x_A1, *popt)
ss_res = np.sum(residuals**2)
ss_tot = np.sum((y_A1-np.mean(y_A1))**2)
r_sq = 1 - (ss_res / ss_tot)

#Plot the fitted function
plt.plot(x_A1_Fit, func(x_A1_Fit, *popt), 'r--', label='fitted: m=%5.4f' % tuple(popt))

plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
print ('r_sq=', "%.4f"%r_sq, 'm=', "%.4f"%popt, "b=", "%.4f"%b)

Upvotes: 1

David Hoffman
David Hoffman

Reputation: 2343

Subtract the y intercept you want to fix from your data and set fit_intercept=False.

For example

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

x = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1, 1)
y = np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])

fig, ax = plt.subplots()

for fit, y_intercept in zip((True, False), (0.0, 0.115)):
    regression = LinearRegression(fit_intercept=fit)
    regression.fit(x, y - y_intercept)

    r_sq = regression.score(x, y - y_intercept)
    m = regression.coef_[0]
    b = regression.intercept_ + y_intercept

    print(f"Fit intercept: {regression.fit_intercept}")
    print(f"r_sq: {r_sq:0.4f}\nm: {m:0.4f}\nb: {b:0.4f}")

    ax.plot(x, y, "bo")
    ax.plot(
        x,
        regression.predict(x) + y_intercept,
        "r" + "--" * fit,
        label=f"Fit Intercept: {regression.fit_intercept}",
    )

ax.set_title("A")
ax.set_ylabel("X")
ax.set_xlabel("Y")

ax.legend(loc="best")

plt.show()

Which prints:

Fit intercept: True
r_sq: 0.9473
m: 0.0017
b: -0.0192
Fit intercept: False
r_sq: 0.9112
m: 0.0014
b: 0.0000

Upvotes: 3

fadil eldin
fadil eldin

Reputation: 143


A post on fit_intercept https://stackoverflow.com/questions/46779605

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

x=np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1,1)
y=np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])

lr_fi_true = LinearRegression(fit_intercept=True)
lr_fi_false = LinearRegression(fit_intercept=False)

lr_fi_true.fit(x, y)
lr_fi_false.fit(x, y)

print('Intercept when fit_intercept=True : {:.5f}'.format(lr_fi_true.intercept_))
print('Intercept when fit_intercept=False : {:.5f}'.format(lr_fi_false.intercept_))

lr_fi_true_yhat = np.dot(x, lr_fi_true.coef_) + lr_fi_true.intercept_
lr_fi_false_yhat = np.dot(x, lr_fi_false.coef_) + lr_fi_false.intercept_

plt.scatter(x, y, label='Actual points')
plt.plot(x, lr_fi_true_yhat, 'r--', label='fit_intercept=True')
plt.plot(x, lr_fi_false_yhat, 'r-', label='fit_intercept=False')
plt.legend()

plt.vlines(0, 0, y.max())
plt.hlines(0, x.min(), x.max())

plt.show()

Prints:
Intercept when fit_intercept=True : 0.09577
Intercept when fit_intercept=False : 0.00000

enter image description here


Upvotes: 0

Related Questions