Reputation: 87
I would like to run a linear regression with the y-intercept forced to 0.115. This is the code I tried. I set to fit_intercept=True to get a non-zero y-intercept, but can I set it to a value?
Also, how can I get the best fit line to be plotted rather than a line connecting each point?
Thanks in advance.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
x=np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1,1)
y=np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])
regression=LinearRegression(fit_intercept=True).fit(x,y)
r_sq=round(regression.score(x,y),4)
m=round(regression.coef_[0],4)
b=round(regression.intercept_,4)
print("r_sq:", r_sq,"m:",m,"b:",b)
plt.figure()
plt.scatter(x,y)
plt.title('A')
plt.ylabel('X')
plt.xlabel('Y')
plt.plot(x,y,'r--',label='measured')
plt.legend(loc='best')
Upvotes: 4
Views: 3041
Reputation: 712
I suggest using NumPy's linear algebra library and specifically the pseudo-inverse function np.linalg.pinv
. This approach allows you use only the NumPy package as shown below.
import numpy as np
import matplotlib.pyplot as plt
x = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1,1)
y = np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])
b = 0.115 # fixed intercept
a = np.linalg.pinv( x ) @ (y - b) # estimated slope
yfit = (a * x) + b # fitted y values
plt.figure()
plt.scatter(x, y, label='data')
plt.plot(x, yfit, label='fit')
plt.legend(loc='best')
plt.show()
Upvotes: 0
Reputation: 87
I found a general solution which gave me the same answer, but also allows me to fit equations that are not linear by simply modifying the function.
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
#set y-intercept
b=0.115
#Fitting function
def func(x, m):
return (x*m)+b
#Experimental x and y data points
x_A1 = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
y_A1 = np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])
#Plot experimental data points
plt.plot(x_A1, y_A1, 'bo', label='experimental')
#Perform the curve-fit
popt, pcov = curve_fit(func, x_A1, y_A1) #, initialGuess)
#print(popt)
#x values for the fitted function
x_A1_Fit = np.arange(x_A1[0], x_A1[-1], 0.1)
residuals = y_A1- func(x_A1, *popt)
ss_res = np.sum(residuals**2)
ss_tot = np.sum((y_A1-np.mean(y_A1))**2)
r_sq = 1 - (ss_res / ss_tot)
#Plot the fitted function
plt.plot(x_A1_Fit, func(x_A1_Fit, *popt), 'r--', label='fitted: m=%5.4f' % tuple(popt))
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
print ('r_sq=', "%.4f"%r_sq, 'm=', "%.4f"%popt, "b=", "%.4f"%b)
Upvotes: 1
Reputation: 2343
Subtract the y intercept you want to fix from your data and set fit_intercept=False
.
For example
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
x = np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1, 1)
y = np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])
fig, ax = plt.subplots()
for fit, y_intercept in zip((True, False), (0.0, 0.115)):
regression = LinearRegression(fit_intercept=fit)
regression.fit(x, y - y_intercept)
r_sq = regression.score(x, y - y_intercept)
m = regression.coef_[0]
b = regression.intercept_ + y_intercept
print(f"Fit intercept: {regression.fit_intercept}")
print(f"r_sq: {r_sq:0.4f}\nm: {m:0.4f}\nb: {b:0.4f}")
ax.plot(x, y, "bo")
ax.plot(
x,
regression.predict(x) + y_intercept,
"r" + "--" * fit,
label=f"Fit Intercept: {regression.fit_intercept}",
)
ax.set_title("A")
ax.set_ylabel("X")
ax.set_xlabel("Y")
ax.legend(loc="best")
plt.show()
Which prints:
Fit intercept: True
r_sq: 0.9473
m: 0.0017
b: -0.0192
Fit intercept: False
r_sq: 0.9112
m: 0.0014
b: 0.0000
Upvotes: 3
Reputation: 143
A post on fit_intercept https://stackoverflow.com/questions/46779605
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
x=np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).reshape(-1,1)
y=np.array([0.113, 0.116, 0.130, 0.150, 0.150, 0.160, 0.180, 0.210, 0.220, 0.260, 0.280])
lr_fi_true = LinearRegression(fit_intercept=True)
lr_fi_false = LinearRegression(fit_intercept=False)
lr_fi_true.fit(x, y)
lr_fi_false.fit(x, y)
print('Intercept when fit_intercept=True : {:.5f}'.format(lr_fi_true.intercept_))
print('Intercept when fit_intercept=False : {:.5f}'.format(lr_fi_false.intercept_))
lr_fi_true_yhat = np.dot(x, lr_fi_true.coef_) + lr_fi_true.intercept_
lr_fi_false_yhat = np.dot(x, lr_fi_false.coef_) + lr_fi_false.intercept_
plt.scatter(x, y, label='Actual points')
plt.plot(x, lr_fi_true_yhat, 'r--', label='fit_intercept=True')
plt.plot(x, lr_fi_false_yhat, 'r-', label='fit_intercept=False')
plt.legend()
plt.vlines(0, 0, y.max())
plt.hlines(0, x.min(), x.max())
plt.show()
Intercept when fit_intercept=True : 0.09577
Intercept when fit_intercept=False : 0.00000
Upvotes: 0