Reputation: 21
I want to do a linear regression with python with two requirements:
intercept forced to zero
in the output I would like to have uncertainty on the slope parameter, as well as p-value, r-squared...
As far as I know, stats.linregress does the first requirement, and np.linalg.lstsq does the second. Can someone help me find the easiest way to do this please?
Thank you very much, Camille
Upvotes: 2
Views: 2537
Reputation: 2244
import numpy as np
from scipy.optimize import curve_fit
xdata = np.array([x values])
ydata = np.array([y values])
def func(x, a):
return a*x
popt, pcov = curve_fit(func, xdata, ydata)
residuals = ydata- func(xdata, *popt)
ss_res = np.sum(residuals**2)
ss_tot = np.sum((ydata-np.mean(ydata))**2)
r_squared = 1 - (ss_res / ss_tot)
dgr_free = len(xdata)-1
chi_sqr = sum([(y-func(x,*popt))**2/func(x,*popt) for x,y in zip(xdata,ydata)])
print(popt) # will print out your varibles in order, in this case just a
print(r_squared)
print(chi_sqr,dgr_free) # btw this is chi squared not p
the idear here is that we make a regression of the lieaner function without + b since b move the y axis intercept up and down, thus when that is et to 0 we get a linear regresion with intercept in (0,0)
A benefit to use scipy.curve_fit is also that you can make the regression for any formula - though r_squared are some what reduntant in curved regressions.
Upvotes: 1
Reputation: 4657
This example has the statistics requested in your question, and also plots the fitted function vs. the data.
from scipy.optimize import curve_fit
import numpy as np
import scipy.odr
import scipy.stats
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
xData = np.array([5.357, 5.797, 5.936, 6.161, 6.697, 6.731, 6.775, 8.442, 9.861])
yData = np.array([0.376, 0.874, 1.049, 1.327, 2.054, 2.077, 2.138, 4.744, 7.104])
def func(x,b0):
return b0 * x
initialParameters = numpy.array([np.mean(yData) / np.mean(xData)])
def f_wrapper_for_odr(beta, x): # parameter order for odr
return func(x, *beta)
fittedParameters, cov= curve_fit(func, xData, yData, p0=initialParameters)
model = scipy.odr.odrpack.Model(f_wrapper_for_odr)
data = scipy.odr.odrpack.Data(xData, yData)
myodr = scipy.odr.odrpack.ODR(data, model, beta0=fittedParameters, maxit=0)
myodr.set_job(fit_type=2)
fittedParameterstatistics = myodr.run()
df_e = len(xData) - len(fittedParameters) # degrees of freedom, error
cov_beta = fittedParameterstatistics.cov_beta # parameter covariance matrix from ODR
sd_beta = fittedParameterstatistics.sd_beta * fittedParameterstatistics.sd_beta
ci = []
t_df = scipy.stats.t.ppf(0.975, df_e)
ci = []
for i in range(len(fittedParameters)):
ci.append([fittedParameters[i] - t_df * fittedParameterstatistics.sd_beta[i], fittedParameters[i] + t_df * fittedParameterstatistics.sd_beta[i]])
tstat_beta = fittedParameters / fittedParameterstatistics.sd_beta # coeff t-statistics
pstat_beta = (1.0 - scipy.stats.t.cdf(np.abs(tstat_beta), df_e)) * 2.0 # coef. p-values
for i in range(len(fittedParameters)):
print('parameter:', fittedParameters[i])
print(' conf interval:', ci[i][0], ci[i][1])
print(' tstat:', tstat_beta[i])
print(' pstat:', pstat_beta[i])
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Upvotes: 2