Reputation: 29
Linear regression with gradient descent is giving a different result on the same dataset compared to sklearn.
I want to know why that is. Is it a problem of local minima?
The dataset is as follows:
ht wt
63 127
64 121
66 142
69 157
69 162
71 156
71 169
72 165
73 181
75 208
Sklearn computes the intercept as -266.53439537 and the coefficient as 6.13758146,
whereas gradient descent gives an intercept of -1.49087014 and a coefficient of 2.3239637.
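For reference, the least-squares line can be cross-checked independently of both implementations; a minimal sketch, assuming only the ht/wt values above, using np.polyfit:

import numpy as np

ht = np.array([63, 64, 66, 69, 69, 71, 71, 72, 73, 75])
wt = np.array([127, 121, 142, 157, 162, 156, 169, 165, 181, 208])
slope, intercept = np.polyfit(ht, wt, 1)  # degree-1 ordinary least-squares fit
print(slope, intercept)                   # should match sklearn's coef_ and intercept_

The full gradient descent code is below.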
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

def cost(m, b, data_size):
    # mean squared error of the line y = m*x + b over the dataset
    totalError = 0
    for i in range(data_size):
        x = IN[i]
        y = OUT[i]
        totalError += ((m * x + b) - y) ** 2
    return totalError / float(data_size)

def compute_gradient(X, Y, theta_1, theta_0, N, learning_rate):
    # one gradient descent step on the MSE cost
    Y_pred = theta_1 * X + theta_0
    gradient_theta_1 = (-2 / N) * sum(X * (Y - Y_pred))
    gradient_theta_0 = (-2 / N) * sum(Y - Y_pred)
    #print(gradient_theta_0, gradient_theta_1, gradient_theta_0 * learning_rate, gradient_theta_1 * learning_rate)
    new_theta_0 = theta_0 - (gradient_theta_0 * learning_rate)
    new_theta_1 = theta_1 - (gradient_theta_1 * learning_rate)
    return (new_theta_1, new_theta_0)

IN = np.array([63, 64, 66, 69, 69, 71, 71, 72, 73, 75])
OUT = np.array([127, 121, 142, 157, 162, 156, 169, 165, 181, 208])
X = IN[:, np.newaxis]
Y = OUT[:, np.newaxis]
data_size = len(IN)  # number of samples

iterations = 10000
initial_theta_0 = 0
initial_theta_1 = 0
learning_rate = 0.00001

theta_0 = initial_theta_0
theta_1 = initial_theta_1

fig, ax = plt.subplots(figsize=(12, 8))
cost_history = []
for i in range(iterations):
    #print("iteration {} m {} b {}".format(i, theta_1, theta_0))
    [theta_1, theta_0] = compute_gradient(X, Y, theta_1, theta_0, data_size, learning_rate)
    totalError = cost(theta_1, theta_0, data_size)
    #print(totalError)
    cost_history.append(totalError)

ax.plot(range(iterations), cost_history, 'b.')
print("iteration {} m {} b {}".format(i, theta_1, theta_0))

reg_line = [(theta_1 * x) + theta_0 for x in IN]

lm = LinearRegression()
lm.fit(X, Y)
print("SKLEARN coeff {}".format(lm.coef_))
print("SKLEARN intercept {}".format(lm.intercept_))
#reg_line = [(lm.coef_[0] * x) + lm.intercept_ for x in IN]

fig3, ax3 = plt.subplots(figsize=(12, 8))  # separate axes for the regression line
ax3.plot(IN, reg_line, color='red')
plt.show()
print ("SKLEARN coeff {}".format(lm.coef_))
print ("SKLEARN intercept {}".format(lm.intercept_))
RESULTS
iteration 99999 m [2.3239637] b [-1.49087014]
SKLEARN coeff [[6.13758146]]
SKLEARN intercept [-266.53439537]
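To check which of the two parameter pairs actually has the lower mean squared error on this data, a quick sketch reusing the cost() function and data_size defined above:

print(cost(6.13758146, -266.53439537, data_size))  # sklearn's line
print(cost(2.3239637, -1.49087014, data_size))     # gradient descent's line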
Upvotes: 0
Views: 196
Reputation: 529
You have started from bad initial conditions (0, 0) and fallen into a local minimum close to that point. More intuitive initial conditions are based on the maxima and minima of ht and wt, i.e.
initial_theta_0 = np.min(Y)+np.min(X)*(np.min(Y)-np.max(Y))/(np.max(X)-np.min(X)) #-335.75
initial_theta_1 = (np.max(Y)-np.min(Y))/(np.max(X)-np.min(X)) # 7.25
#initial_theta_0 = 121+63*(121-208)/(75-63) # -335.75
#initial_theta_1 = (208-121)/(75-63) # 7.25
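A minimal sketch of plugging these starting values into the same gradient descent loop from the question (assuming compute_gradient, X, Y, data_size, learning_rate and iterations are defined as in your code):

theta_0 = initial_theta_0  # -335.75 instead of 0
theta_1 = initial_theta_1  # 7.25 instead of 0
for i in range(iterations):
    theta_1, theta_0 = compute_gradient(X, Y, theta_1, theta_0, data_size, learning_rate)
print("m {} b {}".format(theta_1, theta_0))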
Upvotes: 1