Rest1ve

Reputation: 105

Why is the SGD loss for a dataset not matching between the PyTorch code and the from-scratch Python code for linear regression?

I'm trying to implement multiple linear regression on the wine dataset, but when I compare the results of PyTorch with my from-scratch Python code, the losses do not come out the same.

My Scratch Code:

Functions:

def yinfer(X, beta):
  return beta[0] + np.dot(X,beta[1:]) 

def cost(X, Y, beta):
  sum = 0
  m = len(Y)
  for i in range(m): 
    sum = sum + ( yinfer(X[i],beta) - Y[i])*(yinfer(X[i],beta) - Y[i])
  return  sum/(1.0*m)

Main Code:

alpha = 0.005
b=[0,0.04086357 ,-0.02831656  ,0.09622949 ,-0.15162516  ,0.60188454  ,0.47528714,
  -0.6066466  ,-0.22995654 ,-0.58388734  ,0.20954669 ,-0.67851365]
beta = np.array(b)
print(beta)
iterations = 1000
arr_cost = np.zeros((iterations,2))
m = len(Y)
temp_beta = np.zeros(12)
for i in range(iterations):
  for k in range(m): 
        temp_beta[0] =  yinfer(X[k,:], beta) - Y[k]
        temp_beta[1:] = (yinfer(X[k,:], beta) - Y[k])*X[k,:]
        beta = beta - alpha*temp_beta/(1.0*m)    #(m*np.linalg.norm(temp_beta))
  arr_cost[i] = [i,cost(X,Y,beta)]
  #print(cost(X,Y,beta))
plt.scatter(arr_cost[0:iterations,0], arr_cost[0:iterations,1])

I have used the same initial weights that were used in the PyTorch code.
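For reference, one way to actually load this beta into an nn.Linear(11, 1) layer, so that both versions start from identical parameters (just a sketch, not the exact code I ran):

import numpy as np
import torch
from torch import nn

beta = np.array(b)                  # the same 12-element b as above
layer = nn.Linear(11, 1)
with torch.no_grad():               # overwrite the default initialisation
    layer.bias.copy_(torch.tensor(beta[:1], dtype=torch.float32))
    layer.weight.copy_(torch.tensor(beta[1:], dtype=torch.float32).view(1, -1))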

My PyTorch code:

class LinearRegression(nn.Module):
  def __init__(self,n_input_features):
    super(LinearRegression,self).__init__()
    self.linear=nn.Linear(n_input_features,1)
    # self.linear.weight.data=b.view(1,-1)
    self.linear.bias.data.fill_(0.0)
    nn.init.xavier_uniform_(self.linear.weight)
    # nn.init.xavier_normal_(self.linear.bias)
  def forward(self,x):
    y_predicted=self.linear(x)
    return y_predicted
model=LinearRegression(11)
criterion = nn.MSELoss()
num_epochs=1000
for epoch in range(num_epochs):
  for x,y in train_data:
    y_pred=model(x)
    loss=criterion(y,y_pred)
    # print(loss)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

My DataLoader:

class Data(Dataset):
    def __init__(self):
        self.x=x_train
        self.y=y_train
        self.len=self.x.shape[0]
    def __getitem__(self,index):
      return self.x[index],self.y[index]
    def __len__(self):
        return self.len
dataset=Data()
train_data=DataLoader(dataset=dataset,batch_size=1,shuffle=False)

The graph comparing the two losses:

Can someone please tell me why this is happening, or whether there is a fault in my code?

Upvotes: 5

Views: 116

Answers (1)

joeDiHare

Reputation: 679

A couple of tweaks to the code were necessary. I also had to create data and an optimizer, which you hadn't provided. With the changes below, both methods produce a decreasing loss curve. Of course, the optimal hyperparameters, such as alpha or the number of iterations, may differ between the two approaches, and you may need to tune them separately.

# Create data:
import numpy as np
from sklearn import datasets
X, Y = datasets.load_diabetes(return_X_y=True)
# Add a random 11th column so the shape matches your wine data (11 features):
X = np.hstack((X, np.random.randn(X.shape[0], 1)))

iterations = 500

################
# Python version
def yinfer(X, beta):
    return beta[0] + np.dot(X,beta[1:]) 

def cost(X, Y, beta):
    sum = 0
    m = len(Y)
    for i in range(m): 
        sum = sum + ( yinfer(X[i], beta) - Y[i])*(yinfer(X[i], beta) - Y[i])
    return  sum/(1.0*m)

beta = np.array([0,0.04086357 ,-0.02831656  ,0.09622949 ,-0.15162516  ,0.60188454  ,0.47528714,
  -0.6066466  ,-0.22995654 ,-0.58388734  ,0.20954669 ,-0.67851365])
arr_cost = []
m = len(Y)
alpha = 0.1
temp_beta = np.zeros(12)
for i in range(iterations):
    for k in range(m): 
        temp_beta[0] =  yinfer(X[k,:], beta) - Y[k]
        temp_beta[1:] = (yinfer(X[k,:], beta) - Y[k])*X[k,:]
        beta = beta - alpha*temp_beta/(1.0*m)
    arr_cost.append(cost(X,Y,beta))

#################
# PyTorch version
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

class LinearRegression(nn.Module):
    def __init__(self,n_input_features):
        super(LinearRegression,self).__init__()
        self.linear=nn.Linear(n_input_features,1)
        self.linear.bias.data.fill_(0.0)
        nn.init.xavier_uniform_(self.linear.weight)
    def forward(self,x):
        y_predicted=self.linear(x)
        return y_predicted

class Data(Dataset):
    def __init__(self, x_train, y_train):
        self.x=x_train
        self.y=y_train
        self.len=self.x.shape[0]
    def __getitem__(self,index):
        return self.x[index],self.y[index]
    def __len__(self):
        return self.len
train_data=DataLoader(dataset=Data(X, Y),batch_size=1,shuffle=False)

criterion = nn.MSELoss()
model=LinearRegression(11)
optimizer = optim.SGD(model.parameters(), lr=0.01)

loss_vals = []  # store the last sample's loss in each epoch
for epoch in range(iterations):
    for x, y in train_data:
        x, y = x.float(), y.float().view(-1, 1)  # give y the same (batch, 1) shape as y_pred
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    loss_vals.append(float(loss))
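
####################
# Optional check: loss_vals above keeps only the last sample's loss in each
# epoch, whereas the scratch cost() averages over the whole dataset. A directly
# comparable number (a sketch reusing X, Y, model and criterion from above):
with torch.no_grad():
    X_all = torch.as_tensor(X, dtype=torch.float32)
    Y_all = torch.as_tensor(Y, dtype=torch.float32).view(-1, 1)
    full_mse = criterion(model(X_all), Y_all).item()  # comparable to cost(X, Y, beta)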
    
##############
# Plot results
import matplotlib.pyplot as plt

f, ax = plt.subplots(1, 1, figsize=(20, 5))
ax.plot(range(1, iterations+1), arr_cost, label='python')
ax.plot(range(1, iterations+1), loss_vals, label='torch')
ax.legend(); ax.set_xlabel('epochs'); ax.set_ylabel('loss')
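
One more detail to keep in mind when comparing the two curves: the scratch update divides the per-sample gradient by m and drops the factor of 2 from the derivative of the squared error, while PyTorch's SGD step on a per-sample MSELoss uses the full gradient. So the step sizes only line up when alpha is roughly 2*m*lr; a sketch of that relation (just restating the two update rules above, not a verified equivalence):

# Scratch update for sample k:  beta <- beta - (alpha/m) * (yhat_k - y_k) * x_k
# PyTorch, MSELoss, batch=1:    w    <- w    - lr * 2 * (yhat_k - y_k) * x_k
# The effective step sizes match when alpha/m == 2*lr:
m = len(Y)
lr = 0.01                      # the PyTorch learning rate used above
alpha_equivalent = 2 * m * lr  # scratch-code alpha with roughly the same step size
print(alpha_equivalent)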

Upvotes: 1
