Cost function is not decreasing in gradient descent implementation

I am implementing multiple linear regression in Python from scratch. The cost after every epoch increases very rapidly and eventually overflows. What is going wrong? Is there a logical or methodological error? Here is the code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class LinearRegression:

    def __init__(self):
        #initialize parameters with none values
        self.cost_history = None
        self.std_dev = None
        self.mean = None
        self.weights = None

    def set_mean_and_std_dev(self, X_data, axis=0):
        #mean and std_deviation of training_data 
        self.std_dev = np.array(np.std(X_data, axis = axis))
        self.mean = np.array(np.mean(X_data, axis = axis))
        print("Mean : ", self.mean)
        print("Standard deviation : ", self.std_dev)
        return

    def normalize(self, X_data):
        #normalizing the data
        X_data = (X_data - self.mean)/(self.std_dev+0.00001)
        return

    def cost(self, X, y):
        #squared error based cost
        return np.sum((np.dot(X, self.weights.T)-y)**2)

    def get_gradient(self, X, y, pred_y, learning_rate):
        grad = np.zeros(shape=(X.shape[1], ), dtype='float64')
        #print("X.shape : ", X.shape, " grad.shape : ", grad.shape, "pred_y.shape", pred_y.shape, "y.shape : ", y.shape)

        for ix in range(X.shape[0]):
            #for each example in X
            x_example = X[ix, :]
            error = (pred_y[ix] - y[ix])

            for jx in range(grad.shape[0]):
                #for each feature of X
                grad[jx] = grad[jx] + x_example[jx]*error
        for ix in range(grad.shape[0]):
            #divide by the number of examples
            grad[jx] = grad[jx]*learning_rate*(1.0/X.shape[0])
        return grad


    def gradient_descent(self, X, y, learning_rate=0.001, num_iterations=100):

        self.weights = np.zeros(shape=(X.shape[1],),  dtype='float64')
        for ix in range(X.shape[1]):
            #initialize weights with random values
            self.weights[ix] = np.random.rand()*100

        #initialize cost history
        self.cost_history = []

        for ix in range(num_iterations):
            pred_y = np.dot(X, self.weights.T)
            gradient = self.get_gradient(X=X, y=y, pred_y=pred_y, learning_rate=learning_rate)
            self.weights = self.weights - gradient
            cost = self.cost(X, y)
            self.cost_history.append(cost)

            if ix%10 == 0:
                print("(learning_rate/(X.shape[0]))*gradient) : ", gradient)            
                print("Cost at ", ix, " epoch is : ", cost)

    def predict(self, X):
        return np.dot(X, self.weights.T)

    def get_weights(self):
        return self.weights

#create a linear regression object
lr = LinearRegression()


# Some random function generator:
# y = 1.23 + 2.5*X1 + 2*X2 - X3 - 3*X4
def generate_data(X):
    y = np.zeros(shape=(X.shape[0], ), dtype='float64')
    for ix in range(X.shape[0]):
        y[ix] = 1.23 + 2.5*X[ix, 0] + 2*X[ix, 1] - X[ix, 2] - 3*X[ix, 3] 
    return y


X = np.zeros(shape=(300, 5), dtype='float64')
data_gen = [[np.random.rand()*107 for jx in range(4)] for ix in range(300)]
X[:, 1:] = np.array(data_gen, dtype='float64')

y = generate_data(X[:, 1:])

lr.set_mean_and_std_dev(X)
lr.normalize(X)

X[:, 0] = 1
print(X.shape, y.shape, X.dtype, y.dtype)
print(X[0], y[0])



X_train, X_test, y_train, y_test = X[:200], X[200:], y[:200], y[200:]
print(y_test)


lr.gradient_descent(X_train, y_train, 0.01, 500)
pred_y = lr.predict(X_test)

Answers (1)

Huang

There is a mistake in the get_gradient function: the index jx is used inside the loop over ix, so only the last component of grad gets multiplied by the learning rate and divided by the number of examples, while the other components keep their raw summed values:

for ix in range(grad.shape[0]):
    #divide by the number of examples
    grad[jx] = grad[jx]*learning_rate*(1.0/X.shape[0])

You also use more for loops than necessary. Instead, you can vectorize get_gradient:

def get_gradient(self, X, y, pred_y, learning_rate):
    errors = (pred_y - y)
    grad = X.T.dot(errors)
    #divide by the number of examples
    grad = grad*learning_rate/X.shape[0]
    return grad
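
If you want to convince yourself that this vectorized form matches the original double loop, here is a quick standalone check on a tiny array (just an illustration, not part of the class; X_tiny and errors are made-up values):

import numpy as np

X_tiny = np.array([[1.0, 2.0], [3.0, 4.0]])
errors = np.array([0.5, -1.0])            # stands in for pred_y - y

# double-loop accumulation, as in the original code
grad_loop = np.zeros(X_tiny.shape[1])
for ix in range(X_tiny.shape[0]):
    for jx in range(X_tiny.shape[1]):
        grad_loop[jx] += X_tiny[ix, jx] * errors[ix]

# vectorized equivalent
grad_vec = X_tiny.T.dot(errors)

print(np.allclose(grad_loop, grad_vec))   # prints True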

Other problems:

  1. The initial weights are too large:

    self.weights = np.zeros(shape=(X.shape[1],),  dtype='float64')
    for ix in range(X.shape[1]):
        #initialize weights with random values
        self.weights[ix] = np.random.rand()*100
    

    You can use smaller initial values, generated in a single line: self.weights = np.random.rand(X.shape[1])

  2. The learning rate is too large; try: lr.gradient_descent(X_train, y_train, 0.00001, 3000)

  3. It looks like you intended set_mean_and_std_dev and normalize to standardize the data, but they have no effect here: normalize computes the scaled array and then discards it, so X is never actually changed (a possible fix is sketched below).
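
Here is a minimal sketch of how normalize could return the standardized data so the scaling actually takes effect. Fitting the statistics on X[:, 1:] and assigning the result back to those columns is an assumption about how you want to use it, not something taken from the original code:

def normalize(self, X_data):
    #return the standardized array instead of discarding it;
    #the small constant avoids division by zero for constant columns
    return (X_data - self.mean) / (self.std_dev + 0.00001)

# usage in the driver script: scale only the feature columns,
# so the bias column of ones stays untouched
lr.set_mean_and_std_dev(X[:, 1:])
X[:, 1:] = lr.normalize(X[:, 1:])

With the features standardized, the gradient components are on comparable scales, and a learning rate closer to the original 0.01 should also converge.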

The final code:

import numpy as np

class LinearRegression:

    def __init__(self):
        #initialize parameters with none values
        self.cost_history = None
        self.std_dev = None
        self.mean = None
        self.weights = None

    def cost(self, X, y):
        #squared error based cost
        return np.sum((self.predict(X)-y)**2)

    def get_gradient(self, X, y, pred_y, learning_rate):
        errors = (pred_y - y)
        grad = X.T.dot(errors)
        #divide by the number of examples
        grad = grad*learning_rate/X.shape[0]
        return grad


    def gradient_descent(self, X, y,
                         learning_rate=0.001,
                         num_iterations=100):
        #initialize weights with random values
        self.weights = np.random.rand(X.shape[1])
        #initialize cost history
        self.cost_history = []

        for ix in range(num_iterations):
            pred_y = self.predict(X)
            gradient = self.get_gradient(X=X, y=y, pred_y=pred_y, learning_rate=learning_rate)
            self.weights = self.weights - gradient
            cost = self.cost(X, y)
            self.cost_history.append(cost)

            if ix%10 == 0:
                print("(learning_rate/(X.shape[0]))*gradient) : ", gradient)
                print("Cost at ", ix, " epoch is : ", cost)

    def predict(self, X):
        return np.dot(X, self.weights)


#create a linear regression object
lr = LinearRegression()

# Some random function generator:
# y = 1.23 + 2.5*X1 + 2*X2 - X3 - 3*X4
def generate_data(X):
    y = np.zeros(shape=(X.shape[0], ), dtype='float64')
    for ix in range(X.shape[0]):
        y[ix] = 1.23 + 2.5*X[ix, 0] + 2*X[ix, 1] - X[ix, 2] - 3*X[ix, 3]
    return y


X = np.zeros(shape=(300, 5), dtype='float64')
data_gen = [[np.random.rand()*107 for jx in range(4)] for ix in range(300)]
X[:, 1:] = np.array(data_gen, dtype='float64')

y = generate_data(X[:, 1:])

X[:, 0] = 1
print(X.shape, y.shape, X.dtype, y.dtype)
print(X[0], y[0])

X_train, X_test, y_train, y_test = X[:200], X[200:], y[:200], y[200:]
print(y_test)

lr.gradient_descent(X_train, y_train, 0.00001, 3000)
pred_y = lr.predict(X_test)
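
As an optional follow-up check (not part of the fix), you can measure the test error and inspect the learned weights; the variable names reuse those from the script above:

# mean squared error on the 100 held-out examples
mse = np.mean((pred_y - y_test)**2)
print("Test MSE :", mse)

# compare the learned weights against the generating coefficients
# [1.23, 2.5, 2, -1, -3] used in generate_data
print("Weights  :", lr.weights)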
