Linear regression from scratch

Question

So I am following along a youtube video showing how to setup the linear regression python code from scratch with gradient descent. In the video, the person initialized the regression with using X and y values. I am trying to apply the same code to a csv file. Here's is what the code looks like:

import numpy as np
import pandas as pd

class LinearRegression():
    def __init__(self):
        self.learning_rate = 0.001
        self.total_iterations = 10000

    def y_hat(self, X, w):
        return np.dot(w.T, X)

    def loss(self, yhat, y):
        L =1/self.m * np.sum(np.power(yhat-y, 2))
        return L

    def gradient_descent(self, w, X, y, yhat):
        dldW = np.dot(X, (yhat - y).T)

        w = w - self.learning_rate * dldW
        return w

    def main(self, X, y):
        x1 = np.ones((1, X.shape[1]))
        x = np.append(X, x1, axis=0)

        self.m = X.shape[1]
        self.n = X.shape[0]

        w = np.zeros((self.n, 1))

        for it in range(self.total_iterations+1):
            yhat = self.y_hat(X, w)
            loss = self.loss(yhat, y)

            if it % 2000 == 0:
                print(f'Cost at iteration {it} is {loss}')


            w = self.gradient_descent(w, X, y, yhat)

        return w


if __name__ == '__main__':
    #X = np.random.rand(1, 500)
    #y = 3 * X + np.random.randn(1, 500) * 0.1
    data = pd.read_csv('/Users/brasilgu/Downloads/student (1) 2/student-mat.csv', sep=";")
    X = data['G1'].values
    y = data['G2'].values
    regression = LinearRegression()
    w = regression.main(X, y)

I am getting the following error


Traceback (most recent call last):
  File "/Users/brasilgu/PycharmProjects/LinReg2/main.py", line 51, in 
    w = regression.main(X, y)
  File "/Users/brasilgu/PycharmProjects/LinReg2/main.py", line 23, in main
    x1 = np.ones((1, X.shape[1]))
IndexError: tuple index out of range

Linear regression from scratch

Answers (0)

Related Questions