Reputation: 23
So I am following along a youtube video showing how to setup the linear regression python code from scratch with gradient descent. In the video, the person initialized the regression with using X and y values. I am trying to apply the same code to a csv file. Here's is what the code looks like:
import numpy as np
import pandas as pd
class LinearRegression():
def __init__(self):
self.learning_rate = 0.001
self.total_iterations = 10000
def y_hat(self, X, w):
return np.dot(w.T, X)
def loss(self, yhat, y):
L =1/self.m * np.sum(np.power(yhat-y, 2))
return L
def gradient_descent(self, w, X, y, yhat):
dldW = np.dot(X, (yhat - y).T)
w = w - self.learning_rate * dldW
return w
def main(self, X, y):
x1 = np.ones((1, X.shape[1]))
x = np.append(X, x1, axis=0)
self.m = X.shape[1]
self.n = X.shape[0]
w = np.zeros((self.n, 1))
for it in range(self.total_iterations+1):
yhat = self.y_hat(X, w)
loss = self.loss(yhat, y)
if it % 2000 == 0:
print(f'Cost at iteration {it} is {loss}')
w = self.gradient_descent(w, X, y, yhat)
return w
if __name__ == '__main__':
#X = np.random.rand(1, 500)
#y = 3 * X + np.random.randn(1, 500) * 0.1
data = pd.read_csv('/Users/brasilgu/Downloads/student (1) 2/student-mat.csv', sep=";")
X = data['G1'].values
y = data['G2'].values
regression = LinearRegression()
w = regression.main(X, y)
I am getting the following error
Traceback (most recent call last):
File "/Users/brasilgu/PycharmProjects/LinReg2/main.py", line 51, in <module>
w = regression.main(X, y)
File "/Users/brasilgu/PycharmProjects/LinReg2/main.py", line 23, in main
x1 = np.ones((1, X.shape[1]))
IndexError: tuple index out of range
Upvotes: 0
Views: 80