Reputation: 786
I'm trying to implement stochastic gradient descent with two additional constraints, so I cannot use scikit-learn. Unfortunately, I'm already struggling with plain SGD without the two constraints: the (squared) loss on the training set drops for some iterations, but then starts to increase again, as shown in the pictures. These are the functions I use:
def loss_prime_simple(w, node, feature, data):
    x = data[3]
    y = data[2]
    x_f = x[node][feature]
    y_node = y[node]
    ret = (y_node - w[feature] * x_f) * (-x_f)
    return ret
def update_weights(w, data, predecs, children, node, learning_rate):
    len_features = len(data[3][0])
    w_new = np.zeros(len_features)
    for feature_ in range(len_features):
        w_new[feature_] = loss_prime_simple(w, node, feature_, data)
    return w - learning_rate * w_new
def loss_simple(w, data):
    y_p = data[2]
    x = data[3]
    return ((y_p - np.dot(w, np.array(x).T)) ** 2).sum()
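(For reference: for a single sample the squared loss is L(w) = (y - <w, x>)^2, and its derivative with respect to a single weight w_f is -2 * (y - <w, x>) * x_f; the constant factor 2 is commonly absorbed into the learning rate.)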
This shows the loss on the training set for two different learning rates (0.001, 0.0001): http://postimg.org/image/43nbmh8x5/
Can anyone spot a mistake, or give advice on how to debug this? Thanks
EDIT:
As lejlot pointed out, it would be good to have the data. Here is the data I'm using for x (a single sample): http://textuploader.com/5x0f1
y = 2
This gives the following loss curve: http://postimg.org/image/o9d97kt9v/
The updated code:
def loss_prime_simple(w, node, feature, data):
    x = data[3]
    y = data[2]
    x_f = x[node][feature]
    y_node = y[node]
    return -(y_node - w[feature] * x_f) * x_f
def update_weights(w, data, predecs, children, node, learning_rate):
    len_features = len(data[3][0])
    w_new = np.zeros(len_features)
    for feature_ in range(len_features):
        w_new[feature_] = loss_prime_simple(w, node, feature_, data)
    return w - learning_rate * w_new
def loss_simple2(w, data):
    y_p = data[2]
    x = data[3]
    return ((y_p - np.dot(w, np.array(x).T)) ** 2).sum()
import numpy as np

X = np.array([...])  # put the array from http://textuploader.com/5x0f1 here
y = [2]
data = None, None, y, X
w = np.random.rand(4096)
a = [loss_simple2(w, data)]
for _ in range(200):
    for j in range(X.shape[0]):
        w = update_weights(w, data, None, None, j, 0.0001)
        a.append(loss_simple2(w, data))

from matplotlib import pyplot as plt
plt.figure()
plt.plot(a)
plt.show()
Upvotes: 3
Views: 1349
Reputation: 786
The problem was that I computed the residual from the single product w[feature] * x[feature] instead of the full inner product over all features, sum_f w[f] * x[f].
So this works:
def update_weights(w, x, y, learning_rate):
    inner_product = 0.0
    for f_ in range(len(x)):
        inner_product += w[f_] * x[f_]
    dloss = inner_product - y
    for f_ in range(len(x)):
        w[f_] += learning_rate * (-x[f_] * dloss)
    return w
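For reference, a minimal vectorized sketch of the same update (my naming; assumes w and x are 1-D NumPy arrays of equal length and y is a scalar):
import numpy as np

def update_weights_vec(w, x, y, learning_rate):
    # residual of the full prediction <w, x> against the target
    dloss = np.dot(w, x) - y
    # gradient of the squared loss (up to the constant factor 2) is dloss * x
    return w - learning_rate * dloss * x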
Upvotes: 1
Reputation: 66805
The main error one can notice is that you reshape instead of transpose; compare:
>>> import numpy as np
>>> X = np.array(range(10)).reshape(2, -1)
>>> X
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
>>> X.reshape(-1, 2)
array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])
>>> X.T
array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])
>>> X.reshape(-1, 2) == X.T
array([[ True, False],
       [False, False],
       [False, False],
       [False, False],
       [False,  True]], dtype=bool)
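As a side note (my addition, not from the original answer): with a 2-D X and a 1-D weight vector, the transpose can be avoided entirely, since np.dot(X, w) already yields one prediction per row:
>>> w = np.ones(5)
>>> np.allclose(np.dot(X, w), np.dot(w, X.T))
True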
The next thing that looks wrong is calling sum(array); you should call array.sum() instead:
>>> import numpy as np
>>> x = np.array(range(10)).reshape(2, 5)
>>> x
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
>>> sum(x)
array([ 5,  7,  9, 11, 13])
>>> x.sum()
45
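If the column-wise sum is what you actually want, say so explicitly with the axis argument:
>>> x.sum(axis=0)
array([ 5,  7,  9, 11, 13])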
After fixing these, it works just fine:
def loss_prime_simple(w, node, feature, data):
    x = data[3]
    y = data[2]
    x_f = x[node][feature]
    y_node = y[node]
    return -(y_node - w[feature] * x_f) * x_f

def update_weights(w, data, predecs, children, node, learning_rate):
    len_features = len(data[3][0])
    w_new = np.zeros(len_features)
    for feature_ in range(len_features):
        w_new[feature_] = loss_prime_simple(w, node, feature_, data)
    return w - learning_rate * w_new

def loss_simple(w, data):
    y_p = data[2]
    x = data[3]
    return ((y_p - np.dot(w, np.array(x).T)) ** 2).sum()
import numpy as np

X = np.random.randn(1000, 3)
y = np.random.randn(1000)
data = None, None, y, X
w = np.array([1, 3, 3])
loss = [loss_simple(w, data)]
for _ in range(20):
    for j in range(X.shape[0]):
        w = update_weights(w, data, None, None, j, 0.001)
        loss.append(loss_simple(w, data))

from matplotlib import pyplot as plt
plt.figure()
plt.plot(loss)
plt.show()
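Side note (my sketch, not part of the original answer): the per-feature Python loop in update_weights computes exactly the elementwise expression below, so it can be collapsed into array operations:
import numpy as np

def update_weights_fast(w, data, node, learning_rate):
    x = np.asarray(data[3][node])
    # elementwise form of loss_prime_simple for all features at once
    grad = -(data[2][node] - w * x) * x
    return w - learning_rate * grad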
Upvotes: 1