Reputation: 1297
I'm implementing logistic regression in Python with a regularized loss function, but the gradient algorithm works badly. Please read the bold text first! You can just paste the code cell by cell.
import numpy as np, scipy as sp, sklearn as sl
from scipy import special as ss
from sklearn.base import ClassifierMixin, BaseEstimator
from sklearn.datasets import make_classification
import theano
import theano.tensor as T
Here is the loss function (scipy's expm1/log1p are used to "clip" the logarithm's argument near 1):
def lossf(w, X, y, l1, l2):
    w.resize((w.shape[0], 1))
    y.resize((y.shape[0], 1))
    # log(1 + exp(-y * Xw)) written as log1p(1 + expm1(.)) for numerical stability
    lossf1 = np.sum(ss.log1p(1 + ss.expm1(np.multiply(-y, np.dot(X, w)))))
    lossf2 = l2 * (np.dot(np.transpose(w), w))   # L2 penalty
    lossf3 = l1 * sum(abs(w))                    # L1 penalty
    lossf = np.float(lossf1 + lossf2 + lossf3)
    return lossf
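In other words, the loss this code is meant to compute (using the identity log1p(1 + expm1(z)) = log(1 + e^z)) is:

$$
L(w) \;=\; \sum_{i=1}^{n} \log\!\bigl(1 + e^{-y_i\, x_i^{\top} w}\bigr) \;+\; l_2\, w^{\top} w \;+\; l_1 \lVert w \rVert_1
$$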
Here is the gradient function (THE PROBLEM IS PROBABLY HERE, see the end):
def gradf(w, X, y, l1, l2):
    w.resize((w.shape[0], 1))
    y.resize((y.shape[0], 1))
    gradw1 = l2 * 2 * w               # gradient of the L2 term
    gradw2 = l1 * np.sign(w)          # (sub)gradient of the L1 term
    gradw3 = np.multiply(-y, (2 + ss.expm1(np.multiply(-y, np.dot(X, w)))))
    gradw3 = gradw3 / (2 + ss.expm1(np.multiply(-y, np.dot(X, w))))
    gradw3 = np.sum(np.multiply(gradw3, X), axis=0)
    gradw3.resize((gradw3.shape[0], 1))
    gradw = gradw1 + gradw2 + gradw3
    gradw.resize((gradw.shape[0],))
    return np.transpose(gradw)
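To narrow down where the mistake is, gradf can be checked against a finite-difference approximation of lossf. This is only a sketch: the helper numeric_grad, the small problem size and the eps value are illustrative choices, not part of the original code.

def numeric_grad(w, X, y, l1, l2, eps=1e-6):
    # central finite differences of lossf, one coordinate at a time
    w = w.ravel().astype(float)
    g = np.zeros_like(w)
    for j in range(w.shape[0]):
        wp, wm = w.copy(), w.copy()
        wp[j] += eps
        wm[j] -= eps
        g[j] = (lossf(wp, X, y, l1, l2) - lossf(wm, X, y, l1, l2)) / (2 * eps)
    return g

Xc, yc = make_classification(n_features=5, n_samples=20)
yc = 2.0 * (yc - 0.5)
w0 = 0.01 * np.random.randn(5)
diff = np.max(np.abs(gradf(w0.copy(), Xc, yc, 0.1, 0.1) - numeric_grad(w0, Xc, yc, 0.1, 0.1)))
print "max |analytic - numeric|: ", diff   # a large value means gradf is off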
Here is my LR class:
class LR(ClassifierMixin, BaseEstimator):
    def __init__(self, lr=0.0001, l1=0.1, l2=0.1, num_iter=100, verbose=0):
        self.l1 = l1
        self.l2 = l2
        self.w = None
        self.lr = lr
        self.verbose = verbose
        self.num_iter = num_iter

    def fit(self, X, y):
        n, d = X.shape
        self.w = np.zeros(shape=(d,))
        for i in range(self.num_iter):
            g = gradf(self.w, X, y, self.l1, self.l2)
            g.resize((g.shape[0], 1))
            self.w = self.w - g   # note: self.lr is not applied to the step
            print "Loss: ", lossf(self.w, X, y, self.l1, self.l2)
        return self

    def predict_proba(self, X):
        probs = 1 / (2 + ss.expm1(np.dot(-X, self.w)))
        return probs

    def predict(self, X):
        probs = self.predict_proba(X)
        probs = np.sign(2 * probs - 1)
        probs.resize((probs.shape[0],))
        return probs
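predict_proba relies on the identity 2 + expm1(-z) = 1 + exp(-z), so it returns the usual sigmoid probability:

$$
P(y = +1 \mid x) \;=\; \frac{1}{2 + \operatorname{expm1}(-x^{\top} w)} \;=\; \frac{1}{1 + e^{-x^{\top} w}} \;=\; \sigma(x^{\top} w)
$$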
Here are the tests:
X, y = make_classification(n_features=100, n_samples=100)
y = 2 * (y - 0.5)
clf = LR(lr=0.000001, l1=0.1, l2=0.1, num_iter=10, verbose=0)
clf = clf.fit(X, y)
yp = clf.predict(X)
yp.resize((100, 1))
accuracy = float(sum(y == yp)) / len(y)   # float division so the accuracy isn't truncated to 0 or 1
print "Accuracy: ", accuracy
Oops, this doesn't converge. But if I replace my gradw3 with Theano:
gradw3 = get_gradw3(w, X, y)
where:
w, X, y = T.matrices("wXy")
logloss = T.sum(T.log1p(1 + T.expm1(-y * T.dot(X, w))))
get_gradw3 = theano.function([w, X, y], T.grad(logloss, w).reshape(w.shape))
it converges to 100% accuracy. That means my gradw3 is implemented wrong, but I can't find the mistake. Desperately looking for help!
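The Theano function above also makes an element-wise comparison easy. This is again only a sketch: it calls gradf with l1 = l2 = 0 so that only the data term is compared, and it assumes theano.config.floatX is float64 (the default).

# fresh data, since the names X and y above were rebound to theano variables
Xd, yd = make_classification(n_features=100, n_samples=100)
yd = 2.0 * (yd - 0.5)
wd = 0.01 * np.random.randn(Xd.shape[1], 1)
g_numpy = gradf(wd.copy(), Xd, yd, 0.0, 0.0)                  # data term only (l1 = l2 = 0)
g_theano = get_gradw3(wd, Xd, yd.reshape(-1, 1)).ravel()      # theano reference gradient
print "max abs difference: ", np.max(np.abs(g_numpy - g_theano))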
Upvotes: 1
Views: 417
Reputation: 1297
Actually, I have finally made it work. I don't know what exactly was the crucial change, but here is a summary of my changes:

- replaced all np.multiply with *
- decreased the learning rate and the regularizers
- applied np.nan_to_num to the exponents

So here is the final code:
def lossf(w, X, y, l1, l2):
    w.resize((w.shape[0], 1))
    y.resize((y.shape[0], 1))
    # np.nan_to_num turns an overflowing expm1 (inf) into a large finite number
    lossf1 = np.sum(ss.log1p(1 + np.nan_to_num(ss.expm1(-y * np.dot(X, w)))))
    lossf2 = l2 * (np.dot(np.transpose(w), w))   # L2 penalty
    lossf3 = l1 * sum(abs(w))                    # L1 penalty
    lossf = np.float(lossf1 + lossf2 + lossf3)
    return lossf
def gradf(w, X, y, l1, l2):
    w.resize((w.shape[0], 1))
    y.resize((y.shape[0], 1))
    gradw1 = l2 * 2 * w               # gradient of the L2 term
    gradw2 = l1 * np.sign(w)          # (sub)gradient of the L1 term
    gradw3 = -y * (1 + np.nan_to_num(ss.expm1(-y * np.dot(X, w))))
    gradw3 = gradw3 / (2 + np.nan_to_num(ss.expm1(-y * np.dot(X, w))))
    gradw3 = np.sum(gradw3 * X, axis=0)
    gradw3.resize((gradw3.shape[0], 1))
    gradw = gradw1 + gradw2 + gradw3
    gradw.resize((gradw.shape[0],))
    return np.transpose(gradw)
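For what it's worth, comparing the two versions suggests the decisive fix is the numerator of gradw3: with $z = -y\, x^{\top} w$, the gradient of the log-loss term is

$$
\frac{\partial}{\partial w} \log\bigl(1 + e^{z}\bigr) \;=\; -y\, x\, \frac{e^{z}}{1 + e^{z}} \;=\; -y\, x\, \frac{1 + \operatorname{expm1}(z)}{2 + \operatorname{expm1}(z)},
$$

so the numerator needs 1 + expm1(z). The first version used 2 + expm1(z) there, which cancels the denominator and drops the sigmoid factor entirely.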
class LR(ClassifierMixin, BaseEstimator):
    def __init__(self, lr=0.000001, l1=0.1, l2=0.1, num_iter=100, verbose=0):
        self.l1 = l1
        self.l2 = l2
        self.w = None
        self.lr = lr
        self.verbose = verbose
        self.num_iter = num_iter

    def fit(self, X, y):
        n, d = X.shape
        self.w = np.zeros(shape=(d,))
        for i in range(self.num_iter):
            print "\n", "Iteration ", i
            g = gradf(self.w, X, y, self.l1, self.l2)
            g.resize((g.shape[0], 1))
            self.w = self.w - g   # note: self.lr is still not applied to the step
            print "Loss: ", lossf(self.w, X, y, self.l1, self.l2)
        return self

    def predict_proba(self, X):
        # 1 / (2 + expm1(-Xw)) == 1 / (1 + exp(-Xw)), the sigmoid of Xw
        probs = 1 / (2 + ss.expm1(np.dot(-X, self.w)))
        return probs

    def predict(self, X):
        probs = self.predict_proba(X)
        probs = np.sign(2 * probs - 1)
        probs.resize((probs.shape[0],))
        return probs
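To check it end to end, the test from the question can be rerun against this version (a sketch that mirrors the data setup above):

X, y = make_classification(n_features=100, n_samples=100)
y = 2.0 * (y - 0.5)
clf = LR(lr=0.000001, l1=0.1, l2=0.1, num_iter=10, verbose=0).fit(X, y)
yp = clf.predict(X)
print "Accuracy: ", float(np.sum(y.ravel() == yp)) / len(y)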
Upvotes: 1