Ladenkov Vladislav

Reputation: 1297

Logistic regression GD implementation in python

I'm implementing logistic regression in Python with a regularized loss function of this form (the log-loss data term plus L2 and L1 penalties, as in the code below):

    L(w) = sum_i log(1 + exp(-y_i * x_i^T w)) + l2 * ||w||^2 + l1 * ||w||_1

But the gradient descent algorithm works badly. Please read the bold text first! You can just paste the code cell by cell.

import numpy as np, scipy as sp, sklearn as sl
from scipy import special as ss
from sklearn.base import ClassifierMixin, BaseEstimator
from sklearn.datasets import make_classification
import theano
import theano.tensor as T

Here is the loss function (scipy's expm1/log1p are used to keep the logarithm's argument accurate near 1):

def lossf(w, X, y, l1, l2):
    w.resize((w.shape[0],1))
    y.resize((y.shape[0],1))

    lossf1 = np.sum(ss.log1p(1 + ss.expm1(np.multiply(-y, np.dot(X, w)))))
    lossf2 = l2 * (np.dot(np.transpose(w), w))
    lossf3 = l1 * sum(abs(w))
    lossf = np.float(lossf1 + lossf2 + lossf3)
    return lossf
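As a side note, log1p(1 + expm1(x)) is algebraically log(2 + exp(x) - 1) = log(1 + exp(x)), i.e. the usual log-loss term. A tiny standalone sketch (illustrative only) of that identity against the equivalent np.logaddexp form:

    import numpy as np
    from scipy import special as ss

    x = np.linspace(-30, 30, 7)
    # log1p(1 + expm1(x)) == log(1 + exp(x)) == logaddexp(0, x)
    print np.allclose(ss.log1p(1 + ss.expm1(x)), np.logaddexp(0, x))   # prints True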

Here is the gradient function (??PROBLEM HERE?? - see the end):

def gradf(w, X, y, l1, l2):
    w.resize((w.shape[0],1))
    y.resize((y.shape[0],1))

    gradw1 = l2 * 2 * w 
    gradw2 = l1 * np.sign(w)
    gradw3 = np.multiply(-y,(2 + ss.expm1(np.multiply(-y, np.dot(X, w)))))
    gradw3 = gradw3 / (2 + (ss.expm1((np.multiply(-y, np.dot(X, w))))))
    gradw3 = np.sum(np.multiply(gradw3, X), axis=0)
    gradw3.resize(gradw3.shape[0],1)
    gradw = gradw1 + gradw2 + gradw3
    gradw.resize(gradw.shape[0],)
    return np.transpose(gradw)
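A finite-difference check is one way to see whether gradf actually matches lossf: the analytic gradient should agree with a numerical derivative of the loss at a random point. A rough sketch (check_grad, Xc, yc are made-up names; it reuses the lossf/gradf defined above):

    def check_grad(w0, X, y, l1, l2, eps=1e-6):
        analytic = gradf(w0.copy(), X, y, l1, l2).ravel()
        numeric = np.zeros_like(analytic)
        for j in range(w0.size):
            wp, wm = w0.copy(), w0.copy()
            wp[j] += eps
            wm[j] -= eps
            numeric[j] = (lossf(wp, X, y, l1, l2) - lossf(wm, X, y, l1, l2)) / (2 * eps)
        return np.max(np.abs(analytic - numeric))   # should be close to 0 if gradf is right

    Xc, yc = make_classification(n_features=5, n_samples=20)
    yc = 2.0 * (yc - 0.5)
    print check_grad(np.random.randn(5), Xc, yc, 0.1, 0.1)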

Here is my LR class:

class LR(ClassifierMixin, BaseEstimator):
    def __init__(self, lr=0.0001, l1=0.1, l2=0.1, num_iter=100, verbose=0):
        self.l1 = l1
        self.l2 = l2
        self.w = None
        self.lr = lr
        self.verbose = verbose
        self.num_iter = num_iter

    def fit(self, X, y):
        n, d = X.shape
        self.w = np.zeros(shape=(d,))
        for i in range(self.num_iter):
            g = gradf(self.w, X, y, self.l1, self.l2)
            g.resize((g.shape[0],1))
            self.w = self.w - g
            print "Loss: ", lossf(self.w, X, y, self.l1, self.l2)
        return self

    def predict_proba(self, X):
        probs = 1/(2 + ss.expm1(np.dot(-X, self.w)))
        return probs

    def predict(self, X):
        probs = self.predict_proba(X)
        probs = np.sign(2 * probs - 1)
        probs.resize((probs.shape[0],))
        return probs
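Note that 1/(2 + expm1(z)) in predict_proba is just the standard sigmoid 1/(1 + exp(z)). A tiny standalone check of that identity against scipy.special.expit (illustrative only):

    import numpy as np
    from scipy import special as ss

    z = np.linspace(-5, 5, 11)
    # 1 / (2 + expm1(-z)) == 1 / (1 + exp(-z)) == sigmoid(z)
    print np.allclose(1.0 / (2 + ss.expm1(-z)), ss.expit(z))   # prints True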

Here are the tests:

X, y = make_classification(n_features=100, n_samples=100)
y = 2 * (y - 0.5)
clf = LR(lr=0.000001, l1=0.1, l2=0.1, num_iter=10, verbose=0)
clf = clf.fit(X, y)
yp = clf.predict(X)
yp.resize((100,1))
accuracy = float(np.sum(y == yp)) / len(y)

Oops, this doesn't converge. But if I replace my gradw3 with Theano:

gradw3 = get_gradw3(w,X,y)

where:
w,X,y = T.matrices("wXy") 
logloss = T.sum(T.log1p(1 + T.expm1(-y* T.dot(X, w)))) 
get_gradw3 = theano.function([w,X,y],T.grad(logloss,w).reshape(w.shape))

it converges to 100% accuracy. That means my gradw3 is implemented wrong, but I can't find the mistake. Greedily looking for help!
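One way to localize the mismatch is to evaluate both versions on the same small random data with l1 = l2 = 0, so that only the gradw3 part is being compared. A sketch (wv, Xv, yv are made-up names; it reuses get_gradw3 and gradf from above):

    d, n = 5, 20
    Xv = np.random.randn(n, d)
    wv = np.random.randn(d, 1)
    yv = np.sign(np.random.randn(n, 1))
    print get_gradw3(wv, Xv, yv).ravel()             # Theano's gradient of the log-loss term
    print gradf(wv.copy(), Xv, yv.copy(), 0.0, 0.0)  # hand-written gradient, penalties switched off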

Upvotes: 1

Views: 417

Answers (1)

Ladenkov Vladislav

Reputation: 1297

Actually, I have finally made it work. I don't know what exactly the crucial change was, but here is a summary of my changes:

  • replaced all np.multiply with *

  • Decreased the learning rate and the regularizers

  • Applied np.nan_to_num to the exponents (see the short sketch right after this list)
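
To illustrate the np.nan_to_num point: scipy's expm1 overflows to inf for large positive arguments, and np.nan_to_num replaces that inf with the largest finite float, so the later sums stay finite. A minimal sketch (illustrative only):

    import numpy as np
    from scipy import special as ss

    z = np.array([-1000.0, 0.0, 1000.0])
    print ss.expm1(z)                  # [-1.  0.  inf]  (overflow for large z)
    print np.nan_to_num(ss.expm1(z))   # inf is clipped to ~1.8e308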

So here is the final code:

def lossf(w, X, y, l1, l2):
    w.resize((w.shape[0],1))
    y.resize((y.shape[0],1))

    lossf1 = np.sum(ss.log1p(1 + np.nan_to_num(ss.expm1(-y * np.dot(X, w)))))
    lossf2 = l2 * (np.dot(np.transpose(w), w))
    lossf3 = l1 * sum(abs(w))
    lossf = np.float(lossf1 + lossf2 + lossf3)
    return lossf

def gradf(w, X, y, l1, l2):
    w.resize((w.shape[0],1))
    y.resize((y.shape[0],1))

    gradw1 = l2 * 2 * w 
    gradw2 = l1 * np.sign(w)
    gradw3 = -y * (1 + np.nan_to_num(ss.expm1(-y * np.dot(X, w))))
    gradw3 = gradw3 / (2 + np.nan_to_num(ss.expm1(-y * np.dot(X, w))))
    gradw3 = np.sum(gradw3 * X, axis=0)
    gradw3.resize(gradw3.shape[0],1)
    gradw = gradw1 + gradw2 + gradw3
    gradw.resize(gradw.shape[0],)
    return np.transpose(gradw)

class LR(ClassifierMixin, BaseEstimator):
    def __init__(self, lr=0.000001, l1=0.1, l2=0.1, num_iter=100, verbose=0):
        self.l1 = l1
        self.l2 = l2
        self.w = None
        self.lr = lr
        self.verbose = verbose
        self.num_iter = num_iter

    def fit(self, X, y):       
        n, d = X.shape 
        self.w = np.zeros(shape=(d,))
        for i in range(self.num_iter):
            print "\n", "Iteration ", i
            g = gradf(self.w, X, y, self.l1, self.l2)
            g.resize((g.shape[0],1))
            self.w = self.w - g
            print "Loss: ", lossf(self.w, X, y, self.l1, self.l2)
        return self

    def predict_proba(self, X):
        probs = 1/(2 + ss.expm1(np.dot(-X, self.w)))
        return probs 

    def predict(self, X):
        probs = self.predict_proba(X)
        probs = np.sign(2 * probs - 1)
        probs.resize((probs.shape[0],))
        return probs 

Upvotes: 1
