mavi

Reputation: 1138

Xor gate with Backpropagation

I am trying to understand how backpropagation works, so I wrote a straightforward script to try it out before writing a generalized algorithm.

The script tries to train an XOR gate. My neural network is very simple: 2 inputs, 2 hidden neurons, and 1 output. (Note that the biases are omitted for simplicity.)

[network diagram: 2 inputs → 2 hidden neurons → 1 output, fully connected, no biases]

(for more information see the images attached at the end)
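In case the diagram doesn't load, this is the forward pass my script implements (in my notation, $w^{(0)}_{ij}$ is `w0[i,j]` and $w^{(1)}_i$ is `w1[i]`):

$$
\begin{aligned}
h_0 &= \sigma\!\left(w^{(0)}_{00}\,x_0 + w^{(0)}_{10}\,x_1\right)\\
h_1 &= \sigma\!\left(w^{(0)}_{01}\,x_0 + w^{(0)}_{11}\,x_1\right)\\
y_0 &= \sigma\!\left(w^{(1)}_{0}\,h_0 + w^{(1)}_{1}\,h_1\right),\qquad \sigma(z)=\frac{1}{1+e^{-z}}
\end{aligned}
$$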

The problem is that after training, the perceptron doesn't work, and I don't know where the problem is: it could be in my equations or in my implementation.

Code:

    def xor(self):
        print('xor')
        X = np.array([[1,1],[1,0],[0,1],[0,0]]) #X.shape = (4,2)
        y = np.array([0,1,1,0])
        w0 = np.array([[.9,.1],[.3,.5]]) #random weights layer0
        w1 = np.array([.8,.7]) #random weights layer1

        #forward pass
        youtput=[]
        for i in range(X.shape[0]):#X.shape = (4,2)
            #print('x0', X[i][0])
            #print('x1', X[i][1])
            h0 = self.sig(w0[0,0]*X[i][0] + w0[1,0]*X[i][1])
            h1 = self.sig(w0[0,1]*X[i][0] + w0[1,1]* X[i][1])
            y0 = self.sig(w1[0]* h0 + w1[1] * h1) # scalar output for this sample
            youtput.append(y0)
            print('y0',y0)

            #backpropagation
            dey0 = -(y[i]-y0) # y[i] -> desired output | y0 -> output
            deW0_00 = dey0 * y0 * (1 - y0) * w1[0] * h0 * (1 - h0) * X[i][0]
            deW0_01 = dey0 * y0 * (1 - y0) * w1[1] * h1 * (1 - h1) * X[i][0]
            deW0_10 = dey0 * y0 * (1 - y0) * w1[0] * h0 * (1 - h0) * X[i][1]
            deW0_11 = dey0 * y0 * (1 - y0) * w1[1] * h1 * (1 - h1) * X[i][1]
            deW1_00 = dey0 * h0
            deW1_10 = dey0 * h1

            w0[0,0] = self.gradient(w0[0,0], deW0_00)
            w0[0,1] = self.gradient(w0[0,1], deW0_01)
            w0[1,0] = self.gradient(w0[1,0], deW0_10)
            w0[1,1] = self.gradient(w0[1,1], deW0_11)
            w1[0] = self.gradient(w1[0], deW1_00)
            w1[1] = self.gradient(w1[1], deW1_10)

            #print('print W0, ', w0)
            #print('print W1, ', w1)
        print('error -> ', self.error(y,youtput ))  
        #forward pass
        youtput2= []
        for i in range(X.shape[0]):#X.shape = (4,2)
            print('x0 =', X[i][0], ', x1 =', X[i][1])
            h0 = self.sig(w0[0,0]*X[i][0] + w0[1,0]*X[i][1])
            h1 = self.sig(w0[0,1]*X[i][0] + w0[1,1]* X[i][1])
            y0 = self.sig(w1[0]* h0 + w1[1] * h1)
            youtput2.append(y0)
            print('y0----->',y0)
        print('error -> ', self.error(y,youtput2 ))

    def gradient(self, w, w_derivative):
        alpha = .001
        for i in range(1000000):
            w = w - alpha * w_derivative
        return w

    def error(self, y, yhat):
        e = 0
        for i in range (y.shape[0]):
            e = e + .5 * (y[i]- yhat[i])**2
        return e 

    def sig(self,x):
         return 1 / (1 + math.exp(-x)) 

Result

PS C:\gitProjects\perceptron> python .\perceptron.py
xor
y0 0.7439839341840395
y0 0.49999936933995615
y0 0.4999996364775347
y0 7.228146514841657e-229
error ->  0.5267565442535
x0 = 1 , x1 = 1
y0-----> 0.49999999999999856
x0 = 1 , x1 = 0
y0-----> 0.4999993695274945
x0 = 0 , x1 = 1
y0-----> 0.49999963653435153
x0 = 0 , x1 = 0
y0-----> 7.228146514841657e-229
error ->  0.3750004969693411

The equations. [The derivation was attached as images.]
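In case those images don't load, these are the gradient terms my code computes (with $E=\tfrac12(y-y_0)^2$ and $\delta=-(y-y_0)$):

$$
\begin{aligned}
\Delta w^{(0)}_{00} &= \delta\, y_0(1-y_0)\, w^{(1)}_0\, h_0(1-h_0)\, x_0\\
\Delta w^{(0)}_{01} &= \delta\, y_0(1-y_0)\, w^{(1)}_1\, h_1(1-h_1)\, x_0\\
\Delta w^{(0)}_{10} &= \delta\, y_0(1-y_0)\, w^{(1)}_0\, h_0(1-h_0)\, x_1\\
\Delta w^{(0)}_{11} &= \delta\, y_0(1-y_0)\, w^{(1)}_1\, h_1(1-h_1)\, x_1\\
\Delta w^{(1)}_{0} &= \delta\, h_0,\qquad \Delta w^{(1)}_{1} = \delta\, h_1
\end{aligned}
$$

Each $\Delta w$ is then passed to `self.gradient` for the weight update.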

Upvotes: 3

Views: 1492

Answers (1)

Paul Denoyes

Reputation: 315

I just changed the way you "loop": the weight update is now applied once per call, and the training itself loops over epochs until the error is small enough. It seems to be working fine now (modified code below).

I may have missed something, but your backprop looks OK.

import numpy as np
import math

class perceptronmonocouche(object):
    def xor(self):
        print('xor')
        X = np.array([[1,1],[1,0],[0,1],[0,0]]) #X.shape = (4,2)
        y = np.array([0,1,1,0])
        w0 = np.array([[.9,.1],[.3,.5]]) #random weights layer0
        w1 = np.array([.8,.7]) #random weights layer1
        max_epochs = 10000
        epochs = 0
        agreed_convergence_error = 0.001
        error = 1
        decision_threshold = 0.5

        while epochs <= max_epochs and error > agreed_convergence_error:
            #forward pass
            epochs += 1
            youtput=[]
            for i in range(X.shape[0]):#X.shape = (4,2)
                #print('x0', X[i][0])
                #print('x1', X[i][1])
                h0 = self.sig(w0[0,0]*X[i][0] + w0[1,0]*X[i][1])
                h1 = self.sig(w0[0,1]*X[i][0] + w0[1,1]* X[i][1])
                y0 = self.sig(w1[0]* h0 + w1[1] * h1) # scalar output for this sample
                youtput.append(y0)
                if epochs%1000 ==0:
                    print('y0',y0)
                    if y0 > decision_threshold:
                        prediction = 1
                    else:
                        prediction = 0
                    print('real value', y[i])
                    print('predicted value', prediction)

                #backpropagation
                dey0 = -(y[i]-y0) # y[i] -> desired output | y0 -> output            
                dew0_00 = dey0 * y0 * (1 - y0) * w1[0] * h0 * (1 - h0) * X[i][0]
                dew0_01 = dey0 * y0 * (1 - y0) * w1[1] * h1 * (1 - h1) * X[i][0]
                dew0_10 = dey0 * y0 * (1 - y0) * w1[0] * h0 * (1 - h0) * X[i][1]
                dew0_11 = dey0 * y0 * (1 - y0) * w1[1] * h1 * (1 - h1) * X[i][1]
                dew1_0 = dey0 * h0
                dew1_1 = dey0 * h1

                w0[0,0] = self.gradient(w0[0,0], dew0_00)
                w0[0,1] = self.gradient(w0[0,1], dew0_01)
                w0[1,0] = self.gradient(w0[1,0], dew0_10)
                w0[1,1] = self.gradient(w0[1,1], dew0_11)
                w1[0] = self.gradient(w1[0], dew1_0)
                w1[1] = self.gradient(w1[1], dew1_1)

                #print('print W0, ', w0)
                #print('print W1, ', w1)
            error = self.error(y,youtput )
            if epochs%1000 ==0:
                print('error -> ', error)

    def gradient(self, w, w_derivative):
        alpha = .2
        w = w - alpha * w_derivative
        return w

    def error(self, y, yhat):
        e = 0
        for i in range (y.shape[0]):
            e = e + .5 * (y[i]- yhat[i])**2
        return e 

    def sig(self,x):
         return 1 / (1 + math.exp(-x))

p = perceptronmonocouche()
p.xor()

Result

y0 0.05892656406522486
real value 0
predicted value 0
y0 0.9593864604895951
real value 1
predicted value 1
y0 0.9593585562506973
real value 1
predicted value 1
y0 0.03119936553811551
real value 0
predicted value 0
error ->  0.003873463452052477

Note: here it works fine without the biases, but whenever you can, I'd recommend keeping the biases in the propagation.
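For example, here is a minimal sketch of how the biases could be slotted into the same forward pass and update (the names `b0`, `b1`, `forward_with_bias` are mine, not from the original code):

    import math

    def sig(x):
        return 1 / (1 + math.exp(-x))

    def forward_with_bias(x0, x1, w0, w1, b0, b1):
        # Each hidden neuron gets its own bias added to its weighted sum.
        h0 = sig(w0[0][0]*x0 + w0[1][0]*x1 + b0[0])
        h1 = sig(w0[0][1]*x0 + w0[1][1]*x1 + b0[1])
        # The output neuron gets a bias as well.
        y0 = sig(w1[0]*h0 + w1[1]*h1 + b1)
        return h0, h1, y0

    # A bias behaves like a weight whose input is always 1, so its gradient is
    # the corresponding weight gradient from the post with the x (or h) factor
    # replaced by 1:
    #   deB0_0 = dey0 * y0*(1 - y0) * w1[0] * h0*(1 - h0)   # dew0_00 with x0 -> 1
    #   deB0_1 = dey0 * y0*(1 - y0) * w1[1] * h1*(1 - h1)   # dew0_01 with x0 -> 1
    #   deB1   = dey0                                        # dew1_0 with h0 -> 1
    # and each bias is updated the same way as the weights: b = b - alpha * deB.

    # Example call with the post's initial weights and zero biases:
    h0, h1, y0 = forward_with_bias(1, 0, [[.9, .1], [.3, .5]], [.8, .7], [0.0, 0.0], 0.0)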

Upvotes: 2
