ismail rachid

Reputation: 19

theta values for gradient descent not coherent

I made a gradient descent code, but it doesn't seem to work well.

import numpy as np
from random import randint,random
import matplotlib.pyplot as plt


def calculh(theta, X):
    h = 0
    h+=theta[0]*X # w*X
    h+= theta[-1] # +b
    return h


def calculY(sigma, h) :
    return sigma(h) # sigma can be tanh, sigmoid, etc.


def erreurJ(theta, sigma):
    somme = 0
    somme = 1/4*(sigma(theta[1])**2+sigma(theta[0]+theta[1])**2)
    return somme


def gradient(X, Y, Ysol, sigmaprime, h):
    return ((Y-Ysol)*sigmaprime(h)*X ,(Y-Ysol)*sigmaprime(h)*1)
def grad(theta):
    w,b = theta[0],theta[1]
    #print(theta)
    return [2*b**3+3*b**2*w+3*b*w**2-2*b+w**3-w,b**3+3*b**2*w+3*b*w**2-b+w**3-w]
# *X corresponds to 0 or 1: our 2 inputs; *1 corresponds to the derivative with respect to b

def pasfixe(theta, eta, epsilon, X, Y, Ysol, sigma, sigmaprime, h):
    n=0
    while np.linalg.norm(gradient(X, Y, Ysol, sigmaprime, h)) > epsilon and n<10000 :
        for i in range(len(theta)) :
            theta[i] = theta[i] - eta*gradient(X, Y, Ysol, sigmaprime, h)[i]
            h = calculh(theta, X)
            Y = calculY(sigma, h)
            n+=1
            if theta[i]>100 : ### cas de divergence
                return [100,100],Y
    return theta,Y

sigma = lambda z : z**2-1
sigmaprime = lambda z : 2*z
eta = 0.1

X = 1
Ysol = 0
listeY = []
listetheta = []
lst = [[3*random()*(-1)**randint(0,1),3*random()*(-1)**randint(0,1)] for i in range(5000)]
nb = 0
for i in lst:
        nb+=1
        if nb%50 == 0:
            print(nb)
        theta = i[:]
        h = calculh(theta, X)
        Y = calculY(sigma, h)
        CalculTheta = pasfixe( theta, eta, 10**-4, X,Y, Ysol, sigma, sigmaprime, h)
        listetheta.append(CalculTheta[0])
        listeY.append(CalculTheta[1])


for i in range (len(listeY)):
          listeY[i] = round(listeY[i],2)
print (listeY)

for i in range (len(listetheta)):
      for j in range(2):
          listetheta[i][j] = round(listetheta[i][j],2)
print (listetheta)

for i in range(len(lst)):
    if [int(listetheta[i][0]),int(listetheta[i][1])] in [[-2,1]]:
        plt.plot(lst[i][0],lst[i][1],"bo")
    elif [int(listetheta[i][0]),int(listetheta[i][1])] in [[2,-1]]:
        plt.plot(lst[i][0],lst[i][1],"co")
    elif  [int(listetheta[i][0]),int(listetheta[i][1])] in [[0,-1]]:
        plt.plot(lst[i][0],lst[i][1],"go")
    elif  [int(listetheta[i][0]),int(listetheta[i][1])] in [[0,1]]:
        plt.plot(lst[i][0],lst[i][1],"mo")
    elif  int(listetheta[i][0])**2 +int(listetheta[i][1])**2 >= 10:
        plt.plot(lst[i][0],lst[i][1],"ro")

plt.show()

In the end I make a graph with the bias and weight values, and each point is colored according to which theta (weight, bias) value the theta given at the beginning of the loop converges to. This is the graph I am supposed to have.

I tried to calculate the gradient myself, but that did not work either. I am supposed to get a graph like this one.
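
For reference, the four theta values that the plotting loop tests, [0, 1], [-2, 1], [0, -1] and [2, -1], are exactly the points where erreurJ is zero, which can be checked directly with the functions defined above (a small illustrative check, not part of the original script):

# illustrative check: erreurJ vanishes at the four expected minima
for w, b in ([0, 1], [-2, 1], [0, -1], [2, -1]):
    print((w, b), erreurJ([w, b], sigma))  # each line should print 0.0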

Upvotes: 1

Views: 31

Answers (1)

Gamze

Reputation: 109

The changes below rework the gradient descent, correcting the earlier errors in the gradient calculation and in how the parameters were updated. The code now uses the analytical gradient grad(theta), derived directly from the cost function erreurJ, in place of the previous incorrect gradient() function, and it removes the unnecessary inner loop so that each iteration performs a single, vectorized parameter update. These two key changes, the correct gradient and a streamlined update step, ensure the algorithm converges properly towards the minima of the cost function.
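
To see where grad(theta) comes from, you can differentiate erreurJ symbolically; below is a minimal check using sympy (an optional verification, not part of the fix itself):

import sympy as sp

w, b = sp.symbols('w b', real=True)
sig = lambda z: z**2 - 1                             # same sigma as in the code
J = sp.Rational(1, 4) * (sig(b)**2 + sig(w + b)**2)  # erreurJ written symbolically

print(sp.expand(sp.diff(J, w)))  # dJ/dw
print(sp.expand(sp.diff(J, b)))  # dJ/db

The full corrected code: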

import numpy as np
from random import randint, random
import matplotlib.pyplot as plt

# Activation function (sigma) and its derivative (sigmaprime)
sigma = lambda z: z**2 - 1
sigmaprime = lambda z: 2 * z

# Cost function (erreurJ), unchanged from the question
def erreurJ(theta, sigma):
    somme = 0
    somme = 1/4*(sigma(theta[1])**2+sigma(theta[0]+theta[1])**2)
    return somme

def grad(theta):
    # Analytical gradient of erreurJ, in the same order as theta = [w, b]
    w, b = theta[0], theta[1]
    dJdw = w**3 + 3*w**2*b + 3*w*b**2 + b**3 - w - b
    dJdb = w**3 + 3*w**2*b + 3*w*b**2 + 2*b**3 - w - 2*b
    return np.array([dJdw, dJdb])

# Gradient Descent (pasfixe)
def pasfixe(theta, eta, epsilon, sigma, sigmaprime): 
    n = 0
    theta = np.array(theta, dtype=np.float64)  # convert to a float NumPy array
    while np.linalg.norm(grad(theta)) > epsilon and n < 10000:
        gradient = grad(theta)  
        theta = theta - eta * gradient 
        n += 1

        #Check divergence
        if np.any(np.abs(theta) > 100):
            return [100, 100]

    return theta


eta = 0.01  # Learning rate
epsilon = 1e-4 # Tolerance

#initial values
lst = [[3 * random() * (-1)**randint(0, 1), 3 * random() * (-1)**randint(0, 1)] for i in range(5000)]
listetheta = []
listeY = []

# Gradient Descent for each starting point
for i in lst:
    theta = i[:]  #Copy initial values
    CalculTheta = pasfixe(theta, eta, epsilon, sigma, sigmaprime)  # run gradient descent
    listetheta.append(CalculTheta)  #the new parameters
    listeY.append(erreurJ(CalculTheta, sigma))  #final cost function value


# Rounding
for i in range(len(listeY)):
    listeY[i] = round(listeY[i], 2)

for i in range(len(listetheta)):
    for j in range(2):
        listetheta[i][j] = round(listetheta[i][j], 2)

# Visualization
for i in range(len(lst)):
    if [int(listetheta[i][0]), int(listetheta[i][1])] in [[-2, 1]]:
        plt.plot(lst[i][0], lst[i][1], "bo")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] in [[2, -1]]:
        plt.plot(lst[i][0], lst[i][1], "co")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] in [[0, -1]]:
        plt.plot(lst[i][0], lst[i][1], "go")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] in [[0, 1]]:
        plt.plot(lst[i][0], lst[i][1], "mo")
    elif int(listetheta[i][0])**2 + int(listetheta[i][1])**2 >= 10: #divergence zone
        plt.plot(lst[i][0], lst[i][1], "ro")

plt.xlabel("Initial w (Weight)")
plt.ylabel("Initial b (Bias)")
plt.title("Gradient Descent Convergence")
plt.show()

Upvotes: 1
