Reputation: 19
I wrote a gradient descent script, but it doesn't seem to work well.
import numpy as np
from random import randint, random
import matplotlib.pyplot as plt
def calculh(theta, X):
    h = 0
    h += theta[0]*X   # w*X
    h += theta[-1]    # + b
    return h
def calculY(sigma, h):
    return sigma(h)  # sigma can be tanh, sigmoid, etc.
def erreurJ(theta, sigma):
    somme = 0
    somme = 1/4*(sigma(theta[1])**2 + sigma(theta[0]+theta[1])**2)
    return somme
def gradient(X, Y, Ysol, sigmaprime, h):
    return ((Y-Ysol)*sigmaprime(h)*X, (Y-Ysol)*sigmaprime(h)*1)
def grad(theta):
    w, b = theta[0], theta[1]
    #print(theta)
    return [2*b**3+3*b**2*w+3*b*w**2-2*b+w**3-w, b**3+3*b**2*w+3*b*w**2-b+w**3-w]
# *X corresponds to 0 or 1: our 2 inputs; *1 corresponds to the derivative with respect to b
def pasfixe(theta, eta, epsilon, X, Y, Ysol, sigma, sigmaprime, h):
    n = 0
    while np.linalg.norm(gradient(X, Y, Ysol, sigmaprime, h)) > epsilon and n < 10000:
        for i in range(len(theta)):
            theta[i] = theta[i] - eta*gradient(X, Y, Ysol, sigmaprime, h)[i]
        h = calculh(theta, X)
        Y = calculY(sigma, h)
        n += 1
        if theta[i] > 100:  ### divergence case
            return [100, 100], Y
    return theta, Y
sigma = lambda z : z**2-1
sigmaprime = lambda z : 2*z
eta = 0.1
X = 1
Ysol = 0
listeY = []
listetheta = []
lst = [[3*random()*(-1)**randint(0,1),3*random()*(-1)**randint(0,1)] for i in range(5000)]
nb = 0
for i in lst:
    nb += 1
    if nb % 50 == 0:
        print(nb)
    theta = i[:]
    h = calculh(theta, X)
    Y = calculY(sigma, h)
    CalculTheta = pasfixe(theta, eta, 10**-4, X, Y, Ysol, sigma, sigmaprime, h)
    listetheta.append(CalculTheta[0])
    listeY.append(CalculTheta[1])
for i in range(len(listeY)):
    listeY[i] = round(listeY[i], 2)
print(listeY)
for i in range(len(listetheta)):
    for j in range(2):
        listetheta[i][j] = round(listetheta[i][j], 2)
print(listetheta)
for i in range(len(lst)):
    if [int(listetheta[i][0]), int(listetheta[i][1])] in [[-2, 1]]:
        plt.plot(lst[i][0], lst[i][1], "bo")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] in [[2, -1]]:
        plt.plot(lst[i][0], lst[i][1], "co")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] in [[0, -1]]:
        plt.plot(lst[i][0], lst[i][1], "go")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] in [[0, 1]]:
        plt.plot(lst[i][0], lst[i][1], "mo")
    elif int(listetheta[i][0])**2 + int(listetheta[i][1])**2 >= 10:
        plt.plot(lst[i][0], lst[i][1], "ro")
plt.show()
At the end I make a graph of the initial weight and bias values, and each point is colored according to which theta (weight, bias) the value given at the start of the loop converges to. The graph I am supposed to have:
I also tried to calculate the gradient myself (the grad function), but that didn't work either. I am supposed to get a graph like this one:
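For reference, the four theta values used in the coloring, (-2, 1), (2, -1), (0, -1) and (0, 1), are exactly the zeros of the cost when sigma(z) = z**2 - 1, so they are the minima the descent should reach. A small stand-alone check (it re-declares sigma and the cost so it can run on its own):

# Stand-alone check that the four coloring targets are zeros of the cost,
# using the same sigma and cost expression as in the script above.
sigma = lambda z: z**2 - 1
cost = lambda w, b: 1/4 * (sigma(b)**2 + sigma(w + b)**2)

for w, b in [(-2, 1), (2, -1), (0, -1), (0, 1)]:
    print((w, b), cost(w, b))  # each prints 0.0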
Upvotes: 1
Views: 31
Reputation: 109
The changes below fix the gradient descent, correcting the earlier errors in the gradient calculation and in how the parameters were updated. The code now uses the correct analytical gradient, grad(theta), derived directly from the cost function erreurJ, replacing the previous incorrect gradient() function, and it removes the unnecessary inner loop so that each iteration performs a single, vectorized parameter update. These two key changes, the correct gradient and the streamlined update step, let the algorithm converge properly towards the minima of the cost function.
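To see where grad(theta) comes from: with inputs X in {0, 1}, target 0 and sigma(z) = z**2 - 1, the cost reduces to J(w, b) = 1/4 * ((b**2 - 1)**2 + ((w + b)**2 - 1)**2), and differentiating with respect to w and b gives the two polynomials used in grad. A small optional check with sympy (this assumes sympy is installed; it is not needed by the script below):

# Optional sanity check of the hand-derived gradient with sympy.
import sympy as sp

w, b = sp.symbols("w b")
J = sp.Rational(1, 4) * ((b**2 - 1)**2 + ((w + b)**2 - 1)**2)

dJ_dw = sp.expand(sp.diff(J, w))  # equals b**3 + 3*b**2*w + 3*b*w**2 - b + w**3 - w
dJ_db = sp.expand(sp.diff(J, b))  # equals 2*b**3 + 3*b**2*w + 3*b*w**2 - 2*b + w**3 - w
print(dJ_dw)
print(dJ_db)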
import numpy as np
from random import randint, random
import matplotlib.pyplot as plt
# Activation function (sigma) and its derivative (sigmaprime)
sigma = lambda z: z**2 - 1
sigmaprime = lambda z: 2 * z
# Cost function (erreurJ), unchanged from the question
def erreurJ(theta, sigma):
    somme = 1/4 * (sigma(theta[1])**2 + sigma(theta[0] + theta[1])**2)
    return somme
# Analytical gradient of erreurJ with respect to theta = (w, b)
def grad(theta):
    w, b = theta[0], theta[1]
    dJ_dw = b**3 + 3*b**2*w + 3*b*w**2 - b + w**3 - w
    dJ_db = 2*b**3 + 3*b**2*w + 3*b*w**2 - 2*b + w**3 - w
    return np.array([dJ_dw, dJ_db])  # same order as theta: (dJ/dw, dJ/db)
# Gradient descent with a fixed step size (pasfixe)
def pasfixe(theta, eta, epsilon, sigma, sigmaprime):
    n = 0
    theta = np.array(theta, dtype=np.float64)  # work on a float numpy array
    while np.linalg.norm(grad(theta)) > epsilon and n < 10000:
        gradient = grad(theta)
        theta = theta - eta * gradient
        n += 1
        # Check for divergence
        if np.any(np.abs(theta) > 100):
            return [100, 100]
    return theta
eta = 0.01 # Learning rate
epsilon = 1e-4 # Tolerance
#initial values
lst = [[3 * random() * (-1)**randint(0, 1), 3 * random() * (-1)**randint(0, 1)] for i in range(5000)]
listetheta = []
listeY = []
# Run gradient descent from each starting point
for i in lst:
    theta = i[:]  # copy of the initial (w, b)
    CalculTheta = pasfixe(theta, eta, epsilon, sigma, sigmaprime)  # gradient descent
    listetheta.append(CalculTheta)                # the new parameters
    listeY.append(erreurJ(CalculTheta, sigma))    # final cost function value
# Rounding
for i in range(len(listeY)):
    listeY[i] = round(listeY[i], 2)
for i in range(len(listetheta)):
    for j in range(2):
        listetheta[i][j] = round(listetheta[i][j], 2)
# Visualization: color each starting point by the minimum it reached
for i in range(len(lst)):
    if [int(listetheta[i][0]), int(listetheta[i][1])] == [-2, 1]:
        plt.plot(lst[i][0], lst[i][1], "bo")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] == [2, -1]:
        plt.plot(lst[i][0], lst[i][1], "co")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] == [0, -1]:
        plt.plot(lst[i][0], lst[i][1], "go")
    elif [int(listetheta[i][0]), int(listetheta[i][1])] == [0, 1]:
        plt.plot(lst[i][0], lst[i][1], "mo")
    elif int(listetheta[i][0])**2 + int(listetheta[i][1])**2 >= 10:  # divergence zone
        plt.plot(lst[i][0], lst[i][1], "ro")
plt.xlabel("Initial w (Weight)")
plt.ylabel("Initial b (Bias)")
plt.title("Gradient Descent Convergence")
plt.show()
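If you want a quick summary of how many starting points ended up in each basin before looking at the plot, you can tally the rounded results; this is just a convenience snippet on top of the script above, reusing listetheta:

# Count how many runs ended near each minimum (and how many hit the divergence cap)
from collections import Counter
print(Counter((int(t[0]), int(t[1])) for t in listetheta))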
Upvotes: 1