Reputation: 79
I tried to implement a simple demo that gets a polynomial regression, but the linear model's loss fails to decrease.
I am confused about where I went wrong.
If I trained the model one sample(batch size = 1) each time, it works fine. but when I feed the model with many samples a time, the loss increase and get inf.
import numpy as np
import torch
import math
from matplotlib import pyplot as plt
def rand_series(size):
x = np.linspace(-100, 100, size)
np.random.shuffle(x)
base_y = 20 * np.sin(2 * math.pi / 200 * x)
y = base_y + 10 * np.random.rand(size)
return x, y
def rescale_vec(vector):
vec_as_tensor = torch.tensor(vector, dtype=torch.float32)
max_in_vec = torch.max(vec_as_tensor)
min_in_vec = torch.min(vec_as_tensor)
if max_in_vec - min_in_vec == 0:
return torch.ones(vec_as_tensor.size(), dtype=torch.float32)
else:
return (vec_as_tensor - min_in_vec) / (max_in_vec - min_in_vec)
def rescale(vectors):
if len(vectors.shape) == 1:
return rescale_vec(vectors)
nor_vecs = torch.empty(vectors.shape)
for i in range(vectors.shape[0]):
nor_vecs[i] = rescale_vec(vectors[i])
return nor_vecs
class LinearRegression (torch.nn.Module):
def __init__ (self, power=4):
super().__init__()
self.layer = torch.nn.Linear(power, 1)
def forward(self, x):
return self.layer(x)
def regression(x_, y_, learning_rate):
x = torch.t(torch.tensor(x_, dtype=torch.float32))
y = torch.tensor(y_, dtype=torch.float32)
dim_size = x.size()[1]
print(dim_size, x.size())
model = LinearRegression(dim_size)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_func = torch.nn.MSELoss(reduction='sum')
batch_size = 400
for round in range(50):
sample_indices = torch.randint(0, len(x), (batch_size, ))
x_samples = torch.index_select(x, 0, sample_indices)
y_samples = torch.index_select(y, 0, sample_indices)
optimizer.zero_grad()
y_hat = model(x_samples.view(-1, dim_size))
loss = loss_func(y_hat, y_samples)
print(loss.item())
loss.backward()
optimizer.step()
return model
x_one, y = rand_series(1000)
b = np.ones(len(x_one))
x = np.array([b, x_one, x_one ** 2, x_one ** 3, x_one ** 4, x_one ** 5])
model = regression(rescale(x), torch.tensor(y, dtype=torch.float32), 0.002)
nor_x = rescale(x)
y_hat = model(torch.t(torch.tensor(x, dtype=torch.float32)))
plt.scatter(x_one, y)
plt.scatter(x_one, y_hat.data, c='red')
plt.show()
the loss:
4.7375866968775066e+19
1.6979300048622735e+26
6.0214270068868396e+32
inf
inf
inf
Upvotes: 1
Views: 624
Reputation:
You need to use loss_func = torch.nn.MSELoss(reduction='mean') to solve the NaN problem. A batch of one or two seems to work because the loss was small enough. By adding more epochs, you should see that your loss tend exponentially to infinity.
Upvotes: 1