Reputation: 1779
I am wondering what I am doing wrong when looking to see how the weights changed during training.
My loss goes down considerably but it appears that the initialized weights are the same as trained weights. Am I looking in the wrong location? I would appreciate any insight that you might have!
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
# setup GPU/CPU processing
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# initialize model
class mlp1(torch.nn.Module):
def __init__(self, num_features, num_hidden, num_classes):
super(mlp1, self).__init__()
self.num_classes = num_classes
self.input_layer = torch.nn.Linear(num_features, num_hidden)
self.out_layer = torch.nn.Linear(num_hidden, num_classes)
def forward(self, x):
x = self.input_layer(x)
x = torch.sigmoid(x)
logits = self.out_layer(x)
probas = torch.softmax(logits, dim=1)
return logits, probas
# instantiate model
model = mlp1(num_features=28*28, num_hidden=100, num_classes=10).to(device)
# check initial weights
weight_check_pre = model.state_dict()['input_layer.weight'][0][0:25]
# optim
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# download data
train_dataset = datasets.MNIST(root='data',
train=True,
transform=transforms.ToTensor(),
download=True)
# data loader
train_dataloader = DataLoader(dataset=train_dataset,
batch_size=100,
shuffle=True)
# train
NUM_EPOCHS = 1
for epoch in range(NUM_EPOCHS):
model.train()
for batch_idx, (features, targets) in enumerate(train_dataloader):
# send data to device
features = features.view(-1, 28*28).to(device)
targets = targets.to(device)
# forward
logits, probas = model(features)
# loss
loss = F.cross_entropy(logits, targets)
optimizer.zero_grad()
loss.backward()
# now update weights
optimizer.step()
### LOGGING
if not batch_idx % 50:
print ('Epoch: %03d/%03d | Batch %03d/%03d | Loss: %.4f'
%(epoch+1, NUM_EPOCHS, batch_idx,
len(train_dataloader), loss))
# check post training
weight_check_post = model.state_dict()['input_layer.weight'][0][0:25]
# compare
weight_check_pre == weight_check_post # all equal
Upvotes: 0
Views: 412
Reputation: 1410
That is because both variables are referencing the same object (dictionary) in memory and so will always equal to each other.
You can do this to get actual copies of the state_dict
.
import copy
# check initial weights
weight_check_pre = copy.deepcopy(model.state_dict()['input_layer.weight'][0][0:25])
...
# check post training
weight_check_post = copy.deepcopy(model.state_dict()['input_layer.weight'][0][0:25])
Upvotes: 1