malioboro

Reputation: 3301

Pytorch Parameterized Layer not Updated

I have a problem: I want to make a layer whose weight (and bias) is based on another, frozen weight. Say I have a frozen weight (FW) as a base value; my layer's weight will then be W = FW + D, where D is the trainable parameter. Later, when I train the model, I expect only D to be updated.

I made this simple code for illustration:

import torch
import torch.nn as nn

frozen = nn.Linear(100,10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(100,10)
        self.dw = nn.Parameter(torch.tensor(1.0, requires_grad=True))
        self.db = nn.Parameter(torch.tensor(1.0, requires_grad=True))

    def forward(self, x):
        # the weight (and the bias) of fc layer is from FW and D
        self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw)) 
        self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))
        return torch.sigmoid(self.fc(x))
    
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
x = torch.rand(100)
y = torch.tensor([0]*9+[1], dtype=torch.float32)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

But when I run that code, the model doesn't train, and self.dw and self.db don't change. I am not sure whether my concept is wrong (so it isn't possible to train D) or whether I made a mistake in the implementation.

I also tried to implement this using nn.utils.parametrize, but it still doesn't work (I am new to it, so I am not sure I implemented it correctly):

import torch
import torch.nn as nn
import torch.nn.utils.parametrize

frozen = nn.Linear(100,10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

class Adder(nn.Module):
    def __init__(self, delta, frozen):
        super().__init__()
        self.delta = nn.Parameter(torch.tensor(delta, requires_grad=True))
        self.frozen=frozen
    def forward(self, x):
        return torch.add(self.frozen, self.delta)    

class Net(nn.Module):
    def __init__(self):    
        super(Net, self).__init__()
        self.fc = nn.Linear(100,10)

    def forward(self, x):
        nn.utils.parametrize.register_parametrization(self.fc, "weight",  Adder(1.0, frozen.weight))
        nn.utils.parametrize.register_parametrization(self.fc, "bias", Adder(1.0, frozen.bias))

        return torch.sigmoid(self.fc(x))
    
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
x = torch.rand(100)
y = torch.tensor([0]*9+[1], dtype=torch.float32)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Thank you for any responses.

Upvotes: 0

Views: 231

Answers (1)

TQCH

Reputation: 1262

Instead of recreating the weight and bias on every forward pass with

self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw)) 
self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))

(wrapping the result in nn.Parameter detaches it from the graph, so self.dw and self.db never receive gradients), you can use nn.functional.linear with intermediate variables:

weight = self.weight + frozen.weight
bias = self.bias + frozen.bias
F.linear(x, weight, bias)

Complete version:

import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    
    def __init__(self, frozen):
        super(Net, self).__init__()
        self.weight = nn.Parameter(torch.ones(10, 100, dtype=torch.float32))
        self.bias = nn.Parameter(torch.zeros(10, dtype=torch.float32))
        self.frozen = frozen  # frozen base layer; its parameters have requires_grad=False

    @property
    def weight_bias(self):
        weight = self.weight + self.frozen.weight
        bias = self.bias + self.frozen.bias
        return weight, bias
        
    def forward(self, x):
        # the effective weight (and bias) is FW + D
        weight, bias = self.weight_bias
        return F.linear(x, weight, bias) # this should return raw logits as required by nn.CrossEntropyLoss
    
frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

model = Net(frozen)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
x = torch.rand(100).unsqueeze(0)
y = torch.tensor([0]*9+[1], dtype=torch.float32).unsqueeze(0)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
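
If you still want the nn.utils.parametrize route from your second attempt, the key change is to register the parametrization once, outside forward and before building the optimizer, so the trainable delta is visible to it. A rough, untested sketch of that idea (the AddFrozen name is just illustrative):

import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize


class AddFrozen(nn.Module):
    # parametrization: effective tensor = frozen base + trainable delta
    def __init__(self, frozen_tensor):
        super().__init__()
        # keep the frozen base as a buffer so it is never trained
        self.register_buffer("frozen", frozen_tensor.detach().clone())

    def forward(self, delta):
        # `delta` is the layer's original (trainable) parameter
        return self.frozen + delta


frozen = nn.Linear(100, 10)

fc = nn.Linear(100, 10)
with torch.no_grad():
    fc.weight.zero_()  # start with delta = 0 so the effective weight equals the frozen one
    fc.bias.zero_()

# register once, before building the optimizer (not inside forward)
parametrize.register_parametrization(fc, "weight", AddFrozen(frozen.weight))
parametrize.register_parametrization(fc, "bias", AddFrozen(frozen.bias))

optimizer = torch.optim.SGD(fc.parameters(), lr=0.01)  # updates only the deltas

Accessing fc.weight then returns frozen.weight + delta, and only the delta (stored under fc.parametrizations.weight.original) receives gradient updates.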

Upvotes: 1
