Reputation: 3301
I want to make a layer whose weight (and bias) is based on another, frozen weight. Say I have a frozen weight (FW) as a base value; my layer's weight is then W = FW + D, where D is a trainable parameter. When I train the model, I want D to be the only parameter that gets updated.
I made this simple code for illustration:
import torch
import torch.nn as nn

frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(100, 10)
        self.dw = nn.Parameter(torch.tensor(1.0, requires_grad=True))
        self.db = nn.Parameter(torch.tensor(1.0, requires_grad=True))

    def forward(self, x):
        # the weight (and the bias) of the fc layer come from FW and D
        self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw))
        self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))
        return torch.sigmoid(self.fc(x))

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.rand(100)
y = torch.tensor([0]*9 + [1], dtype=torch.float32)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
But when I run that code, the model doesn't train, and self.dw and self.db don't change. I am not sure whether my concept is wrong (so it's not possible to train D) or whether I made a mistake in the implementation.
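For reference, here is a minimal check (just a sketch, reusing the names from the snippet above) that can be run after a single backward pass to see whether any gradient reaches the deltas:

out = model(x)
loss = criterion(out, y)
loss.backward()
# if these print None, no gradient has reached the deltas
print(model.dw.grad, model.db.grad)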
I also tried to implement this using nn.utils.parametrize, but it still doesn't work (I am new to it, so I am not sure I implemented it correctly):
frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

class Adder(nn.Module):
    def __init__(self, delta, frozen):
        super().__init__()
        self.delta = nn.Parameter(torch.tensor(delta, requires_grad=True))
        self.frozen = frozen

    def forward(self, x):
        return torch.add(self.frozen, self.delta)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Linear(100, 10)

    def forward(self, x):
        nn.utils.parametrize.register_parametrization(self.fc, "weight", Adder(1.0, frozen.weight))
        nn.utils.parametrize.register_parametrization(self.fc, "bias", Adder(1.0, frozen.bias))
        return torch.sigmoid(self.fc(x))

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.rand(100)
y = torch.tensor([0]*9 + [1], dtype=torch.float32)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Thank you for any responses.
Upvotes: 0
Views: 231
Reputation: 1262
Instead of recreating the weight and bias on every forward pass with

self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw))
self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))

(wrapping the sum in a fresh nn.Parameter creates a new leaf tensor, so no gradient can flow back to self.dw and self.db), you can use nn.functional.linear with intermediate variables:

weight = self.weight + frozen.weight
bias = self.bias + frozen.bias
F.linear(x, weight, bias)
Complete version:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self, frozen):
        super(Net, self).__init__()
        self.weight = nn.Parameter(torch.ones(10, 100, dtype=torch.float32))
        self.bias = nn.Parameter(torch.zeros(10, dtype=torch.float32))
        self.frozen = frozen

    @property
    def weight_bias(self):
        weight = self.weight + self.frozen.weight
        bias = self.bias + self.frozen.bias
        return weight, bias

    def forward(self, x):
        # the weight (and the bias) of the fc layer are built from FW and D
        weight, bias = self.weight_bias
        return F.linear(x, weight, bias)  # returns raw logits, as required by nn.CrossEntropyLoss

frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False

model = Net(frozen)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.rand(100).unsqueeze(0)
y = torch.tensor([0]*9 + [1], dtype=torch.float32).unsqueeze(0)

for _ in range(10):
    out = model(x)
    loss = criterion(out, y)
    print(loss)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
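As a side note on the nn.utils.parametrize attempt from the question: the parametrization should be registered once (right after constructing the layer), not inside forward, and its forward method should use the tensor it receives as the trainable delta. A minimal sketch of that idea (the AddFrozen name and the zero initialization of the delta are my own choices, not from the question):

import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize

class AddFrozen(nn.Module):
    # receives the layer's own (trainable) tensor as the delta;
    # the frozen base is stored as a non-trainable buffer
    def __init__(self, frozen_tensor):
        super().__init__()
        self.register_buffer("frozen", frozen_tensor.detach().clone())

    def forward(self, delta):
        # effective weight = frozen base + trainable delta
        return self.frozen + delta

frozen = nn.Linear(100, 10)

layer = nn.Linear(100, 10)
nn.init.zeros_(layer.weight)  # start with D = 0 so the effective weight equals FW
nn.init.zeros_(layer.bias)

parametrize.register_parametrization(layer, "weight", AddFrozen(frozen.weight))
parametrize.register_parametrization(layer, "bias", AddFrozen(frozen.bias))

# only the deltas (the parametrized "original" tensors) are trainable
optimizer = torch.optim.SGD(layer.parameters(), lr=0.01)

This registers the parametrization a single time instead of re-registering it on every forward call, and gradients flow into the delta through the parametrization while the frozen base stays fixed.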
Upvotes: 1