Xiaofan Lu

Reputation: 1

Transfer learning with PyTorch: RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.models as models

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_ft = models.resnet18(weights='IMAGENET1K_V1')

# freeze the whole backbone
for param in model_ft.parameters():
    param.requires_grad = False

# unfreeze the original fc layer (redundant here, since fc is replaced
# below and a fresh nn.Linear requires grad by default)
for param in model_ft.fc.parameters():
    param.requires_grad = True

num_ftrs = model_ft.fc.in_features
fc_layer = nn.Linear(in_features=num_ftrs, out_features=1)
model_ft.fc = fc_layer

model_ft = model_ft.to(device)
criterion = nn.MSELoss()
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = simpleTrainingLoop(model_ft,
                              train_data_loader,  # defined in an earlier cell
                              criterion,
                              optimizer_ft,
                              exp_lr_scheduler)

def simpleTrainingLoop(model,
                       dataloader_train,
                       criterion,
                       optimizer,
                       scheduler,
                       num_epoch=1):
    since = time.time()
    for e in range(num_epoch):
        running_loss = 0
        for inputs, labels in dataloader_train:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(preds, labels)
            # it would help to know which layer causes the problem
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        scheduler.step()
        train_loss = running_loss / train_size  # train_size defined in an earlier cell
        print(f'training loss: {train_loss}')
    time_elapsed = time.time() - since
    print(f'training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

I got the runtime error: element 0 of tensors does not require grad and does not have a grad_fn. It happens at loss.backward(). I am trying to do transfer learning with ResNet-18: I set requires_grad to False for all of ResNet-18's parameters and enable grad only for the fc layer, but this gives me the runtime error below.

Is this problem related to the GPU? I move the inputs and the model to device, but CUDA cannot access my GPU.
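
A quick way to check which device is actually being used (a minimal sketch; model_ft as defined above):

import torch

print(torch.cuda.is_available())           # False means everything runs on the CPU
print(next(model_ft.parameters()).device)  # device the model's weights live on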

c:\Users\LangZheZR\Desktop\projects\eye_on_ground_challenge\test_CNN_image_tabular_concat.ipynb Cell 11 line 2
     21 optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
     22 exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
---> 23 model_ft = simpleTrainingLoop(model_ft,
     24                        train_data_loader,
     25                        criterion,
     26                        optimizer_ft,
     27                        exp_lr_scheduler)

c:\Users\LangZheZR\Desktop\projects\eye_on_ground_challenge\test_CNN_image_tabular_concat.ipynb Cell 11 line 2
     18 loss = criterion(preds,labels)
     19 # if we can know which layer has the problem will be better 
---> 20 loss.backward()
     21 optimizer.step()
     22 running_loss += loss.item() * inputs.size(0)

File c:\Python311\Lib\site-packages\torch\_tensor.py:492, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
    482 if has_torch_function_unary(self):
    483     return handle_torch_function(
    484         Tensor.backward,
    485         (self,),
   (...)
    490         inputs=inputs,
...
    257     allow_unreachable=True,
    258     accumulate_grad=True,
    259 )

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Upvotes: 0

Views: 72

Answers (1)

Dick Larsson

Reputation: 785

Can you try changing loss = criterion(preds, labels) to loss = criterion(outputs, labels)? torch.max(outputs, 1) returns (values, indices), and preds holds the integer indices of the argmax. That operation is not differentiable, so preds is detached from the computation graph and has no grad_fn, which is exactly what the error message says; the GPU is not involved. Computing the loss on outputs keeps the graph intact.
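
A minimal sketch of the corrected inner loop (assuming a regression setup, since the model has a single output and uses MSELoss; the squeeze and float cast are assumptions that depend on the shape and dtype of your labels):

optimizer.zero_grad()
outputs = model(inputs)               # shape [batch_size, 1], carries a grad_fn
loss = criterion(outputs.squeeze(1),  # MSELoss expects prediction and target
                 labels.float())      # shapes and dtypes to match
loss.backward()                       # works: the loss is connected to the graph
optimizer.step()

torch.max(outputs, 1) would only make sense for multi-class classification, and even there it belongs in the accuracy computation, not in the loss.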

Upvotes: 0
