Reputation: 21
My Model:
import torch
import torch.nn as nn
import numpy as np


class myNet(nn.Module):
    def __init__(self):
        super(myNet, self).__init__()
        self.act1 = Dynamic_relu_b(64)
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64, 20)  # conv1 outputs 64 channels, so 64 features after pooling

    def forward(self, x):
        x = self.conv1(x)
        x = self.act1(x)
        x = self.pool(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x
Here is code that reproduces the problem:
def one_hot_smooth_label(x, num_class, smooth=0.1):
    num = x.shape[0]
    labels = torch.zeros((num, num_class))
    for i in range(num):
        labels[i][x[i]] = 1
    labels = (1 - (num_class - 1) / num_class * smooth) * labels + smooth / num_class
    return labels
images = torch.rand((4, 3, 300, 300))
images = images.cuda()
labels = torch.from_numpy(np.array([1, 0, 0, 1]))

model = myNet()
model = model.cuda()
output = model(images)

labels = one_hot_smooth_label(labels, 20)
labels = labels.cuda()

criterion = nn.BCEWithLogitsLoss()
loss = criterion(output, labels)
loss.backward()
The error:
RuntimeError Traceback (most recent call last)
<ipython-input-42-1268777e87e6> in <module>()
21
22 loss=criterion(output,labels)
---> 23 loss.backward()
1 frames
/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
98 Variable._execution_engine.run_backward(
99 tensors, grad_tensors, retain_graph, create_graph,
--> 100 allow_unreachable=True) # allow_unreachable flag
101
102
RuntimeError: Function AddBackward0 returned an invalid gradient at index 1 - expected type TensorOptions(dtype=float, device=cpu, layout=Strided, requires_grad=false) but got TensorOptions(dtype=float, device=cuda:0, layout=Strided, requires_grad=false) (validate_outputs at /pytorch/torch/csrc/autograd/engine.cpp:484)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x46 (0x7fcf7711b536 in /usr/local/lib/python3.6/dist-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x2d84224 (0x7fcfb1bad224 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #2: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x548 (0x7fcfb1baed58 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #3: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7fcfb1bb0ce2 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #4: torch::autograd::Engine::thread_init(int) + 0x39 (0x7fcfb1ba9359 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #5: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7fcfbe2e8378 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_python.so)
frame #6: <unknown function> + 0xbd6df (0x7fcfe23416df in /usr/lib/x86_64-linux-gnu/libstdc++.so.6)
frame #7: <unknown function> + 0x76db (0x7fcfe34236db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #8: clone + 0x3f (0x7fcfe375c88f in /lib/x86_64-linux-gnu/libc.so.6)
After many experiments, I found that act1 in the model was the problem: if I delete act1, the error does not appear!
But I don't know why act1 causes this problem.
The part of the error that seems wrong is requires_grad=false, and I don't know which part set it.
This is the code for act1 (Dynamic_relu_b):
class Residual(nn.Module):
    def __init__(self, in_channel, R=8, k=2):
        super(Residual, self).__init__()
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.relu = nn.ReLU(inplace=True)
        self.R = R
        self.k = k
        out_channel = int(in_channel / R)
        self.fc1 = nn.Linear(in_channel, out_channel)
        fc_list = []
        for i in range(k):
            fc_list.append(nn.Linear(out_channel, 2 * in_channel))
        self.fc2 = nn.ModuleList(fc_list)

    def forward(self, x):
        x = self.avg(x)           # [bs, C, 1, 1]
        x = torch.squeeze(x)      # [bs, C]
        x = self.fc1(x)
        x = self.relu(x)
        result_list = []
        for i in range(self.k):
            result = self.fc2[i](x)                   # [bs, 2 * C]
            result = 2 * torch.sigmoid(result) - 1    # squash to (-1, 1)
            result_list.append(result)
        return result_list
class Dynamic_relu_b(nn.Module):
    def __init__(self, inchannel, R=8, k=2):
        super(Dynamic_relu_b, self).__init__()
        self.lambda_alpha = 1
        self.lambda_beta = 0.5
        self.R = R
        self.k = k
        self.init_alpha = torch.zeros(self.k)
        self.init_beta = torch.zeros(self.k)
        self.init_alpha[0] = 1
        self.init_beta[0] = 1
        for i in range(1, k):
            self.init_alpha[i] = 0
            self.init_beta[i] = 0
        self.residual = Residual(inchannel)

    def forward(self, input):
        delta = self.residual(input)   # list of k tensors, each [bs, 2 * C]
        in_channel = input.shape[1]
        bs = input.shape[0]
        alpha = torch.zeros((self.k, bs, in_channel))
        beta = torch.zeros((self.k, bs, in_channel))
        for i in range(self.k):
            # even columns of delta[i] hold the alphas, odd columns the betas
            for j, c in enumerate(range(0, in_channel * 2, 2)):
                alpha[i, :, j] = delta[i][:, c]
                beta[i, :, j] = delta[i][:, c + 1]
        alpha1 = alpha[0]
        beta1 = beta[0]
        max_result = self.dynamic_function(alpha1, beta1, input, 0)
        for i in range(1, self.k):
            alphai = alpha[i]
            betai = beta[i]
            result = self.dynamic_function(alphai, betai, input, i)
            max_result = torch.max(max_result, result)
        return max_result

    def dynamic_function(self, alpha, beta, x, k):
        init_alpha = self.init_alpha[k]
        init_beta = self.init_beta[k]
        alpha = init_alpha + self.lambda_alpha * alpha
        beta = init_beta + self.lambda_beta * beta
        bs = x.shape[0]
        channel = x.shape[1]
        results = torch.zeros_like(x)
        for i in range(bs):
            for c in range(channel):
                results[i, c, :, :] = x[i, c] * alpha[i, c] + beta[i, c]
        return results
How should I solve this problem?
Upvotes: 0
Views: 4673
Reputation: 32992
In PyTorch, two tensors need to be on the same device to perform any mathematical operation between them, but in your case one is on the CPU and the other on the GPU. The error is not as clear as it normally is, because it happened in the backward pass. You were (un)lucky that your forward pass did not fail: there is an exception to the same-device restriction when one operand is a scalar, e.g. tensor * 2, and it even applies when the scalar is a 0-dimensional tensor on a different device, e.g. cpu_tensor * tensor(2, device='cuda:0'). You are using a lot of loops and accessing individual scalars to calculate further results, which is why the forward pass slips through.
While the forward pass works like that, in the backward pass, when the gradients are calculated, they are multiplied with the upstream gradients (application of the chain rule). At that point, the two are on different devices and the error surfaces.
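A small standalone illustration of these rules (a sketch, not taken from your model; it assumes a CUDA device is available):

import torch

a_gpu = torch.ones(3, device='cuda')   # tensor on the GPU
b_cpu = torch.ones(3)                  # same-shaped tensor on the CPU

# a_gpu * b_cpu                        # raises a RuntimeError: operands are on different devices
a_gpu * b_cpu.to(a_gpu.device)         # works: both operands are on cuda:0

scalar_cpu = torch.tensor(2.0)         # 0-dimensional CPU tensor
a_gpu * scalar_cpu                     # works: 0-dim tensors are treated like Python scalars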
You have identified that it's in Dynamic_relu_b. In there you need to make sure that every tensor you create is on the same device as the input. The two tensors you create in the forward method are:
alpha = torch.zeros((self.k, bs, in_channel))
beta = torch.zeros((self.k, bs, in_channel))
These are created on the CPU, but your input is on the GPU, so you need to put them on the GPU as well. To be generic, they should be created on whatever device the input is located:
alpha = torch.zeros((self.k, bs, in_channel), device=input.device)
beta = torch.zeros((self.k, bs, in_channel), device=input.device)
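The same reasoning applies to tensors a module creates in __init__ (like init_alpha and init_beta): if you register them as buffers, model.cuda() or model.to(device) moves them together with the parameters. A minimal sketch of that pattern (my addition; the module name is made up purely for illustration):

import torch
import torch.nn as nn

class DeviceAwareModule(nn.Module):   # hypothetical module, just to show the pattern
    def __init__(self, k=2):
        super().__init__()
        init_alpha = torch.zeros(k)
        init_alpha[0] = 1
        # A buffer follows model.to(device)/model.cuda(), unlike a plain tensor attribute.
        self.register_buffer('init_alpha', init_alpha)

    def forward(self, x):
        # After model.cuda(), self.init_alpha lives on the same device as the parameters.
        return x * self.init_alpha[0]

model = DeviceAwareModule().cuda()        # assumes a CUDA device is available
out = model(torch.ones(3, device='cuda'))  # buffer and input are both on cuda:0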
The biggest problem in your code is the loops. Not only did they obfuscate a bug, they are also very harmful to performance, since they can neither be parallelised nor vectorised, and those are the reasons why GPUs are so fast. I'm certain that these loops can be replaced with more efficient operations, but you'll have to get out of the mindset of creating an empty tensor and then filling it one by one.
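For instance, the double loop in Dynamic_relu_b's forward that copies the even and odd columns of each delta[i] into alpha and beta can be written with strided slicing (a sketch of one possible rewrite, not something you have to copy verbatim); as a bonus, the result automatically lives on the same device as delta:

# delta is a list of k tensors, each of shape [bs, 2 * in_channel]
stacked = torch.stack(delta)    # shape [k, bs, 2 * in_channel], same device as delta
alpha = stacked[:, :, 0::2]     # even columns -> shape [k, bs, in_channel]
beta = stacked[:, :, 1::2]      # odd columns  -> shape [k, bs, in_channel]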
I'll give you one example, from dynamic_function:
results = torch.zeros_like(x)
for i in range(bs):
    for c in range(channel):
        results[i, c, :, :] = x[i, c] * alpha[i, c] + beta[i, c]
You're multiplying x (size: [bs, channel, height, width]) with alpha (size: [bs, channel]), where every plane (height, width) of x is multiplied by a different element of alpha (a scalar). That would be the same as doing an element-wise multiplication with a tensor of the same size as the plane [height, width], where all elements are that same scalar.
Thankfully, you don't need to repeat the values yourself, since singleton dimensions (dimensions with size 1) are automatically expanded to match the size of the other tensor; see PyTorch - Broadcasting Semantics for details. That means you only need to reshape alpha to have size [bs, channel, 1, 1].
The loop can therefore be replaced with:
results = x * alpha.view(bs, channel, 1, 1) + beta.view(bs, channel, 1, 1)
By eliminating that loop you gain a lot of performance, and your initial error also becomes much clearer, because the forward pass now fails with the following message:
File "main.py", line 78, in dynamic_function
results = x * alpha.view(bs, channel, 1, 1) + beta.view(bs, channel, 1, 1)
RuntimeError: expected device cuda:0 but got device cpu
Now you would know that one of these is on the CPU and the other on the GPU.
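As a quick sanity check of the broadcasted version (a small CPU-only sketch I'm adding so it runs without a GPU):

import torch

bs, channel, height, width = 4, 64, 7, 7
x = torch.rand(bs, channel, height, width)
alpha = torch.rand(bs, channel)
beta = torch.rand(bs, channel)

# original loop version
loop_results = torch.zeros_like(x)
for i in range(bs):
    for c in range(channel):
        loop_results[i, c, :, :] = x[i, c] * alpha[i, c] + beta[i, c]

# broadcasted version
broadcast_results = x * alpha.view(bs, channel, 1, 1) + beta.view(bs, channel, 1, 1)

print(torch.allclose(loop_results, broadcast_results))  # True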
Upvotes: 3