Reputation: 7393
I am using PyTorch (0.4.0) on Google Colaboratory (NVIDIA-SMI 396.44, Driver Version: 396.44).
When running my code outside any function, I am able to send PyTorch tensors and the model to the GPU:
...
model.cuda()
data_tensor = data_tensor.cuda()
...
My CNN model is then trained successfully, with 98% accuracy.
But when I put the same code in a function,
def main(...):
....
model.cuda()
data_tensor= data_tensor.cuda()
...
if __name__ == "__main__":
main('...)
I have the following error:
cuda runtime error (77) : an illegal memory access was encountered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:20
UPDATE(18/11/21):
It turned out that whether the code is inside a function or not is irrelevant. Usually, I first get a CUDNN_STATUS_EXECUTION_FAILED error, and then on subsequent tries a cuda runtime error (77), as shown below. However, it sometimes works a few times before failing.
CUDNN_STATUS_EXECUTION_FAILED (first try) :
RuntimeError Traceback (most recent call last)
<ipython-input-27-53476e08e017> in <module>()
1 main('mnist', 'to', 'ndd', Xd=16, epo=5, bs=100, tXn=-1, vXn=300,
----> 2 lr=0.05, suf="s1", n_class=10, cuda=True)
<ipython-input-23-918584456207> in main(ds, framework, format, Xd, epo, bs, tXn, vXn, lr, suf, n_class, cuda)
12 opt = torch.optim.SGD(net.parameters(), lr)
13
---> 14 train(net, opt, Xd, epo, bs, cuda, tXn, tX, tT, vX, vT,lr)
15
<ipython-input-26-6b574a9e8af6> in train(model, optimizer, Xd, epo, bs, cuda, Xn, tX, tT, vX, vT, lr)
26 #t = t.cuda()
27 optimizer.zero_grad()
---> 28 z = model(x)
29 bat_loss = criterion(z, t)
30 bat_loss.backward()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
<ipython-input-22-b4bc2e0b39b8> in forward(self, X)
10 H0 = torch.zeros(self.n_H, X.size(0), self.Wh)
11 C0 = torch.zeros(self.n_H, X.size(0), self.Wh)
---> 12 O, (Hn, Cn), = self.lstm1(X, (H0, C0))
13 O = self.linear1(O[:, -1, :])
14 return O
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
190 flat_weight=flat_weight
191 )
--> 192 output, hidden = func(input, self.all_weights, hx, batch_sizes)
193 if is_packed:
194 output = PackedSequence(output, batch_sizes)
/usr/local/lib/python3.6/dist-packages/torch/nn/_functions/rnn.py in forward(input, *fargs, **fkwargs)
321 func = decorator(func)
322
--> 323 return func(input, *fargs, **fkwargs)
324
325 return forward
/usr/local/lib/python3.6/dist-packages/torch/nn/_functions/rnn.py in forward(input, weight, hx, batch_sizes)
285 batch_first, dropout, train, bool(bidirectional),
286 list(batch_sizes.data) if variable_length else (),
--> 287 dropout_ts)
288
289 if cx is not None:
RuntimeError: CUDNN_STATUS_EXECUTION_FAILED
cuda runtime error (77) (other tries):
RuntimeError Traceback (most recent call last)
<ipython-input-28-53476e08e017> in <module>()
1 main('mnist', 'to', 'ndd', Xd=16, epo=5, bs=100, tXn=-1, vXn=300,
----> 2 lr=0.05, suf="s1", n_class=10, cuda=True)
<ipython-input-23-918584456207> in main(ds, framework, format, Xd, epo, bs, tXn, vXn, lr, suf, n_class, cuda)
12 opt = torch.optim.SGD(net.parameters(), lr)
13
---> 14 train(net, opt, Xd, epo, bs, cuda, tXn, tX, tT, vX, vT,lr)
15
<ipython-input-26-6b574a9e8af6> in train(model, optimizer, Xd, epo, bs, cuda, Xn, tX, tT, vX, vT, lr)
4 if cuda and torch.cuda.is_available():
5 print("tX type (before):", tX.type())
----> 6 model.cuda()
7 tX = tX.cuda()
8 tT = tT.cuda()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in cuda(self, device)
247 Module: self
248 """
--> 249 return self._apply(lambda t: t.cuda(device))
250
251 def cpu(self):
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
174 def _apply(self, fn):
175 for module in self.children():
--> 176 module._apply(fn)
177
178 for param in self._parameters.values():
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in _apply(self, fn)
109
110 def _apply(self, fn):
--> 111 ret = super(RNNBase, self)._apply(fn)
112 self.flatten_parameters()
113 return ret
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
180 # Tensors stored in modules are graph leaves, and we don't
181 # want to create copy nodes, so we have to unpack the data.
--> 182 param.data = fn(param.data)
183 if param._grad is not None:
184 param._grad.data = fn(param._grad.data)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in <lambda>(t)
247 Module: self
248 """
--> 249 return self._apply(lambda t: t.cuda(device))
250
251 def cpu(self):
RuntimeError: cuda runtime error (77) : an illegal memory access was encountered at /pytorch/aten/src/THC/generic/THCTensorCopy.c:20
Upvotes: 2
Views: 4520
Reputation: 7393
It now works with PyTorch 1.0, installed using:
!pip3 install https://download.pytorch.org/whl/cu80/torch-1.0.0-cp36-cp36m-linux_x86_64.whl
Upvotes: 2