Zyko

Reputation: 65

Force BERT transformer to use CUDA

I want to force the Hugging Face transformer (BERT) to use CUDA. While the code ran, nvidia-smi showed my GPU at 0% utilization, and all my CPU cores were maxed out. Unfortunately, I'm new to both the Hugging Face library and PyTorch, and I don't know where to place the CUDA attributes device = "cuda:0" or .to("cuda:0").

The code below is essentially a customized version of the German Sentiment BERT working example:

import torch as pt
from typing import List
from transformers import AutoModelForSequenceClassification, BertTokenizerFast

class SentimentModel_t(pt.nn.Module):
    def __init__(self, model_name: str = "oliverguhr/german-sentiment-bert"):
        DEVICE = "cuda:0" if pt.cuda.is_available() else "cpu"
        print(DEVICE)
        super(SentimentModel_t, self).__init__()

        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(DEVICE)
        self.tokenizer = BertTokenizerFast.from_pretrained(model_name)

    def predict_sentiment(self, texts: List[str]) -> List[str]:
        # clean_text is a preprocessing helper defined elsewhere in the class (omitted here).
        texts = [self.clean_text(text) for text in texts]
        # add_special_tokens takes care of adding [CLS], [SEP], <s>... tokens in the right way for each model.
        input_ids = self.tokenizer.batch_encode_plus(texts, padding=True, add_special_tokens=True,
                                                     truncation=True, max_length=self.tokenizer.max_len_single_sentence)
        input_ids = pt.tensor(input_ids["input_ids"])

        with pt.no_grad():
            logits = self.model(input_ids)

        label_ids = pt.argmax(logits[0], axis=1)

        labels = [self.model.config.id2label[label_id] for label_id in label_ids.tolist()]
        return labels

EDIT: After applying the suggestions of @KonstantinosKokos (see the edited code above), I got a

RuntimeError: Input, output and indices must be on the current device

pointing to

with pt.no_grad():
    logits = self.model(input_ids)

The full traceback is below:

<ipython-input-15-b843edd87a1a> in predict_sentiment(self, texts)
     23 
     24         with pt.no_grad():
---> 25             logits = self.model(input_ids)
     26 
     27         label_ids = pt.argmax(logits[0], axis=1)

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
   1364         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1365 
-> 1366         outputs = self.bert(
   1367             input_ids,
   1368             attention_mask=attention_mask,

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, output_attentions, output_hidden_states, return_dict)
    859         head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
    860 
--> 861         embedding_output = self.embeddings(
    862             input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
    863         )

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, token_type_ids, position_ids, inputs_embeds)
    196 
    197         if inputs_embeds is None:
--> 198             inputs_embeds = self.word_embeddings(input_ids)
    199         token_type_embeddings = self.token_type_embeddings(token_type_ids)
    200 

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/modules/sparse.py in forward(self, input)
    122 
    123     def forward(self, input: Tensor) -> Tensor:
--> 124         return F.embedding(
    125             input, self.weight, self.padding_idx, self.max_norm,
    126             self.norm_type, self.scale_grad_by_freq, self.sparse)

~/PycharmProjects/Test_project/venv/lib/python3.8/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   1850         # remove once script supports set_grad_enabled
   1851         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1852     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   1853 
   1854 

Upvotes: 4

Views: 9273

Answers (2)

Oliver

Reputation: 1245

I am a bit late to the party. The Python package that I wrote already uses your GPU. You can have a look at the code to see how it is implemented.

Just install the package:

pip install germansentiment

and run the code:

from germansentiment import SentimentModel

model = SentimentModel()

texts = [
    "Mit keinem guten Ergebniss","Das ist gar nicht mal so gut",
    "Total awesome!","nicht so schlecht wie erwartet",
    "Der Test verlief positiv.","Sie fährt ein grünes Auto."]

result = model.predict_sentiment(texts)
print(result)

Important: If you write your own code to use the model, you need to run the preprocessing code as well. Otherwise the results can be off.
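If you want a quick sanity check that the weights actually ended up on the GPU, you can inspect the device of the model's parameters. A minimal sketch, assuming the package stores the wrapped transformers model in a model attribute (that attribute name is an assumption about the package's internals):

from germansentiment import SentimentModel

model = SentimentModel()

# Assumption: the wrapped transformers model is exposed as `model.model`;
# adjust the attribute name if the package's internals differ.
print(next(model.model.parameters()).device)  # e.g. "cuda:0" on a GPU machine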

Upvotes: 1

KonstantinosKokos

Reputation: 3473

You can make the entire class inherit from torch.nn.Module, like so:

class SentimentModel_t(torch.nn.Module):
    def __init__(self, ...):
        super(SentimentModel_t, self).__init__()
        ...

Upon initializing your model you can then call .to(device) to cast it to the device of your choice, like so:

sentiment_model = SentimentModel_t(...)
sentiment_model.to('cuda')

The .to() call recursively applies to all submodules of the class, model being one of them (Hugging Face models inherit from torch.nn.Module, thus providing an implementation of to()). Note that this makes choosing a device in __init__() redundant: it's now an external context that you can switch to and from easily.
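Putting both pieces together, here is a minimal sketch of the full pattern (the clean_text preprocessing from the question is omitted for brevity): the module is moved to a device once from the outside, and predict_sentiment moves its input tensors to that same device, which is what resolves the RuntimeError from the question's edit:

import torch as pt
from typing import List
from transformers import AutoModelForSequenceClassification, BertTokenizerFast

class SentimentModel_t(pt.nn.Module):
    def __init__(self, model_name: str = "oliverguhr/german-sentiment-bert"):
        super(SentimentModel_t, self).__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.tokenizer = BertTokenizerFast.from_pretrained(model_name)

    def predict_sentiment(self, texts: List[str]) -> List[str]:
        # Look up the device the module currently lives on.
        device = next(self.parameters()).device
        encoded = self.tokenizer.batch_encode_plus(
            texts, padding=True, add_special_tokens=True, truncation=True)
        # Move the inputs to the same device as the model weights.
        input_ids = pt.tensor(encoded["input_ids"]).to(device)
        with pt.no_grad():
            logits = self.model(input_ids)
        label_ids = pt.argmax(logits[0], dim=1)
        return [self.model.config.id2label[i] for i in label_ids.tolist()]

sentiment_model = SentimentModel_t()
sentiment_model.to('cuda')  # or 'cpu' on a machine without a GPU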


Alternatively, you can hardcode the device by moving the contained BERT model directly to CUDA (less elegant):

class SentimentModel_t():
    def __init__(self, ...):
        DEVICE = "cuda:0" if pt.cuda.is_available() else "cpu"
        print(DEVICE)

        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(DEVICE)
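Either way, remember that the input tensors built inside predict_sentiment must be moved to the same device as well (e.g. input_ids = input_ids.to(DEVICE)); leaving them on the CPU is exactly what produces the "Input, output and indices must be on the current device" error from the question.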

Upvotes: 3
