AzureStrannik

Reputation: 1

Error: 'torch.dtype' object has no attribute 'base_dtype'

I am trying to train a BERT model, but I haven't fully figured out how TensorFlow is structured yet. The error occurs on the line x = self.bert_module(book):

Exception encountered when calling layer 'Embedding-Token' (type TokenEmbedding).
'torch.dtype' object has no attribute 'base_dtype'
Call arguments received by layer 'Embedding-Token' (type TokenEmbedding):
  • inputs=tensor([  101, 10531, 18301, 10124, 10127, 38036, 10251,   102,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0]) 

The dictionary is actually what I arrived at: the call was originally x = self.bert_module(ids, mask, token_type_ids), but passing a dictionary seemed more correct to me.
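
For comparison, here is a minimal sketch of the pure-PyTorch route, using the transformers BertModel that is already imported in my code and the ids/mask/token_type_ids tensors built in __getitem__ (the checkpoint name here is only an example, not the one I actually load):

from transformers import BertModel

bert = BertModel.from_pretrained('bert-base-multilingual-cased')  # example checkpoint
outputs = bert(
    input_ids=ids.unsqueeze(0),            # add a batch dimension
    attention_mask=mask.unsqueeze(0),
    token_type_ids=token_type_ids.unsqueeze(0),
)
pooled = outputs.pooler_output             # shape (1, hidden_size)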

The full code is below.

import sys
sys.path.append(r'C:\Users\Demiurg\AppData\Local\Programs\Python\Python38\Lib\site-packages')
import pandas as pd
import time
import torch.nn as nn
import torch
import logging
import numpy as np
import argparse
from keras_bert import load_trained_model_from_checkpoint

from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel

logging.basicConfig(format='%(asctime)s [%(levelname)s]: %(message)s', level=logging.DEBUG)
logging.getLogger().setLevel(logging.INFO)

# --- CONSTANTS ---
BERT_MODEL_NAME = 'small_bert/bert_en_uncased_L-2_H-128_A-2'  # NB: used only in a log message; the checkpoint actually loaded below is multi_cased_L-12_H-768_A-12

if torch.cuda.is_available():
    logging.info(f"GPU: {torch.cuda.get_device_name(0)} is available.")
    DEVICE = torch.device('cuda')
else:
    logging.info("No GPU available. Training will run on CPU.")
    DEVICE = torch.device('cpu')

def my_collate_fn(data):
    # `data` is a list of (x, y) pairs produced by BertDataset.__getitem__;
    # zip(data) wraps each pair in a 1-tuple
    return list(zip(data))
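# For a toy batch, the collate function returns, e.g.:
#   my_collate_fn([(x0, y0), (x1, y1)]) -> [((x0, y0),), ((x1, y1),)]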

# --- Data preparation and tokenization ---
class BertDataset(Dataset):
    def __init__(self, df, tokenizer, max_length=100):
        super(BertDataset, self).__init__()
        self.batch_size = 1
        df.columns = ['bodyText', 'Target'] # assign column names
        self.df=df
        self.tokenizer=tokenizer
        self.target=self.df['Target']
        self.max_length=max_length
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        
        X = self.df['bodyText'].values[idx]
        y = self.target.values[idx]
        
        inputs = self.tokenizer.encode_plus(
            X,
            padding='max_length',        # pad_to_max_length is deprecated
            truncation=True,
            add_special_tokens=True,
            return_attention_mask=True,
            max_length=self.max_length,
        )
        ids = inputs["input_ids"]
        token_type_ids = inputs["token_type_ids"]
        mask = inputs["attention_mask"]

        x = {
            'ids': torch.tensor(ids, dtype=torch.long).to(DEVICE),
            'mask': torch.tensor(mask, dtype=torch.long).to(DEVICE),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long).to(DEVICE)
            }

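        # NB: tokenizing the target string yields a variable-length sequence of
        # token ids, but nn.CrossEntropyLoss with the 3-way head below expects a
        # single integer class index per sample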
        y = self.tokenizer(y)["input_ids"]
        y = torch.tensor(y, dtype=torch.long).to(DEVICE)
        
        return x, y

# --- Model definition ---
class SentimentBERT(nn.Module):
    def __init__(self, bert_model):
        super().__init__()
        self.bert_module = bert_model
        self.dropout = nn.Dropout(0.1)
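        # NB: in_features=128 matches the small_bert H-128 config; the
        # multi_cased_L-12_H-768_A-12 checkpoint has hidden size 768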
        self.final = nn.Linear(in_features=128, out_features=3, bias=True) 
        
    def forward(self, inputs):
        ids, mask, token_type_ids = inputs['ids'], inputs['mask'], inputs['token_type_ids']
        book = {"Input-Token" : ids, "Input-Masked" : mask, "Input-Segment" : token_type_ids}
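        # NOTE: self.bert_module is the Keras model loaded via keras_bert, but
        # ids/mask/token_type_ids are torch tensors; this is the call where the
        # 'torch.dtype' object has no attribute 'base_dtype' error is raised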
        x = self.bert_module(book)
        x = self.dropout(x['pooler_output'])
        out = self.final(x)
        return out

# --- Training loop ---
def train(epoch, model, dataloader, loss_fn, optimizer, max_steps=None):
    model.train()
    total_acc, total_count = 0, 0
    log_interval = 50
    start_time = time.time()

    for batch_idx, sample in enumerate(dataloader):
        # my_collate_fn returns a list of 1-tuples; with batch_size=1 the
        # single (inputs, label) pair sits at sample[0][0]
        inputs = sample[0][0][0]
        label = sample[0][0][1]
        optimizer.zero_grad()
        predicted_label = model(inputs)
        
        loss = loss_fn(predicted_label, label)
        loss.backward()
        optimizer.step()
        
        total_acc += (predicted_label.argmax(1) == label).sum().item()
        total_count += label.size(0)
        
        if batch_idx % log_interval == 0:
            elapsed = time.time() - start_time
            print(
                "Epoch {:3d} | {:5d}/{:5d} batches "
                "| accuracy {:8.3f} | loss {:8.3f} ({:.3f}s)".format(
                    epoch, batch_idx, len(dataloader), total_acc / total_count, loss.item(), elapsed
                )
            )
            total_acc, total_count = 0, 0
            start_time = time.time()

        if max_steps is not None:
            if batch_idx == max_steps:
                return {'loss': loss.item(), 'acc': total_acc / total_count}
    
    return {'loss': loss.item(), 'acc': total_acc / total_count}

# --- Evaluation loop ---
def evaluate(model, dataloader, loss_fn):
    model.eval()
    total_acc, total_count = 0, 0

    with torch.no_grad():
        for sample in dataloader:
            inputs, label = sample[0][0]  # same batch layout as in train()
            predicted_label = model(inputs)
            loss = loss_fn(predicted_label, label)
            total_acc += (predicted_label.argmax(1) == label).sum().item()
            total_count += label.size(0)

    return {'loss': loss.item(), 'acc': total_acc / total_count}

# --- Main function ---
def train_and_evaluate(**params):

    logging.info("running with the following params :")
    logging.info(params)

    # Load the pre-trained BERT tokenizer and model
    # (change the paths to the ones you use)
    folder = 'multi_cased_L-12_H-768_A-12'

    config_path = folder+'/bert_config.json'
    checkpoint_path = folder+'/bert_model.ckpt'
    vocab_path = folder+'/vocab.txt'

    model = load_trained_model_from_checkpoint(config_path, checkpoint_path, training=True)
    tokenizer = BertTokenizer.from_pretrained(vocab_path)
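    # NOTE: the model comes from keras_bert (TensorFlow/Keras), while the
    # tokenizer is the transformers BertTokenizer; the two libraries do not
    # share tensor types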
    
    # Training parameters
    epochs = int(params.get('epochs'))
    batch_size = int(params.get('batch_size'))
    learning_rate = float(params.get('learning_rate'))
    
    # Load the data
    df_train = pd.read_csv(params.get('training_file'))
    df_eval = pd.read_csv(params.get('validation_file'))
    df_test = pd.read_csv(params.get('testing_file'))

    # Create the data loaders
    train_ds = BertDataset(df_train, tokenizer, max_length=100)
    train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True, collate_fn=my_collate_fn)
    eval_ds = BertDataset(df_eval, tokenizer, max_length=100)
    eval_loader = DataLoader(dataset=eval_ds, batch_size=batch_size, collate_fn=my_collate_fn)
    test_ds = BertDataset(df_test, tokenizer, max_length=100)
    test_loader = DataLoader(dataset=test_ds, batch_size=batch_size, collate_fn=my_collate_fn)
    
    # Create the model
    classifier = SentimentBERT(bert_model=model).to(DEVICE)
    total_parameters = sum([np.prod(p.size()) for p in classifier.parameters()])
    model_parameters = filter(lambda p: p.requires_grad, classifier.parameters())
    trainable_params = sum([np.prod(p.size()) for p in model_parameters])  # don't shadow the `params` kwargs
    logging.info(f"Total params : {total_parameters} - Trainable : {trainable_params} ({trainable_params/total_parameters*100}% of total)")
    
    # Optimizer and loss function
    optimizer = torch.optim.Adam([p for p in classifier.parameters() if p.requires_grad], learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    # On a dry run, only do the following
    logging.info(f'Training model with {BERT_MODEL_NAME}')
    if params.get('dry_run'):
        logging.info("Dry run mode")
        epochs = 1
        steps_per_epoch = 1
    else:
        steps_per_epoch = None
        
    # Let's go!
    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train_metrics = train(epoch, classifier, train_loader, loss_fn=loss_fn, optimizer=optimizer, max_steps=steps_per_epoch)
        eval_metrics = evaluate(classifier, eval_loader, loss_fn=loss_fn)
        
        print("-" * 59)
        print(
            "End of epoch {:3d} - time: {:5.2f}s - loss: {:.4f} - accuracy: {:.4f} - valid_loss: {:.4f} - valid accuracy {:.4f} ".format(
                epoch, time.time() - epoch_start_time, train_metrics['loss'], train_metrics['acc'], eval_metrics['loss'], eval_metrics['acc']
            )
        )
        print("-" * 59)
    
    if params.get('dry_run'):
        # On a dry run, skip evaluating the model
        return None
    
    test_metrics = evaluate(classifier, test_loader, loss_fn=loss_fn)
    
    metrics = {
        'train': train_metrics,
        'val': eval_metrics,
        'test': test_metrics,
    }
    logging.info(metrics)
    
    # Save the trained model weights
    if params.get('job_dir') is None:
        logging.warning("No job dir provided, model will not be saved")
    else:
        logging.info("Saving model to {} ".format(params.get('job_dir')))
        torch.save(classifier.state_dict(), params.get('job_dir'))
    logging.info("Bye bye")
    

if __name__ == '__main__':
    # Define the command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--training-file', type=str, default='Source/LTMV3/training-file.csv')
    parser.add_argument('--validation-file', type=str, default='Source/LTMV2/validation-file.csv')
    parser.add_argument('--testing-file', type=str, default='Source/LTMV2/testing-file.csv')
    parser.add_argument('--job-dir', type=str, default='LTMV2/')
    parser.add_argument('--epochs', type=int, default=2)
    parser.add_argument('--batch-size', type=int, default=1024)
    parser.add_argument('--learning-rate', type=float, default=0.01)
    parser.add_argument('--dry-run', action="store_true", default=False)

    # Parse the arguments
    args, _ = parser.parse_known_args()

    # Run the training
    train_and_evaluate(**vars(args))

Upvotes: 0

Views: 46

Answers (0)
