Background
- Last time, in "huggingface/transformersのALBERTで分類タスクのfine-tuningをする", we fine-tuned ALBERT on a classification task
- This time, we will tackle regression with ALBERT
- You could simply use AlbertForSequenceClassification with num_labels=1, but since that model is designed for classification, its output layer may include an activation function, which is reportedly not always appropriate for regression (see the sketch after this list)
- Instead, we will add our own output layer to build a network better suited to regression
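For reference, here is a minimal sketch of that built-in route (assuming current transformers behavior, where num_labels=1 makes the sequence-classification head fall back to problem_type="regression"); it is shown for comparison only and is not used in this article:

# Built-in alternative (comparison only): with num_labels=1, recent
# versions of transformers treat the task as regression and apply an
# MSE loss to the single logit internally.
from transformers import AlbertForSequenceClassification

builtin_model = AlbertForSequenceClassification.from_pretrained(
    "albert-base-v1", num_labels=1
)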
Implementation
from datasets import load_dataset
from transformers import TrainingArguments, Trainer, AlbertModel, AlbertTokenizer
import numpy as np
import evaluate
import torch
import torch.nn as nn
class AlbertForRegression(nn.Module):
    def __init__(self, model_name):
        super().__init__()
        self.albert = AlbertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.1)
        # Single-output linear head on top of ALBERT's pooled representation
        self.regressor = nn.Linear(self.albert.config.hidden_size, 1)
        self.loss_fn = nn.MSELoss()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]  # pooler output for the [CLS] token
        pooled_output = self.dropout(pooled_output)
        regression_output = self.regressor(pooled_output).squeeze(-1)
        loss = None
        if labels is not None:
            # Compute the loss only when labels are provided (training/eval);
            # labels=None lets the same model run label-free at inference time
            loss = self.loss_fn(regression_output, labels.float())
        return loss, regression_output
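# Optional sanity check (illustrative, with dummy inputs): one batch through
# the head should give a scalar MSE loss and predictions of shape (batch_size,).
check_model = AlbertForRegression("albert-base-v1")
check_loss, check_preds = check_model(
    input_ids=torch.zeros((2, 8), dtype=torch.long),
    attention_mask=torch.ones((2, 8), dtype=torch.long),
    labels=torch.tensor([0.0, 1.0]),
)
print(check_loss.item(), check_preds.shape)  # scalar loss, torch.Size([2])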
dataset = load_dataset("yelp_review_full")
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)
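# Illustration (assuming albert-base-v1's default model_max_length of 512):
# padding="max_length" pads every example to a fixed length, so each row
# gets equal-length input_ids / attention_mask lists.
sample = tokenize_function({"text": ["great food!"]})
print(len(sample["input_ids"][0]))  # 512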
def label_change(examples):
    labels = []
    for label in examples['label']:
        new_label = 1 if label == 4 else 0
        labels.append(new_label)
    examples['label'] = labels
    return examples
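# Illustration: yelp_review_full labels run 0-4 (1-5 stars), so only
# 5-star reviews (label 4) map to the regression target 1.
print(label_change({"label": [0, 2, 4]}))  # {'label': [0, 0, 1]}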
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.map(label_change, batched=True)
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(100))
model = AlbertForRegression("albert-base-v1")
metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2.0e-05,
    weight_decay=0.01,
    num_train_epochs=10
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics
)
trainer.train()
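With TrainingArguments' default per_device_train_batch_size of 8, the 1,000 training examples come to 125 steps per epoch, so save_strategy="epoch" leaves checkpoints at steps 125, 250, 375, and so on; the checkpoint-375 loaded below is therefore the one from the end of epoch 3.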
Validation
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
PATH = "/content/test_trainer/checkpoint-375/pytorch_model.bin"
model.load_state_dict(torch.load(PATH))
model.to(device)
model.eval()  # disable dropout during inference
# Prepare one review that should obviously be rated 1 and one that should obviously be rated 5
tokenized_list = tokenizer(
    ["I will never watch this again. It's just a waste of time.",
     "I love this movie. This is the best one ever."],
    padding='max_length', truncation=True
)
# Prediction
with torch.no_grad():
    outputs = model(
        input_ids=torch.tensor(tokenized_list['input_ids'][0])[None].to(device),
        attention_mask=torch.tensor(tokenized_list['attention_mask'][0])[None].to(device)
    )
    print("First: ", outputs)
    outputs = model(
        input_ids=torch.tensor(tokenized_list['input_ids'][1])[None].to(device),
        attention_mask=torch.tensor(tokenized_list['attention_mask'][1])[None].to(device)
    )
    print("Second: ", outputs)
Results
(None, tensor([0.0663], device='cuda:0'))
(None, tensor([0.8153], device='cuda:0'))
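The clearly negative review scores near 0 and the clearly positive one near 1, in line with the remapped 0/1 targets, so the custom regression head does appear to pick up the intended signal.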