Background
- Last time, in "huggingface/transformersのALBERTで分類タスクのfine-tuningをする", we fine-tuned ALBERT on a classification task
- This time, we will tackle regression with ALBERT
- You could simply use AlbertForSequenceClassification with num_labels=1, but since that model is designed for classification, its output layer may include an activation function, which is reportedly not always appropriate for regression (see the sketch after this list)
- Instead, we will add our own output layer to build a network better suited to regression
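For reference, here is a minimal sketch of that built-in route (assuming current transformers behavior, where num_labels=1 makes the sequence-classification head fall back to problem_type="regression"); it is shown for comparison only and is not used in this article:

# Built-in alternative (comparison only): with num_labels=1, recent
# versions of transformers treat the task as regression and apply an
# MSE loss to the single logit internally.
from transformers import AlbertForSequenceClassification

builtin_model = AlbertForSequenceClassification.from_pretrained(
    "albert-base-v1", num_labels=1
)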
Implementation
from datasets import load_dataset
from transformers import TrainingArguments, Trainer, AlbertModel, AlbertTokenizer
import numpy as np
import evaluate
import torch
import torch.nn as nn
class AlbertForRegression(nn.Module):
    def __init__(self, model_name):
        super().__init__()
        self.albert = AlbertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.1)
        # Single-output linear head on top of ALBERT's pooled representation
        self.regressor = nn.Linear(self.albert.config.hidden_size, 1)
        self.loss_fn = nn.MSELoss()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]  # pooler output for the [CLS] token
        pooled_output = self.dropout(pooled_output)
        regression_output = self.regressor(pooled_output).squeeze(-1)
        loss = None
        if labels is not None:
            # Compute the loss only when labels are provided (training/eval);
            # labels=None lets the same model run label-free at inference time
            loss = self.loss_fn(regression_output, labels.float())
        return loss, regression_output
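# Optional sanity check (illustrative, with dummy inputs): one batch through
# the head should give a scalar MSE loss and predictions of shape (batch_size,).
check_model = AlbertForRegression("albert-base-v1")
check_loss, check_preds = check_model(
    input_ids=torch.zeros((2, 8), dtype=torch.long),
    attention_mask=torch.ones((2, 8), dtype=torch.long),
    labels=torch.tensor([0.0, 1.0]),
)
print(check_loss.item(), check_preds.shape)  # scalar loss, torch.Size([2])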
dataset = load_dataset("yelp_review_full")
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v1")
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)
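# Illustration (assuming albert-base-v1's default model_max_length of 512):
# padding="max_length" pads every example to a fixed length, so each row
# gets equal-length input_ids / attention_mask lists.
sample = tokenize_function({"text": ["great food!"]})
print(len(sample["input_ids"][0]))  # 512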
def label_change(examples):
    labels = []
    for label in examples['label']:
        new_label = 1 if label == 4 else 0
        labels.append(new_label)
    examples['label'] = labels
    return examples
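# Illustration: yelp_review_full labels run 0-4 (1-5 stars), so only
# 5-star reviews (label 4) map to the regression target 1.
print(label_change({"label": [0, 2, 4]}))  # {'label': [0, 0, 1]}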
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.map(label_change, batched=True)
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(100))
model = AlbertForRegression("albert-base-v1")
metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2.0e-05,
    weight_decay=0.01,
    num_train_epochs=10
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics
)
trainer.train()
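With TrainingArguments' default per_device_train_batch_size of 8, the 1,000 training examples come to 125 steps per epoch, so save_strategy="epoch" leaves checkpoints at steps 125, 250, 375, and so on; the checkpoint-375 loaded below is therefore the one from the end of epoch 3.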
Validation
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
PATH = "/content/test_trainer/checkpoint-375/pytorch_model.bin"
model.load_state_dict(torch.load(PATH))
model.to(device)
model.eval()  # disable dropout during inference
# Prepare one review that should obviously be rated 1 and one that should obviously be rated 5
tokenized_list = tokenizer(
    ["I will never watch this again. It's just a waste of time.",
     "I love this movie. This is the best one ever."],
    padding='max_length', truncation=True
)
# Prediction
with torch.no_grad():
    outputs = model(
        input_ids=torch.tensor(tokenized_list['input_ids'][0])[None].to(device),
        attention_mask=torch.tensor(tokenized_list['attention_mask'][0])[None].to(device)
    )
    print("First: ", outputs)
    outputs = model(
        input_ids=torch.tensor(tokenized_list['input_ids'][1])[None].to(device),
        attention_mask=torch.tensor(tokenized_list['attention_mask'][1])[None].to(device)
    )
    print("Second: ", outputs)
Results
(None, tensor([0.0663], device='cuda:0'))
(None, tensor([0.8153], device='cuda:0'))
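The clearly negative review scores near 0 and the clearly positive one near 1, in line with the remapped 0/1 targets, so the custom regression head does appear to pick up the intended signal.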