0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

漢文生成モデルXunzi-Qwen3-8B-baseでのFew-Shot Promptingによる品詞付与

0
Posted at

XunziALLMにXunzi-Qwen3-8B-baseが追加されていたので、Few-Shot PromptingによるUPOS (Universal Part-Of-Speech)品詞付与を試してみた。Few-Shot Promptingの例文は、2024年6月21日の記事と同じく、UD_Classical_Chinese-Kyotoの訓練データのうち、『論語』里仁篇の冒頭部分を用いた。

#! /usr/bin/python3
# pip3 install transformers accelerate
import os
# ModelScope repository URL for the Xunzi-Qwen3-8B-base model.
url="https://www.modelscope.cn/models/Xunzillm4cc/Xunzi-Qwen3-8B-base"
# Shell out: shallow-clone (--depth=1) the repository, but only when no
# directory named after the URL's last path component exists in the current
# working directory. The exit status returned by os.system is not checked.
os.system(f"test -d {os.path.basename(url)} || git clone --depth=1 {url}")
class TextUPOSList(list):
  """List of sentences, each a sequence of (token, UPOS tag) pairs.

  Converting the list to a string renders every sentence as two lines:
  ``###text:`` followed by the concatenated tokens, then ``###UPOS:``
  followed by ``token_TAG`` items joined with ``|``.  Sentences are
  separated by newlines and the result always ends with a newline.
  """

  def __str__(self):
    """Return the prompt-style rendering of all sentences."""
    rendered = []
    for sentence in self:
      surface = "".join(token for token, _tag in sentence)
      tagged = "|".join(token + "_" + tag for token, tag in sentence)
      rendered.append("###text:" + surface + "\n###UPOS:" + tagged)
    return "\n".join(rendered) + "\n"
# Few-shot examples for the prompt. Each appended entry is one sentence,
# given as a list of (token, UPOS tag) pairs; a token may span more than one
# character (e.g. "君子"). str(ex) turns the whole list into the prompt prefix,
# two lines per sentence.
ex=TextUPOSList()
ex.append([("子","NOUN"),("曰","VERB"),("里","VERB"),("仁","NOUN"),("為","AUX"),("美","NOUN"),("擇","VERB"),("不","ADV"),("處","VERB"),("仁","NOUN"),("焉","ADV"),("得","AUX"),("知","VERB")])
ex.append([("子","NOUN"),("曰","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("不","ADV"),("可","AUX"),("以","VERB"),("久","VERB"),("處","VERB"),("約","NOUN"),("不","ADV"),("可","AUX"),("以","VERB"),("長","VERB"),("處","VERB"),("樂","NOUN"),("仁","VERB"),("者","PART"),("安","VERB"),("仁","VERB"),("知","VERB"),("者","PART"),("利","VERB"),("仁","VERB")])
ex.append([("子","NOUN"),("曰","VERB"),("唯","ADV"),("仁","VERB"),("者","PART"),("能","AUX"),("好","VERB"),("人","NOUN"),("能","AUX"),("惡","VERB"),("人","NOUN")])
ex.append([("子","NOUN"),("曰","VERB"),("苟","ADV"),("志","VERB"),("於","ADP"),("仁","NOUN"),("矣","PART"),("無","ADV"),("惡","VERB"),("也","PART")])
ex.append([("子","NOUN"),("曰","VERB"),("富","VERB"),("與","ADP"),("貴","VERB"),("是","PRON"),("人","NOUN"),("之","SCONJ"),("所","PART"),("欲","VERB"),("也","PART"),("不","ADV"),("以","VERB"),("其","PRON"),("道","NOUN"),("得","VERB"),("之","PRON"),("不","ADV"),("處","VERB"),("也","PART"),("貧","VERB"),("與","ADP"),("賤","VERB"),("是","PRON"),("人","NOUN"),("之","SCONJ"),("所","PART"),("惡","VERB"),("也","PART"),("不","ADV"),("以","VERB"),("其","PRON"),("道","NOUN"),("得","VERB"),("之","PRON"),("不","ADV"),("去","VERB"),("也","PART"),("君子","NOUN"),("去","VERB"),("仁","VERB"),("惡","ADV"),("乎","PART"),("成","VERB"),("名","NOUN"),("君子","NOUN"),("無","VERB"),("終","VERB"),("食","NOUN"),("之","SCONJ"),("間","NOUN"),("違","VERB"),("仁","VERB"),("造","VERB"),("次","NOUN"),("必","ADV"),("於","VERB"),("是","PRON"),("顛","ADV"),("沛","VERB"),("必","ADV"),("於","VERB"),("是","PRON")])
ex.append([("子","NOUN"),("曰","VERB"),("我","PRON"),("未","ADV"),("見","VERB"),("好","VERB"),("仁","VERB"),("者","PART"),("惡","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("好","VERB"),("仁","VERB"),("者","PART"),("無","ADV"),("以","ADV"),("尚","VERB"),("之","PRON"),("惡","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("其","PRON"),("為","VERB"),("仁","NOUN"),("矣","PART"),("不","ADV"),("使","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("加","VERB"),("乎","ADP"),("其","PRON"),("身","NOUN"),("有","VERB"),("能","AUX"),("一","NUM"),("日","NOUN"),("用","VERB"),("其","PRON"),("力","NOUN"),("於","ADP"),("仁","NOUN"),("矣","PART"),("乎","PART"),("我","PRON"),("未","ADV"),("見","VERB"),("力","NOUN"),("不","ADV"),("足","VERB"),("者","PART"),("蓋","PART"),("有","VERB"),("之","PRON"),("矣","PART"),("我","PRON"),("未","ADV"),("之","PRON"),("見","VERB"),("也","PART")])
ex.append([("子","NOUN"),("曰","VERB"),("人","NOUN"),("之","SCONJ"),("過","VERB"),("也","PART"),("各","ADV"),("於","VERB"),("其","PRON"),("黨","NOUN"),("觀","VERB"),("過","NOUN"),("斯","ADV"),("知","VERB"),("人","NOUN"),("矣","PART")])
ex.append([("子","NOUN"),("曰","VERB"),("朝","NOUN"),("聞","VERB"),("道","NOUN"),("夕","NOUN"),("死","VERB"),("可","VERB"),("矣","PART")])
ex.append([("子","NOUN"),("曰","VERB"),("士","NOUN"),("志","VERB"),("於","ADP"),("道","NOUN"),("而","CCONJ"),("恥","VERB"),("惡","NOUN"),("衣","NOUN"),("惡","NOUN"),("食","NOUN"),("者","PART"),("未","ADV"),("足","AUX"),("與","ADV"),("議","VERB"),("也","PART")])
ex.append([("子","NOUN"),("曰","VERB"),("君子","NOUN"),("之","SCONJ"),("於","VERB"),("天","NOUN"),("下","NOUN"),("也","PART"),("無","VERB"),("適","VERB"),("也","PART"),("無","VERB"),("莫","ADV"),("也","PART"),("義","NOUN"),("之","PRON"),("與","ADP"),("比","VERB")])
ex.append([("子","NOUN"),("曰","VERB"),("君子","NOUN"),("懷","VERB"),("德","NOUN"),("小","VERB"),("人","NOUN"),("懷","VERB"),("土","NOUN"),("君子","NOUN"),("懷","VERB"),("刑","NOUN"),("小","VERB"),("人","NOUN"),("懷","VERB"),("惠","NOUN")])
ex.append([("子","NOUN"),("曰","VERB"),("放","VERB"),("於","ADP"),("利","NOUN"),("而","CCONJ"),("行","VERB"),("多","VERB"),("怨","NOUN")])
ex.append([("子","NOUN"),("曰","VERB"),("能","AUX"),("以","VERB"),("禮","VERB"),("讓","VERB"),("為","VERB"),("國","NOUN"),("於","ADP"),("從","VERB"),("政","NOUN"),("乎","PART"),("何","PRON"),("有","VERB"),("不","ADV"),("能","AUX"),("以","VERB"),("禮","VERB"),("讓","VERB"),("為","VERB"),("國","NOUN"),("如","VERB"),("禮","NOUN"),("何","PRON")])
from transformers import pipeline

# Text-generation pipeline; each call may generate at most 128 new tokens.
# NOTE(review): the model path is relative -- presumably a subdirectory of
# the cloned repository; confirm against the repository layout.
tgn = pipeline(
  "text-generation",
  "Xunzi-Qwen3-8B-base/Xunzi-Qwen3-8B-base",
  max_new_tokens=128,
)


def nlp(t):
  """Return the ``###text:`` / ``###UPOS:`` line pair generated for *t*.

  The prompt is the rendered few-shot examples in ``ex`` followed by an
  open ``###text:``/``###UPOS:`` pair for *t*.  The first two lines per
  example are skipped in the generated text and the next two lines are
  returned, joined by a newline.
  """
  prompt = str(ex) + f"###text:{t}\n###UPOS:"
  # Assumes generated_text starts with the prompt itself, so the answer
  # begins right after the example lines -- TODO confirm.
  generated_lines = tgn(prompt)[0]["generated_text"].split("\n")
  skip = len(ex) * 2
  return "\n".join(generated_lines[skip:skip + 2])


print(nlp("子曰不患無位患所以立不患莫己知求為可知也"))

「子曰不患無位患所以立不患莫己知求為可知也」に品詞付与してみたところ、私(安岡孝一)の手元では以下の結果が得られた。

###text:子曰不患無位患所以立不患莫己知求為可知也
###UPOS:子_NOUN|曰_VERB|不_ADV|患_VERB|無_VERB|位_NOUN|患_VERB|所_SCONJ|以_SCONJ|立_VERB|不_ADV|患_VERB|莫_ADV|己_PRON|知_VERB|求_VERB|爲_VERB|可_AUX|知_VERB|也_PART

「所_PART|以_ADV」を間違ってるものの、それ以外は読めているようだ。まあ、この「以」の品詞には議論の余地があって、どこまでOKか難しいところだが、それでもSCONJは無理だと思う。古典中国語(漢文)モデルとしては、あと一歩かな。

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?