XunziALLMにXunzi-Qwen3-8B-baseが追加されていたので、Few-Shot PromptingによるUPOS (Universal Part-Of-Speech)品詞付与を試してみた。Few-Shot Promptingの例文は、2024年6月21日の記事と同じく、UD_Classical_Chinese-Kyotoの訓練データのうち、『論語』里仁篇の冒頭部分を用いた。
#! /usr/bin/python3
# pip3 install transformers accelerate
import os
# Location of the Xunzi-Qwen3-8B-base model repository on ModelScope.
url = "https://www.modelscope.cn/models/Xunzillm4cc/Xunzi-Qwen3-8B-base"
# Shallow-clone the repository into the current directory, unless a directory
# named after the last URL component is already present there.
os.system("test -d {} || git clone --depth=1 {}".format(os.path.basename(url), url))
class TextUPOSList(list):
    """List of tagged sentences that renders itself as a few-shot prompt.

    Each element is a sequence of ``(token, upos)`` string pairs.  ``str()``
    turns every sentence into two lines::

        ###text:<tokens concatenated without separator>
        ###UPOS:<token>_<upos>|<token>_<upos>|...

    Sentences are separated by a newline and the whole string ends with a
    trailing newline, so a new ``###text:`` query can be appended directly.
    """

    def __str__(self):
        """Return the prompt text for all sentences held in the list."""
        blocks = []
        for sentence in self:
            text = "".join(token for token, _ in sentence)
            tags = "|".join(f"{token}_{upos}" for token, upos in sentence)
            blocks.append(f"###text:{text}\n###UPOS:{tags}")
        # The trailing newline is emitted even for an empty list, as before.
        return "\n".join(blocks) + "\n"
# Few-shot examples: one inner list per sentence, each item a (token, UPOS) pair.
# Per the accompanying article, these come from the opening part of the Liren
# chapter of the Analects in the UD_Classical_Chinese-Kyoto training data.
ex = TextUPOSList([
    [("子","NOUN"),("曰","VERB"),("里","VERB"),("仁","NOUN"),("為","AUX"),("美","NOUN"),("擇","VERB"),("不","ADV"),("處","VERB"),("仁","NOUN"),("焉","ADV"),("得","AUX"),("知","VERB")],
    [("子","NOUN"),("曰","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("不","ADV"),("可","AUX"),("以","VERB"),("久","VERB"),("處","VERB"),("約","NOUN"),("不","ADV"),("可","AUX"),("以","VERB"),("長","VERB"),("處","VERB"),("樂","NOUN"),("仁","VERB"),("者","PART"),("安","VERB"),("仁","VERB"),("知","VERB"),("者","PART"),("利","VERB"),("仁","VERB")],
    [("子","NOUN"),("曰","VERB"),("唯","ADV"),("仁","VERB"),("者","PART"),("能","AUX"),("好","VERB"),("人","NOUN"),("能","AUX"),("惡","VERB"),("人","NOUN")],
    [("子","NOUN"),("曰","VERB"),("苟","ADV"),("志","VERB"),("於","ADP"),("仁","NOUN"),("矣","PART"),("無","ADV"),("惡","VERB"),("也","PART")],
    [("子","NOUN"),("曰","VERB"),("富","VERB"),("與","ADP"),("貴","VERB"),("是","PRON"),("人","NOUN"),("之","SCONJ"),("所","PART"),("欲","VERB"),("也","PART"),("不","ADV"),("以","VERB"),("其","PRON"),("道","NOUN"),("得","VERB"),("之","PRON"),("不","ADV"),("處","VERB"),("也","PART"),("貧","VERB"),("與","ADP"),("賤","VERB"),("是","PRON"),("人","NOUN"),("之","SCONJ"),("所","PART"),("惡","VERB"),("也","PART"),("不","ADV"),("以","VERB"),("其","PRON"),("道","NOUN"),("得","VERB"),("之","PRON"),("不","ADV"),("去","VERB"),("也","PART"),("君子","NOUN"),("去","VERB"),("仁","VERB"),("惡","ADV"),("乎","PART"),("成","VERB"),("名","NOUN"),("君子","NOUN"),("無","VERB"),("終","VERB"),("食","NOUN"),("之","SCONJ"),("間","NOUN"),("違","VERB"),("仁","VERB"),("造","VERB"),("次","NOUN"),("必","ADV"),("於","VERB"),("是","PRON"),("顛","ADV"),("沛","VERB"),("必","ADV"),("於","VERB"),("是","PRON")],
    [("子","NOUN"),("曰","VERB"),("我","PRON"),("未","ADV"),("見","VERB"),("好","VERB"),("仁","VERB"),("者","PART"),("惡","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("好","VERB"),("仁","VERB"),("者","PART"),("無","ADV"),("以","ADV"),("尚","VERB"),("之","PRON"),("惡","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("其","PRON"),("為","VERB"),("仁","NOUN"),("矣","PART"),("不","ADV"),("使","VERB"),("不","ADV"),("仁","VERB"),("者","PART"),("加","VERB"),("乎","ADP"),("其","PRON"),("身","NOUN"),("有","VERB"),("能","AUX"),("一","NUM"),("日","NOUN"),("用","VERB"),("其","PRON"),("力","NOUN"),("於","ADP"),("仁","NOUN"),("矣","PART"),("乎","PART"),("我","PRON"),("未","ADV"),("見","VERB"),("力","NOUN"),("不","ADV"),("足","VERB"),("者","PART"),("蓋","PART"),("有","VERB"),("之","PRON"),("矣","PART"),("我","PRON"),("未","ADV"),("之","PRON"),("見","VERB"),("也","PART")],
    [("子","NOUN"),("曰","VERB"),("人","NOUN"),("之","SCONJ"),("過","VERB"),("也","PART"),("各","ADV"),("於","VERB"),("其","PRON"),("黨","NOUN"),("觀","VERB"),("過","NOUN"),("斯","ADV"),("知","VERB"),("人","NOUN"),("矣","PART")],
    [("子","NOUN"),("曰","VERB"),("朝","NOUN"),("聞","VERB"),("道","NOUN"),("夕","NOUN"),("死","VERB"),("可","VERB"),("矣","PART")],
    [("子","NOUN"),("曰","VERB"),("士","NOUN"),("志","VERB"),("於","ADP"),("道","NOUN"),("而","CCONJ"),("恥","VERB"),("惡","NOUN"),("衣","NOUN"),("惡","NOUN"),("食","NOUN"),("者","PART"),("未","ADV"),("足","AUX"),("與","ADV"),("議","VERB"),("也","PART")],
    [("子","NOUN"),("曰","VERB"),("君子","NOUN"),("之","SCONJ"),("於","VERB"),("天","NOUN"),("下","NOUN"),("也","PART"),("無","VERB"),("適","VERB"),("也","PART"),("無","VERB"),("莫","ADV"),("也","PART"),("義","NOUN"),("之","PRON"),("與","ADP"),("比","VERB")],
    [("子","NOUN"),("曰","VERB"),("君子","NOUN"),("懷","VERB"),("德","NOUN"),("小","VERB"),("人","NOUN"),("懷","VERB"),("土","NOUN"),("君子","NOUN"),("懷","VERB"),("刑","NOUN"),("小","VERB"),("人","NOUN"),("懷","VERB"),("惠","NOUN")],
    [("子","NOUN"),("曰","VERB"),("放","VERB"),("於","ADP"),("利","NOUN"),("而","CCONJ"),("行","VERB"),("多","VERB"),("怨","NOUN")],
    [("子","NOUN"),("曰","VERB"),("能","AUX"),("以","VERB"),("禮","VERB"),("讓","VERB"),("為","VERB"),("國","NOUN"),("於","ADP"),("從","VERB"),("政","NOUN"),("乎","PART"),("何","PRON"),("有","VERB"),("不","ADV"),("能","AUX"),("以","VERB"),("禮","VERB"),("讓","VERB"),("為","VERB"),("國","NOUN"),("如","VERB"),("禮","NOUN"),("何","PRON")],
])
from transformers import pipeline
# Text-generation pipeline over the locally cloned model directory; each call
# generates at most 128 new tokens.
tgn = pipeline(
    "text-generation",
    "Xunzi-Qwen3-8B-base/Xunzi-Qwen3-8B-base",
    max_new_tokens=128,
)


def nlp(t):
    """Tag the sentence *t* with UPOS labels via few-shot prompting.

    The prompt is the rendered example list followed by an open
    ``###text:``/``###UPOS:`` pair for *t*.  Returns the two lines of the
    generated text that follow the example lines, joined by a newline.
    """
    prompt = str(ex) + f"###text:{t}\n###UPOS:"
    generated = tgn(prompt)[0]["generated_text"]
    # Each example occupies two lines of the prompt; skip those lines and keep
    # the next two.  This assumes the generated text begins with the prompt.
    skip = len(ex) * 2
    return "\n".join(generated.split("\n")[skip:skip + 2])


print(nlp("子曰不患無位患所以立不患莫己知求為可知也"))
「子曰不患無位患所以立不患莫己知求為可知也」に品詞付与してみたところ、私(安岡孝一)の手元では以下の結果が得られた。
###text:子曰不患無位患所以立不患莫己知求為可知也
###UPOS:子_NOUN|曰_VERB|不_ADV|患_VERB|無_VERB|位_NOUN|患_VERB|所_SCONJ|以_SCONJ|立_VERB|不_ADV|患_VERB|莫_ADV|己_PRON|知_VERB|求_VERB|爲_VERB|可_AUX|知_VERB|也_PART
正解の「所_PART|以_ADV」を「所_SCONJ|以_SCONJ」と間違っているものの、それ以外は読めているようだ。まあ、この「以」の品詞には議論の余地があって、どこまでOKか難しいところだが、それでもSCONJは無理だと思う。古典中国語(漢文)モデルとしては、あと一歩かな。