SAMPLE
吾輩|は|猫|で|ある|。|名前|は|まだ|無|い|。
REFERENCE
PYTHON
微妙に改変。
import re

# Coarse tokenizer: each match is a "/" , a run of upper-case Latin letters,
# a run of lower-case Latin letters, a run of katakana, a run of hiragana,
# a run of kanji, or a single "。" / "、".
# NOTE(review): the "-" at the end of the hiragana class is a literal ASCII
# hyphen; confirm whether the long-vowel mark "ー" was intended instead.
p = re.compile(r"/|[A-Z]+|[a-z]+|[ァ-ンー]+|[ぁ-ん-]+|[ァ-ヶ]+|[一-龍]+|[。、]|/")

# Only runs made purely of these hiragana get the particle-splitting pass.
_HIRAGANA_RUN = re.compile(r'[あ-ん]+')
# A run starting with one of these characters has that character split off.
_LEADING_PARTICLES = frozenset('はがのにへともでを')
# Multi-character endings are tried before the single-character ones.
_TWO_CHAR_SUFFIXES = ('ので', 'から', 'まで')
_ONE_CHAR_SUFFIXES = frozenset('もはがでを')


def segment(text):
    """Split *text* into tokens with a small rule-based heuristic.

    The text is first cut into same-script runs by the module-level pattern
    ``p``. Runs that are not pure hiragana are kept whole. A pure-hiragana
    run is split at most once, using the first rule that applies:

    1. its first character is a leading particle -> ``[first, rest]``
    2. it ends with ``ので`` / ``から`` / ``まで``   -> ``[head, suffix]``
    3. it ends with one of ``もはがでを``          -> ``[head, last]``
    4. otherwise the run is kept whole.

    Empty pieces are never emitted, so a run that consists only of a
    particle (for example ``から``) yields exactly one token.

    Args:
        text: The string to tokenize. Characters not covered by ``p``
            (digits, spaces, ...) are skipped.

    Returns:
        A list of non-empty token strings in their original order.
    """
    tokens = []
    for run in p.findall(text):
        if not _HIRAGANA_RUN.fullmatch(run):
            tokens.append(run)
            continue

        if run[0] in _LEADING_PARTICLES:
            pieces = (run[0], run[1:])
        elif run.endswith(_TWO_CHAR_SUFFIXES):
            # Exact suffix match; a substring test against the concatenated
            # suffixes would also accept fragments such as "でか" or "らま".
            pieces = (run[:-2], run[-2:])
        elif run[-1] in _ONE_CHAR_SUFFIXES:
            pieces = (run[:-1], run[-1])
        else:
            pieces = (run,)

        # Drop the empty half that appears when the run is only a particle.
        tokens.extend(piece for piece in pieces if piece)
    return tokens


text = "吾輩は猫である。名前はまだ無い。"
m = p.findall(text)
text_m = segment(text)

# Output: tokens joined with "|". This is a bare expression, so the value is
# displayed in a REPL / notebook cell but nothing is printed by a script.
'|'.join(text_m)