3

More than 5 years have passed since last update.

単語境界で分割する。

Last updated at 2018-04-07 / Posted at 2018-04-07

tagcloudや既出単語のautocompleteで使う単語境界正規表現。

/[\w$]+/g

optが真値（truthy）であれば重複を許可する。省略した場合は重複を取り除いた一覧を返す。

/**
 * Extract word tokens (runs of `\w` characters or `$`) from a string.
 *
 * @param {string} d - Text to scan.
 * @param {*} [opt] - When truthy, duplicates are kept; otherwise each word
 *   is returned once, in first-seen order.
 * @returns {string[]} The matched words; an empty array when nothing matches.
 */
fn.anyword=(d,opt)=>{
 // String#match yields null when there is no match; normalise to [] so both
 // branches always return an array (the opt branch used to leak null).
 const ary=d.match(/[\w$]+/g)||[];
 // Default: unique words only. A truthy opt allows duplicates.
 return opt?ary:Array.from(new Set(ary));
};

全部

<!-- Flex row holding two textareas, marked with the bare attributes a and b. -->
<div f><textarea a></textarea><textarea b></textarea></div>
<style>
/* Page fills the viewport with no default margin. */
body {
  margin: 0;
  width: 100vw;
  height: 100vh;
}

/* Dark colours everywhere; no focus outline and no resize handle. */
* {
  background-color: #111;
  color: #eee;
  outline: none;
  resize: none;
}

/* The [f] container lays its children out in a full-height flex row. */
[f] {
  display: flex;
  height: 100%;
}

/* Each textarea takes half of the row. */
textarea {
  width: 50%;
  padding: 0.5rem;
}
</style>

// Namespace object that collects the small helpers used by this demo.
let fn={};

// q: shorthand for document.querySelector on the given selector string.
fn.q=(selector)=>{
 return document.querySelector(selector);
};

// sol: hands the value to Promise.resolve and returns the resulting promise.
fn.sol=(value)=>{
 return Promise.resolve(value);
};
/**
 * Extract word tokens (runs of `\w` characters or `$`) from a string.
 *
 * @param {string} d - Text to scan.
 * @param {*} [opt] - When truthy, duplicates are kept; otherwise each word
 *   is returned once, in first-seen order.
 * @returns {string[]} The matched words; an empty array when nothing matches.
 */
fn.anyword=(d,opt)=>{
 // String#match yields null when there is no match; normalise to [] so both
 // branches always return an array (the opt branch used to leak null).
 const ary=d.match(/[\w$]+/g)||[];
 // Default: unique words only. A truthy opt allows duplicates.
 return opt?ary:Array.from(new Set(ary));
}
;
// Look up the two textareas by their bare attributes.
let a=fn.q('[a]');
let b=fn.q('[b]');

// Write the unique-word list of the given text into b as 2-space-indented JSON.
let calc=(text)=>{
 const words=fn.anyword(text);
 b.value=JSON.stringify(words,null,2);
};

// On every input event, pass the current text through a resolved promise
// and let calc consume it from the promise callback.
a.oninput=function(ev){
 const pending=fn.sol(this.value);
 pending.then(calc);
};

追記、日本語について。

送仮名等は字句解析が必要になるが、それを除けば下の通り。

/**
 * Extract runs of characters in the U+3040–U+309F (hiragana) block.
 *
 * @param {string} d - Text to scan.
 * @param {*} [opt] - When truthy, duplicates are kept; otherwise each run
 *   is returned once, in first-seen order.
 * @returns {string[]} The matched runs; an empty array when nothing matches.
 */
fn.hiraword=(d,opt)=>{
 // String#match yields null when there is no match; normalise to [] so the
 // opt branch never returns null.
 const ary=d.match(/[\u3040-\u309f]+/g)||[];
 return opt?ary:Array.from(new Set(ary));
};
/**
 * Extract runs of characters in the U+30A0–U+30FF (katakana) block.
 *
 * @param {string} d - Text to scan.
 * @param {*} [opt] - When truthy, duplicates are kept; otherwise each run
 *   is returned once, in first-seen order.
 * @returns {string[]} The matched runs; an empty array when nothing matches.
 */
fn.kataword=(d,opt)=>{
 // String#match yields null when there is no match; normalise to [] so the
 // opt branch never returns null.
 const ary=d.match(/[\u30a0-\u30ff]+/g)||[];
 return opt?ary:Array.from(new Set(ary));
};
/**
 * Extract runs of characters in the U+4E00–U+9FFF range.
 *
 * @param {string} d - Text to scan.
 * @param {*} [opt] - When truthy, duplicates are kept; otherwise each run
 *   is returned once, in first-seen order.
 * @returns {string[]} The matched runs; an empty array when nothing matches.
 */
fn.kanjiword=(d,opt)=>{
 // U+4E00–U+9FFF; note that this is only a simplified approximation of the
 // kanji range.
 // String#match yields null when there is no match; normalise to [] so the
 // opt branch never returns null.
 const ary=d.match(/[\u4e00-\u9fff]+/g)||[];
 return opt?ary:Array.from(new Set(ary));
};

3

Register as a new user and use Qiita more conveniently

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

3