More than 3 years have passed since last update.

CLARANSをJavaScriptで実装した

Posted at 2020-11-22

はじめに

「色々な機械学習処理をブラウザ上で試せるサイトを作った」中で実装したモデルの解説の七回目です。

今回はPAMおよびCLARANSの実装について解説します。

デモはこちらから。（TaskをClusteringにして、ModelのCLARANSを選択）
実際のコードはclarans.jsにあります。

なお、可視化部分については一切触れません。

概説

朱鷺の杜Wikiのアルゴリズムをコードに起こしただけです。

※PAMは認証モジュールのことではありませんのでご注意ください。

PAM(Partitioning Around Medoids)

最初にランダムにセントロイドを選択します。
そのセントロイドの中から一つをガリガリと入れ替え、最善のコストになる場所を見つけます。
fit関数一回の呼び出に対して、最善の入れ替えを各セントロイドで一回ずつ行います。

コストはセントロイドとそのカテゴリに属するデータとの距離の総和とし、それもガリガリと計算しています。
k-medois法と同じようなものですが、それとは異なり全パターンをチェックすることになります。

なんとなく間違っているような気がしているため、GUI側からは使用できないようにしています。

CLARA(Clustering LARge Applications)

イメージがいまいちつかめていないため、実装していません。

CLARANS(Clustering Large Applications based on RANdomized Search)

隣接した分割は、一つのデータだけカテゴリを変更した状態と考えます。

本質的なアルゴリズムは単純です。
全データ中から一つデータを選択し、そのカテゴリをランダムに変更してコストを計算、そしてコストが小さくなれば採用する、ということを繰り返します。
この繰り返しに対して、numlocalとmaxneighborという細かい条件を付けているだけです。

使用するコストは、カテゴリに属するデータの重心と各データとの距離の総和とします。

コード

PAM

this._centroidsには各クラスタのセントロイドとするデータのインデックスが設定されます。

class PAM {
	// http://ibisforest.org/index.php?CLARANS
	constructor(k) {
		this._k = k
	}

	_distance(a, b) {
		return Math.sqrt(a.reduce((acc, v, i) => acc + (v - b[i]) ** 2, 0));
	}

	_cost(centroids) {
		const c = centroids.map(v => this._x[v])
		const n = this._x.length
		let cost = 0;
		for (let i = 0; i < n; i++) {
			const category = argmin(c, v => this._distance(this._x[i], v))
			cost += this._distance(this._x[i], c[category])
		}
		return cost
	}

	init(datas) {
		this._x = datas;
		const idx = []
		for (let i = 0; i < this._x.length; idx.push(i++));
		shuffle(idx)
		this._centroids = idx.slice(0, this._k);
	}

	fit() {
		const n = this._x.length
		let init_cost = this._cost(this._centroids)
		for (let k = 0; k < this._k; k++) {
			let min_cost = Infinity;
			let min_idx = -1;
			for (let i = 0; i < n; i++) {
				if (this._centroids.some(c => c === i)) {
					continue
				}
				const new_c = this._centroids.concat()
				new_c[k] = i
				const new_cost = this._cost(new_c);
				if (new_cost < min_cost) {
					min_cost = new_cost
					min_idx = i
				}
			}
			if (min_cost < init_cost) {
				this._centroids[k] = min_idx
				init_cost = min_cost
			}
		}
	}

	predict() {
		const c = this._centroids.map(v => this._x[v])
		return this._x.map(x => {
			return argmin(c, v => this._distance(x, v));
		});
	}
}

CLARANS

this._categoriesには各データのクラスタ番号が設定されます。

class CLARANS {
	// http://ibisforest.org/index.php?CLARANS
	constructor(k) {
		this._k = k
	}

	_distance(a, b) {
		return Math.sqrt(a.reduce((acc, v, i) => acc + (v - b[i]) ** 2, 0));
	}

	_cost(categories) {
		const n = this._x.length
		const centroids = []
		const count = []
		for (let i = 0; i < n; i++) {
			const cat = categories[i]
			if (!centroids[cat]) {
				centroids[cat] = this._x[i].concat()
				count[cat] = 1
			} else {
				for (let k = 0; k < this._x[i].length; k++) {
					centroids[cat][k] += this._x[i][k]
				}
				count[cat]++
			}
		}
		for (let i = 0; i < centroids.length; i++) {
			for (let k = 0; k < centroids[i].length; k++) {
				centroids[i][k] /= count[i]
			}
		}
		let cost = 0;
		for (let i = 0; i < n; i++) {
			cost += this._distance(this._x[i], centroids[categories[i]])
		}
		return cost;
	}

	init(datas) {
		this._x = datas;
		this._categories = [];
		for (let i = 0; i < this._x.length; i++) {
			this._categories[i] = Math.floor(Math.random() * this._k);
		}
	}

	fit(numlocal, maxneighbor) {
		const n = this._x.length
		const categories = this._categories;
		let i = 1;
		let mincost = Infinity
		while (i <= numlocal) {
			let j = 1
			let cur_cost = this._cost(categories)
			while (j <= maxneighbor) {
				const swap = Math.floor(Math.random() * n)
				const cur_cat = categories[swap]
				const new_cat = Math.floor(Math.random() * (this._k - 1));
				categories[swap] = (new_cat >= cur_cat) ? new_cat + 1 : new_cat;
				const new_cost = this._cost(categories)
				if (new_cost < cur_cost) {
					j = 1;
					cur_cost = new_cost
					continue
				}
				j++
				categories[swap] = cur_cat
			}
			if (cur_cost < mincost) {
				mincost = cur_cost
			}
			i++
		}
	}

	predict() {
		return this._categories
	}
}

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up