0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

gsacaで接尾辞配列(suffix array)構築

Last updated at Posted at 2025-04-27

接尾辞配列を線形時間で構築するgsacaの紹介。特徴は再帰を利用しない事。しかし入力に対して約17倍もの記憶空間を消費します。

実装編

ただ実装するだけでは面白くないので、本家の不便な点を改良。何が不便かと言うと入力配列の末尾が0かつ、途中に0を含めてはいけない点です。ちなみにJavaScriptで記述

/*
gsaca(S[,A,n,k])
@S: input(Array/Uint8Array/Uint16Array)
@A: output of suffixes(Array/Int32Array/Uint32Array). A size must be n+1 if A.constructor is TypedArray. if !A==true then process time is fast.
@n: S.length
@k: max symbol of S[0..n-1]. k:0-65535
@return: @A
*/
function gsaca(S,A,n,k){
	n=n||S.length;k=k&65535||255;
	var p=n++,P=new Uint32Array(n*3),ISA=P.subarray(n),GS=P.subarray(n*2),SA=A||new Uint32Array(n),a,b,c,i=0,j=-1,r,s=0,t,z=p;
	if(p<2)return p?SA[0]=0:0,SA;
	if(p<3)return SA[n=0|S[0]<S[1]]=1,SA[1-n]=0,SA;
	for(c=new Uint32Array(k+2<<1);i<p;)++c[S[i++]+1<<1|1];
	for(c[1]=1;s<n;)
		if(b=c[++j<<1|1])
			s+=GS[c[j<<1]=s]=b;
	for(SA[--c[1]]=i;i;SA[ISA[i]=--c[s+1]+a]=i)a=P[--i]=c[s=S[i]+1<<1];
	for(;z>0;z=b-1){
		for(GS[a=b=P[SA[i=c=z]]]=0;i>=a;P[s]=p)
			for(s=SA[i--],p=s-1>>>0;p<n;p=P[p])
				if((r=ISA[p])<=z){
					if(r>=a)GS[r]=1;
					break
				}
		for(i=j=a;i<=z;GS[i++]||(SA[j++]=s))ISA[s=SA[i]]=z;
		z=j;j=0;
		do{
			for(i=z-1,r=z;i>=a;)
				if(p=P[s=SA[i]],p<n)
					if(ISA[p]<b)SA[i--]=SA[--z],SA[z]=p;
					else--i,P[s]=P[p],P[p]=n;
				else SA[i]=SA[a++];
			if(z<r)GS[z]=r-z,++j
		}while(a<z);
		for(;j--;){
			for(i=z=a+GS[a];i>a;ISA[SA[r]=p]=r)
				r=P[p=SA[--i]],
				ISA[SA[t=ISA[p]]=SA[r+=--GS[r]]]=t;
			for(;i<z;P[p]=r+GS[r])r=P[p=SA[i++]];
			for(;a<z;)++GS[P[SA[a++]]]
		}SA[c]=b
	}
	for(SA[i=0]=n-1;i<n;)
		for(s=SA[i++]-1>>>0;s<n&&(r=ISA[s])^n;s=P[s])ISA[SA[SA[r]++]=s]=n;
	if(A){for(i=0;i<n;)A[i++]=A[i];return--A.length,A}
	return SA.subarray?SA.subarray(1):SA.slice(1)
}
var S=[98,97,110,97,110,97];//構築元の配列
//Array版
var SA=[];gsaca(S,SA);
//TypedArray版
var SA=new Uint32Array(S.length+1);
gsaca(S,SA);SA=SA.subarray(0,S.length)
//TypedArray 簡易版
var SA=gsaca(S)

性能

かなり高速ですがSAIS程ではないかな…。

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?