0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

JavaScriptでdivsufsort

Posted at

divsufsortとは接尾辞配列を構築するprogramである(その手の業界では極めて高速な事で有名)。原作はC言語で書かれているが、他の言語に移植するとなると事情は変わってくるかもしれない。

v1.01辺りまでは移植は容易だったが…、ある版からpointer乱舞しだしたのだ。移植の困難さに加え、高速な処理の弊害にもなってくる可能性もある。

JavaScriptではTypedArraysubarray関数が使えるようになってから実装が容易になったが、その成果はと言うと…

実装編

const SS_BLOCKSIZE=1024,SS_BLOCKSIZE2=SS_BLOCKSIZE*SS_BLOCKSIZE-1,
	SS_MISORT_STACKSIZE=16,SS_SMERGE_STACKSIZE=32,TR_STACKSIZE=64,
	SQQ_TABLE=function(A,B,a,b,c,d,e,f){for(;b<255;B[++b]=c+=f&(1<<e)-1,f>>=e,d-=e)for(e=b<7?5:b<65?2:1;d<e;d+=6)f|=(A.charCodeAt(a++)&63)<<d;return B}("PSiLH?njgjffYVfUVYUVUUUUz??}{{{^w]{vvnvvV[mVkVkZUkjUUUUUA",new Uint8Array(256),0,0,0,0),
	LOG_TABLE=function(A,a){for(A[0]=-1;a<255;)A[++a]=A[a>>1]+1;return A}(new Int8Array(256),1);

function getIndex(a){if(a>-1)return a;return~a}

function TRBudget(chance,incVal){
	this.chance=chance;
	this.remain=this.incVal=incVal
}
TRBudget.prototype={count:0,
check:function(size){
	if(size<=this.remain){
		this.remain-=size;
		return 1}
	if(!this.chance){
		this.count+=size;
		return}
	this.remain+=this.incVal-size;
	return this.chance--
}};
function ssIlg(n){
	if(n&0xFF00)return 8+LOG_TABLE[n>>8&0xFF];
	return LOG_TABLE[n&0xFF]
}
function ssIsqrt(x){
	if(x>SS_BLOCKSIZE2)return SS_BLOCKSIZE;
	var e=x&0xFFFF0000?x&0xFF000000?24+LOG_TABLE[x>>>24]:16+LOG_TABLE[x>>16&0xFF]: x&0xFF00?8+LOG_TABLE[x>>8&0xFF]:LOG_TABLE[x&0xFF],y;
	if(e<8)return SQQ_TABLE[x]>>4;
	if(e>15){
		y=SQQ_TABLE[x>>e-6-(e&1)]<<(e>>1)-7;
		if(e>23)y=y+1+x/y>>1;
		y=y+1+x/y>>1}
	else y=(SQQ_TABLE[x>>e-6-(1&e)]>>8-(e>>1));
	if(x<y*y)return--y;
	return y
}
function ssCompare3(B,SA,p,q,depth){
	var a=depth+SA[p],b=depth+SA[q];p=SA[p+1]+2;q=SA[q+1]+2;
	if(p-a>q-b)for(;b<q&&B[a]===B[b];b++)a++;
	else for(;a<p&&B[a]===B[b];b++)a++;
	if(a<p){
		if(b<q)return B[a]-B[b];
		return 1}
	if(b<q)return-1;
	return 0
}
function ssCompare4(B,SA,a,b,p,depth){
	var l=b+2,m=SA[p+1]+2;a+=depth;b=depth+SA[p];
	if(l-a>m-b)for(;b<m&&B[a]===B[b];b++)a++;
	else for(;a<l&&B[a]===B[b];b++)a++;
	if(a<l){
		if(b<m)return B[a]-B[b];
		return 1}
	if(b<m)return-1;
	return 0
}
function ssInsertionSort(B,SA,P,first,last,depth){
	for(var i=--last,j,r,t;i>first;SA[--j]=t){
		for(j=i,t=SA[--i];0<(r=ssCompare3(B,P,t,SA[j],depth));){
			for(;SA[j-1]=SA[j],j++<last&&SA[j]<0;);
			if(last<j)break
		}
		if(!r)SA[j]=~SA[j]
	}
}
function ssFixDown(A,P,S,i,size){
	var v=S[i],c=A[P[v]],d,e,j,k;
	for(;(j=2*i+1)<size;S[i]=S[i=k]){
		d=A[P[S[k=j++]]];
		e=A[P[S[j]]];
		if(d<e)k=j,d=e;
		if(d<=c)break
	}S[i]=v
}
function ssHeapSort(A,SA,P,saIdx,size){
	var m=size,i,t,S=SA.subarray(saIdx);
	if(~m&1)
		if(A[P[t=S[i=--m>>1]]]<A[P[S[m]]])
			S[i]=S[m],S[m]=t;
	for(i=m>>1;i;)ssFixDown(A,P,S,--i,m);
	if(~size&1)
		t=S[0],S[0]=S[m],S[m]=t,
		ssFixDown(A,P,S,0,m);
	for(;m;S[m]=t)
		t=S[0],S[0]=S[--m],
		ssFixDown(A,P,S,0,m)
}
function ssMedian3(T,SA,P,v1,v2,v3){
	var t,a=T[P[SA[v1]]],b=T[P[SA[v2]]],c=T[P[SA[v3]]];
	if(a>b)t=v1,v1=v2,v2=t,t=a,a=b,b=t;
	if(b>c){
		if(a>c)return v1;
		return v3
	}return v2
}
function ssMedian5(T,SA,P,v1,v2,v3,v4,v5){
	var t,a=T[P[SA[v1]]],b=T[P[SA[v2]]],c=T[P[SA[v3]]],d=T[P[SA[v4]]],e=T[P[SA[v5]]];
	if(b>c)t=v2,v2=v3,v3=t,t=b,b=c,c=t;
	if(d>e)t=v4,v4=v5,v5=t,t=d,d=e,e=t;
	if(b>d)t=v2,v2=v4,v4=t,t=b,b=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
	if(a>c)t=v1,v1=v3,v3=t,t=a,a=c,c=t;
	if(a>d)t=v1,v1=v4,v4=t,t=a,a=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
	if(c>d)return v4;
	return v3
}
function ssPivot(A,SA,P,first,last){
	var t=last-first,middle=first+(t>>1);
	if(t<513){
		if(t<33)return ssMedian3(A,SA,P,first,middle,--last);
		t>>=2;return ssMedian5(A,SA,P,first,first+t,middle,--last-t,last);
	}
	t>>=3;
	return ssMedian3(A,SA,P,ssMedian3(A,SA,P,first,first+t,first+t*2),ssMedian3(A,SA,P,middle-t,middle,middle+t),ssMedian3(A,SA,P,--last-t*2,last-t,last))
}
function ssPartition(SA,P,first,last,depth){
	for(var a=first-1,b=last,t;;SA[a]=t){
		for(;++a<b&&P[t=SA[a]]+depth>P[++t];)SA[a]=~SA[a];
		for(;--b>a&&P[t=SA[b]]+depth<=P[++t];);
		if(b<=a)break;
		t=~SA[b];SA[b]=SA[a]}
	if(first<a)SA[first]=~SA[first];
	return a
}
function ssMultiKeyIntroSort(B,SA,P,first,last,depth){
	var a,b,c,d,e,f,p=0,s=SS_MISORT_STACKSIZE,t,v,x=0,limit=ssIlg(last-first),A,Fi=new Int32Array(s*4),La=Fi.subarray(s),De=La.subarray(s),Li=De.subarray(s);
	for(;;){
		if(last-first<9){
			if(last-first>1)ssInsertionSort(B,SA,P,first,last,depth);
			if(!p)return;
			first=Fi[--p];last=La[p];depth=De[p];limit=Li[p];
			continue
		}
		A=B.subarray(depth);
		limit||ssHeapSort(A,SA,P,first,last-first);
		if(--limit<0){
			for(v=A[P[SA[a=first]]];++a<last;)
				if((x=A[P[SA[a]]])!==v){
					if(a-first>1)break;
					v=x;first=a
				}
			if(A[P[SA[first]]-1]<v)
				first=ssPartition(SA,P,first,a,depth);
			if(a-first<=last-a)
				if(a-first>1)
					La[p]=last,Fi[p]=last=a,De[p]=depth++,Li[p++]=-1,
					limit=ssIlg(a-first);
				else first=a,limit=-1;
			else if(last-a>1)
				Fi[p]=first,La[p]=first=a,De[p]=depth+1,Li[p++]=ssIlg(a-first),
				limit=-1;
			else last=a,depth++,limit=ssIlg(a-first);
			continue
		}
		v=A[P[SA[a=ssPivot(A,SA,P,first,last)]]];
		t=SA[first],SA[first]=SA[a],SA[a]=t;
		for(b=first;++b<last&&(x=A[P[SA[b]]])===v;);
		if((a=b)<last&&x<v)
			for(;++b<last&&(x=A[P[SA[b]]])<=v;)
				x===v&&(t=SA[b],SA[b]=SA[a],SA[a++]=t);
		for(c=last;--c>b&&(x=A[P[SA[c]]])===v;);
		if(b<(d=c)&&x>v)
			for(;--c>b&&(x=A[P[SA[c]]])>=v;)
				x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t);
		for(;b<c;){
			t=SA[b],SA[b]=SA[c],SA[c]=t;
			for(;++b<c&&(x=A[P[SA[b]]])<=v;)
				x===v&&(t=SA[b],SA[b]=SA[a],SA[a++]=t);
			for(;--c>b&&(x=A[P[SA[c]]])>=v;)
				x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t)
		}
		if(a<=d){c=b-1;
			if((s=a-first)>(t=b-a))s=t;
			for(e=first,f=b-s;s-->0;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
			if((s=d-c)>(t=~d+last))s=t;
			for(e=b,f=last-s;s-->0;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
			a=first+b-a;c+=last-d;
			b=v<=A[P[SA[a]]-1]?a:ssPartition(SA,P,a,c,depth);
			if(a-first<=last-c)
				if(last-c<=c-b)
					Fi[p]=b,La[p]=c,De[p]=depth+1,Li[p++]=ssIlg(c-b),
					Fi[p]=c,La[p]=last,De[p]=depth,Li[p++]=limit,
					last=a;
				else if(a-first<=c-b)
					Fi[p]=c,La[p]=last,De[p]=depth,Li[p++]=limit,
					Fi[p]=b,La[p]=c,De[p]=depth+1,Li[p++]=ssIlg(c-b),
					last=a;
				else Fi[p]=c,La[p]=last,De[p]=depth,Li[p++]=limit,
					Fi[p]=first,La[p]=a,De[p]=depth++,Li[p++]=limit,
					first=b,last=c,limit=ssIlg(c-b);
			else if(a-first<=c-b)
				Fi[p]=b,La[p]=c,De[p]=depth+1,Li[p++]=ssIlg(c-b),
				Fi[p]=first,La[p]=a,De[p]=depth,Li[p++]=limit,
				first=c;
			else if(last-c<=c-b)
				Fi[p]=first,La[p]=a,De[p]=depth,Li[p++]=limit,
				Fi[p]=b,La[p]=first=c,De[p]=depth+1,Li[p++]=ssIlg(c-b);
			else Fi[p]=first,La[p]=a,De[p]=depth,Li[p++]=limit,
				La[p]=last,Fi[p]=last=c,De[p]=depth++,Li[p++]=limit,
				first=b,limit=ssIlg(c-b)
		}else{
			if(A[P[SA[first]]-1]<v)
				first=ssPartition(SA,P,first,last,depth),
				limit=ssIlg(last-first);
			else++limit;
			depth++
		}
	}
}
function ssBlockSwap(SA,a,b,n,t){
	for(;n-->0;SA[b++]=t)t=SA[a],SA[a++]=SA[b]
}
function ssRotate(SA,first,middle,last){
	var l=middle-first,r=last-middle,a,b,t;
	for(;l>0&&r>0;){
		if(l===r){ssBlockSwap(SA,first,middle,l);break}
		if(l<r)for(b=middle-1,t=SA[a=last-1];;){
			SA[a]=SA[b];SA[b]=SA[--a];
			if(--b<first){
				SA[last=a]=t;r+=~l;
				if(r<=l)break;
				b=middle-1;t=SA[--a]
			}
		}else for(b=middle,t=SA[a=first];;){
			SA[a]=SA[b];SA[b]=SA[++a];
			if(++b>=last){
				SA[a]=t;first=a+1;l+=~r;
				if(l<=r)break;
				b=middle;t=SA[++a]
			}
		}
	}
}
function ssInplaceMerge(B,SA,P,first,middle,last,depth){
	var a,b,p,q,r,x,half,len;
	do{
		a=SA[last-1];x=a<0;p=x?~a:a;
		a=first;r=-1;len=middle-first;
		for(half=len>>1;len;half>>=1){
			q=SA[b=a+half],q=ssCompare3(B,P,q>-1?q:~q,p,depth);
			if(q<0)a=b+1,half-=~len&1;
			else r=q;len=half
		}
		if(a<middle){
			if(!r)SA[a]=~SA[a];
			ssRotate(SA,a,middle,last);
			last-=middle-a;middle=a;
			if(first===a)break
		}
		--last;
		if(x)for(;SA[--last]<0;);
	}while(middle!==last)
}
function ssMergeForward(B,SA,P,first,middle,last,buf,depth){
	ssBlockSwap(SA,buf,first,middle-first);
	var bufEnd=~first+buf+middle,a=first,b=buf,c=middle,r,t=SA[a];
	for(;;){
		r=ssCompare3(B,P,SA[b],SA[c],depth);
		if(r<0)
			do{
				SA[a++]=SA[b];
				if(bufEnd<=b)return SA[bufEnd]=t;
				SA[b++]=SA[a]
			}while(SA[b]<0);
		else if(r>0)
			do{
				SA[a++]=SA[c],SA[c++]=SA[a];
				if(last<=c){
					for(;b<bufEnd;SA[b++]=SA[a])SA[a++]=SA[b];
					SA[a]=SA[b];SA[b]=t;
					return
				}
			}while(SA[c]<0);
		else{SA[c]=~SA[c];
			do{
				SA[a++]=SA[b];
				if(bufEnd<=b)return SA[bufEnd]=t;
				SA[b++]=SA[a]
			}while(SA[b]<0);
			do{
				SA[a++]=SA[c],SA[c++]=SA[a];
				if(last<=c){
					for(;b<bufEnd;SA[b++]=SA[a])SA[a++]=SA[b];
					SA[a]=SA[b];SA[b]=t;
					return
				}
			}while(SA[c]<0)
		}
	}
}
function ssMergeBackward(B,SA,P,first,middle,last,buf,depth){
	ssBlockSwap(SA,buf,middle,last-middle);
	var x=0,a=last-1,b=~middle+buf+last,c=middle-1,t=SA[a],r,p1=SA[b],p2=SA[middle-1];
	if(p1<0)p1=~p1,x|=1;
	if(p2<0)p2=~p2,x|=2;
	for(;;){
		r=ssCompare3(B,P,p1,p2,depth);
		if(r>0){
			if(x&1)for(x^=1;SA[a--]=SA[b],SA[b--]=SA[a],SA[b]<0;);
			SA[a--]=SA[b];
			if(b<=buf){SA[buf]=t;break}
			SA[b--]=SA[a];p1=SA[b];
			if(p1<0)p1=~p1,x|=1
		}else if(r<0){
			if(x&2)for(x^=2;SA[a--]=SA[c],SA[c--]=SA[a],SA[c]<0;);
			SA[a--]=SA[c];SA[c--]=SA[a];
			if(c<first){
				for(;buf<b;SA[b--]=SA[a])SA[a--]=SA[b];
				SA[a]=SA[b];SA[b]=t;break
			}
			p2=SA[c];if(p2<0)p2=~p2,x|=2
		}else{
			if(x&1)for(x^=1;SA[a--]=SA[b],SA[b--]=SA[a],SA[b]<0;);
			SA[a--]=~SA[b];
			if(b<=buf){SA[buf]=t;break}
			SA[b--]=SA[a];
			if(x&2)for(x^=2;SA[a--]=SA[c],SA[c--]=SA[a],SA[c]<0;);
			SA[a--]=SA[c],SA[c--]=SA[a];
			if(c<first){
				for(;buf<b;SA[b--]=SA[a])SA[a--]=SA[b];
				SA[a]=SA[b];SA[b]=t;break
			}
			p1=SA[b];if(p1<0)p1=~p1,x|=1;
			p2=SA[c];if(p2<0)p2=~p2,x|=2
		}
	}
}
function ssSwapMerge(B,SA,P,first,middle,last,buf,bufSize,depth){
	var p=0,check=0,len,l=SS_SMERGE_STACKSIZE,r,m,lm,rm,half,next,F=new Int32Array(l*4),M=F.subarray(l),L=M.subarray(l),C=L.subarray(l);
	for(;;){
		if(last-middle<=bufSize){
			first<middle&&middle<last&&ssMergeBackward(B,SA,P,first,middle,last,buf,depth);
			if(check&1||(check&2)&&!ssCompare3(B,P,getIndex(SA[first-1]),SA[first],depth))
				SA[first]=~SA[first];
			if((check&4)&&!ssCompare3(B,P,getIndex(SA[last-1]),SA[last],depth))
				SA[last]=~SA[last];
			if(!p)return;
			first=F[--p];middle=M[p];last=L[p];check=C[p];
			continue
		}
		if(middle-first<=bufSize){
			first<middle&&ssMergeForward(B,SA,P,first,middle,last,buf,depth);
			if(check&1||(check&2)&&!ssCompare3(B,P,getIndex(SA[first-1]),SA[first],depth))
				SA[first]=~SA[first];
			if((check&4)&&!ssCompare3(B,P,getIndex(SA[last-1]),SA[last],depth))
				SA[last]=~SA[last];
			if(!p)return;
			first=F[--p];middle=M[p];last=L[p];check=C[p];
			continue
		}
		m=0,len=middle-first<last-middle?middle-first:last-middle;
		for(half=len>>1;len>0;len=half,half>>=1)
			if(ssCompare3(B,P,getIndex(SA[middle+m+half]),getIndex(SA[~m+middle-half]),depth)<0)
				m-=~half,half-=1&len^1;
		if(m>0){
			rm=middle+m;next=0;
			ssBlockSwap(SA,lm=middle-m,l=r=middle,m);
			if(rm<last)
				if(SA[rm]<0){
					SA[rm]=~SA[rm];
					if(first<lm)for(next|=4;SA[--l]<0;);
					next|=1
				}else if(first<lm)for(next|=2;SA[r]<0;)r++;
			if(l-first<=last-r)
				F[p]=r,M[p]=rm,L[p]=last,C[p++]=next&3|check&4,
				middle=lm,last=l,check=check&3|next&4;
			else{
				if(r===middle&&(next&2))next^=6;
				F[p]=first,M[p]=lm,L[p]=l,C[p++]=check&3|next&4;
				first=r;middle=rm;check=next&3|check&4
			}
		}else{
			if(!ssCompare3(B,P,getIndex(SA[middle-1]),SA[middle],depth))
				SA[middle]=~SA[middle];
			if(check&1||(check&2)&&!ssCompare3(B,P,getIndex(SA[first-1]),SA[first],depth))
				SA[first]=~SA[first];
			if((check&4)&&!ssCompare3(B,P,getIndex(SA[last-1]),SA[last],depth))
				SA[last]=~SA[last];
			if(!p)return;
			first=F[--p];middle=M[p];last=L[p];check=C[p]
		}
	}
}
function ssSort(B,SA,P,first,last,buf,bufSize,depth,n,lastSuffix){
	lastSuffix&&first++;
	var limit=0,middle=last,a=first,b,i=0,j,k,curBuf,curBufSize;
	if(bufSize<SS_BLOCKSIZE&&bufSize<last-first){
		limit=ssIsqrt(last-first);
		if(bufSize<limit){
			if(limit>SS_BLOCKSIZE)limit=SS_BLOCKSIZE;
			buf=middle=last-limit;
			bufSize=limit
		}else limit=0
	}
	for(;middle>a+SS_BLOCKSIZE;a+=SS_BLOCKSIZE){
		k=SS_BLOCKSIZE;
		ssMultiKeyIntroSort(B,SA,P,b=a,curBuf=a+k,depth);
		curBufSize=last-curBuf;
		if(curBufSize<=bufSize)curBufSize=bufSize,curBuf=buf;
		for(j=i++;j&1;j>>=1)
			ssSwapMerge(B,SA,P,b-k,b,b+k,curBuf,curBufSize,depth),
			b-=k,k<<=1
	}
	ssMultiKeyIntroSort(B,SA,P,a,middle,depth);
	for(k=SS_BLOCKSIZE;i;i>>=1,k<<=1)
		if(i&1)ssSwapMerge(B,SA,P,a-k,a,middle,buf,bufSize,depth),a-=k;
	if(limit)
		ssMultiKeyIntroSort(B,SA,P,middle,last,depth),
		ssInplaceMerge(B,SA,P,first,middle,last,depth);
	if(lastSuffix){
		b=P[i=SA[first-1]];n-=2;
		for(a=first;a<last&&(SA[a]<0||ssCompare4(B,P,b,n,SA[a],depth)>0);)
			SA[a-1]=SA[a++];
		SA[a-1]=i
	}
}
function trIlg(n){
	if(n&0xFFFF0000){
		if(n&0xFF000000)return 24+LOG_TABLE[n>>>24];
		return 16+LOG_TABLE[n>>16&0xFF]}
	if(n&0xFF00)return 8+LOG_TABLE[n>>8&0xFF];
	return LOG_TABLE[n&0xFF]
}
function trInsertionSort(SA,ISd,first,last){
	for(var a=first,b,r,t;b=a,++a<last;SA[b+1]=t){
		for(t=SA[a];(r=ISd[t]-ISd[SA[b]])<0;){
			for(;SA[b+1]=SA[b],first<b--&&SA[b]<0;);
			if(b<first)break
		}
		if(!r)SA[b]=~SA[b]
	}
}
function trFixDown(SA,ISd,I,i,size){
	var v=I[i],c=ISd[v],d,e,j,k;
	for(;(j=i*2+1)<size;I[i]=I[i=k]){
		d=ISd[I[k=j++]];
		e=ISd[I[j]];
		if(d<e)k=j,d=e;
		if(d<=c)break
	}I[i]=v
}
function trHeapSort(SA,ISd,I,size){
	var i,m=size,t;
	if(~size&1)
		if(ISd[t=I[i=--m>>1]]<ISd[I[m]])
			I[i]=I[m],I[m]=t;
	for(i=m>>1;i;)trFixDown(SA,ISd,I,--i,m);
	if(~size&1)
		t=I[0],I[0]=I[m],I[m]=t,
		trFixDown(SA,ISd,I,0,m);
	for(;m;I[m]=t)
		t=I[0],I[0]=I[--m],
		trFixDown(SA,ISd,I,0,m)
}
function trMedian3(SA,ISd,v1,v2,v3){
	var t,a=ISd[SA[v1]],b=ISd[SA[v2]],c=ISd[SA[v3]];
	if(a>b)t=v1,v1=v2,v2=t,t=a,a=b,b=t;
	if(b>c){
		if(a>c)return v1;
		return v3
	}return v2
}
function trMedian5(SA,ISd,v1,v2,v3,v4,v5){
	var t,a=ISd[SA[v1]],b=ISd[SA[v2]],c=ISd[SA[v3]],d=ISd[SA[v4]],e=ISd[SA[v5]];
	if(b>c)t=v2,v2=v3,v3=t,t=b,b=c,c=t;
	if(d>e)t=v4,v4=v5,v5=t,t=d,d=e,e=t;
	if(b>d)t=v2,v2=v4,v4=t,t=b,b=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
	if(a>c)t=v1,v1=v3,v3=t,t=a,a=c,c=t;
	if(a>d)t=v1,v1=v4,v4=t,t=a,a=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
	if(c>d)return v4;
	return v3
}
function trPivot(SA,ISd,first,last){
	var t=last-first,middle=first+(t>>1);
	if(t<513){
		if(t<33)return trMedian3(SA,ISd,first,middle,--last);
		t>>=2;return trMedian5(SA,ISd,first,first+t,middle,--last-t,last)
	}
	t>>=3;
	first=trMedian3(SA,ISd,first,first+t,first+t*2);
	middle=trMedian3(SA,ISd,middle-t,middle,middle+t);
	last=trMedian3(SA,ISd,--last-t*2,last-t,last);
	return trMedian3(SA,ISd,first,middle,last)
}
function trPartition(SA,ISd,first,middle,last,v){
	var a,b=middle-1,c,d,e,f,s,t,x=0;
	for(;++b<last&&(x=ISd[SA[b]])===v;);
	if((a=b)<last&&x<v)
		for(;++b<last&&(x=ISd[SA[b]])<=v;)
			x===v&&(t=SA[a],SA[a++]=SA[b],SA[b]=t);
	for(c=last;--c>b&&(x=ISd[SA[c]])===v;);
	if(b<(d=c)&&x>v)
		for(;--c>b&&(x=ISd[SA[c]])>=v;)
			x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t);
	for(;b<c;){
		t=SA[c],SA[c]=SA[b],SA[b]=t;
		for(;++b<c&&(x=ISd[SA[b]])<=v;)
			x===v&&(t=SA[a],SA[a++]=SA[b],SA[b]=t);
		for(;--c>b&&(x=ISd[SA[c]])>=v;)
			x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t)
	}
	if(a<=d){c=b-1;
		if((s=a-first)>(t=b-a))s=t;
		for(e=first,f=b-s;s--;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
		if((s=d-c)>(t=~d+last))s=t;
		for(e=b,f=last-s;s--;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
		first+=b-a,last-=d-c
	}SA.f=first;SA.l=last
}
function trCopy(SA,ISA,first,a,b,last,depth){
	var v=b-1,c=first-1,d=a-1,e,s;
	for(;c<d;)
		if((s=SA[++c]-depth)>-1&&ISA[s]===v)
			SA[++d]=s,ISA[s]=d;
	e=d+1;c=last;
	for(d=b;d>e;)
		if((s=SA[--c]-depth)>-1&&ISA[s]===v)
			SA[--d]=s,ISA[s]=d
}
function trPartialCopy(SA,ISA,first,a,b,last,depth){
	var c=first-1,d=a-1,e,r,s,v=b-1,lastRank=-1,newRank=-1;
	for(;c<d;)
		if((s=SA[++c]-depth)>-1&&ISA[s]===v){
			SA[++d]=s,r=ISA[s+depth];
			if(lastRank!==r)lastRank=r,newRank=d;
			ISA[s]=newRank
		}
	lastRank=-1;
	for(e=d;first<=e;e--){
		r=ISA[SA[e]];
		if(lastRank!==r)lastRank=r,newRank=e;
		if(newRank!==r)ISA[SA[e]]=newRank
	}
	lastRank=-1;c=last;e=d+1;
	for(d=b;e<d;)
		if((s=SA[--c]-depth)>-1&&ISA[s]===v){
			SA[--d]=s;
			r=ISA[s+depth];
			if(lastRank!==r)lastRank=r,newRank=d;
			ISA[s]=newRank
		}
}
function trIntroSort(SA,isa,isad,first,last,budget){
	var a,b,c=TR_STACKSIZE,p=0,t,v,x=0,next,incr=isad-isa,limit=trIlg(last-first),trlink=-1,ISd,ISA=SA.subarray(isa),I=new Int32Array(c*5),F=I.subarray(c),L=F.subarray(c),M=L.subarray(c),N=M.subarray(c);
	for(;;){
		if(limit<0){
			if(limit>-2){
				trPartition(SA,SA.subarray(isad-incr),first,first,last,last-1);
				a=SA.f,b=SA.l;
				if(a<last)
					for(c=first,v=a-1;c<a;)ISA[SA[c++]]=v;
				if(b<last)
					for(c=a,v=b-1;c<b;)ISA[SA[c++]]=v;
				if(b-a>1)
					F[p]=a,L[p]=b,I[p]=M[p]=N[p++]=0,
					I[p]=isad-incr,F[p]=first,L[p]=last,M[p]=-2,N[p++]=trlink,
					trlink=p-2;
				if(a-first<=last-b)
					if(a-first>1)
						I[p]=isad,F[p]=b,L[p]=last,M[p]=trIlg(last-b),N[p++]=trlink,
						last=a,limit=trIlg(a-first);
					else if(last-b>1)
						first=b,limit=trIlg(last-b);
					else{if(!p)return;
						isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
				else if(last-b>1)
					I[p]=isad,F[p]=first,L[p]=a,M[p]=trIlg(a-first),N[p++]=trlink,
					first=b,limit=trIlg(last-b);
				else if(a-first>1)
					last=a,limit=trIlg(a-first);
				else{if(!p)return;
					isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]
				}
			}else if(limit===-2){
				a=F[--p],b=L[p];
				if(!M[p])trCopy(SA,ISA,first,a,b,last,isad-isa);
				else{
					if(trlink>-1)M[trlink]=-1;
					trPartialCopy(SA,ISA,first,a,b,last,isad-isa)
				}
				if(!p)return;
				isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]
			}else{
				if(SA[first]>-1){
					for(a=first;ISA[SA[a]]=a,++a<last&&SA[a]>-1;);
					first=a
				}
				if(first<last){
					for(a=first;SA[a]=~SA[a],SA[++a]<0;);
					next=ISA[SA[a]]!==SA[isad+SA[a]]?trIlg(a-first+1):-1;
					if(++a<last)
						for(b=first,v=a-1;b<a;)ISA[SA[b++]]=v;
					if(budget.check(a-first))
						if(a-first<=last-a)
							I[p]=isad,L[p]=last,F[p]=last=a,M[p]=-3,N[p++]=trlink,
							isad+=incr,limit=next;
						else if(last-a>1)
							I[p]=isad+incr,F[p]=first,L[p]=first=a,M[p]=next,N[p++]=trlink,
							limit=-3;
						else isad+=incr,last=a,limit=next;
					else{
						if(trlink>-1)M[trlink]=-1;
						if(last-a>1)first=a,limit=-3;
						else{if(!p)return;
							isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
					}
				}else{if(!p)return;
					isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
			}continue
		}
		ISd=SA.subarray(isad);
		if(last-first<9){
			trInsertionSort(SA,ISd,first,last);
			limit=-3;continue
		}
		if(!limit--){
			trHeapSort(SA,ISd,SA.subarray(first),last-first);
			for(a=last-1;first<a;a=b)
				for(b=a-1,x=ISd[SA[a]];first<=b&&ISd[SA[b]]===x;)
					SA[b]=~SA[b--];
			limit=-3;continue
		}
		t=SA[first],SA[first]=SA[a=trPivot(SA,ISd,first,last)],SA[a]=t;
		trPartition(SA,ISd,first,first+1,last,v=ISd[SA[first]]);
		a=SA.f,b=SA.l;
		if(last-first!==b-a){
			next=ISA[SA[a]]!==v?trIlg(b-a):-1;
			for(v=a-1,c=first;c<a;)ISA[SA[c++]]=v;
			if(b<last)
				for(v=b-1,c=a;c<b;)ISA[SA[c++]]=v;
			if(b-a>1&&budget.check(b-a)){
				if(a-first<=last-b)
					if(last-b<=b-a)
						if(a-first>1)
							I[p]=isad+incr,F[p]=a,L[p]=b,M[p]=next,N[p++]=N[p]=trlink,
							I[p]=isad,F[p]=b,L[p]=last,M[p++]=limit,last=a;
						else if(last-b>1)
							I[p]=isad+incr,F[p]=a,L[p]=first=b,M[p]=next,N[p++]=trlink;
						else isad+=incr,first=a,last=b,limit=next;
					else if(a-first<=b-a)
						if(a-first>1)
							I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=N[p]=trlink,
							I[p]=isad+incr,F[p]=last=a,L[p]=b,M[p++]=next;
						else
							I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=trlink,
							isad+=incr,first=a,last=b,limit=next;
					else
						I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=N[p]=trlink,
						I[p]=isad,F[p]=first,L[p]=a,M[p++]=limit,
						isad+=incr,first=a,last=b,limit=next;
				else if(a-first<=b-a)
					if(last-b>1)
						I[p]=isad+incr,F[p]=a,L[p]=b,M[p]=next,N[p++]=N[p]=trlink,
						I[p]=isad,F[p]=first,L[p]=a,M[p++]=limit,first=b;
					else if(a-first>1)
						I[p]=isad+incr,F[p]=last=a,L[p]=b,M[p]=next,N[p++]=trlink;
					else isad+=incr,first=a,last=b,limit=next;
				else if(last-b<=b-a)
					if(last-b>1)
						I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=N[p]=trlink,
						I[p]=isad+incr,F[p]=a,L[p]=first=b,M[p++]=next;
					else I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=trlink,
						isad+=incr,first=a,last=b,limit=next;
				else I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=N[p]=trlink,
					I[p]=isad,L[p]=last,F[p]=last=b,M[p++]=limit,
					isad+=incr,first=a,limit=next
			}else{
				if(b-a>1&&trlink>-1)M[trlink]=-1;
				if(a-first<=last-b)
					if(a-first>1)
						I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=trlink,
						last=a;
					else if(last-b>1)first=b;
					else{if(!p)return;
						isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
				else if(last-b>1)
					I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=trlink,
					first=b;
				else if(a-first>1)last=a;
				else{if(!p)return;
					isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
			}
		}else if(budget.check(last-first))
			limit=trIlg(last-first),
			isad+=incr;
		else{
			if(trlink>-1)M[trlink]=-1;
			if(!p)return;
			isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]
		}
	}
}
function trSort(SA,isa,n,depth){
	var budget=new TRBudget(trIlg(n)*2/3|0,n),isad=isa+depth,first,skip,unsorted,t,last;
	for(;SA[first=skip=unsorted=0]>-n;isad+=isad-isa){
		do if((t=SA[first])<0)first-=t,skip+=t;
			else{
				if(skip)SA[first+skip]=skip,skip=0;
				last=SA[isa+t]+1;
				if(last-first>1)
					budget.count=0,
					trIntroSort(SA,isa,isad,first,last,budget),
					budget.count?unsorted+=budget.count:skip=first-last;
				else if(last-first===1)skip=-1;
				first=last
			}
		while(first<n);
		if(skip)SA[first+skip]=skip;
		if(!unsorted)break
	}
}
function sortTypeBstar(B,SA,CA,CB,n){
	for(var i=n-1,j=0,m=n,t,b=B[i],c,p;~i;){
		while(CA[c=b]++,i--&&(b=B[i])>=c);
		if(i<0)break;SA[--m]=i;
		CB[b<<8|c]++;
		for(c=b;i--&&(b=B[i])<=c;c=b)
			CB[c<<8|b]++
	}
	for(i=b=0,m=n-m;b<256;b++){
		t=i+CA[b];CA[b]=i+j;
		p=b<<8;i=t+CB[p+b];
		for(c=b;++c<256;i+=CB[c<<8|b])
			CB[p+c]=j+=CB[p+c]
	}
	if(m>0){p=SA.subarray(n-m);
		for(i=m-1;i;)SA[--CB[B[t=p[--i]]<<8|B[++t]]]=i;
		b=B[t=p[m-1]];c=B[++t];
		SA[--CB[b<<8|c]]=m-1;
		j=m;t=n-m-m;
		for(b=254;j;b--)
			for(c=255;c>b;j=i)
				i=CB[b<<8|c--],
				j-i>1&&ssSort(B,SA,p,i,j,m,t,2,n,SA[i]===m-1);
		for(i=m;i;SA[m+SA[i]]=j){
			if(SA[--i]>-1){
				for(j=i;(SA[m+SA[i]]=i--)&&SA[i]>-1;);
				SA[i+1]=i-j;
				if(i<1)break}
			for(j=i;SA[m+(SA[i]=~SA[i])]=j,SA[--i]<0;);
		}
		trSort(SA,m,p=j=m,1);CB[65535]=n;
		for(b=B[i=n-1];~i;SA[SA[m+--j]]=!t||t-i>1?t:~t){
			for(c=b;i--&&(b=B[i])>=c;)c=b;
			if(i<0)break;t=i;
			for(c=b;i--&&(b=B[i])<=c;)c=b
		}
		for(b=255;b;t=b<<8|b,CB[t+1]=i-CB[t]+1,CB[t]=i)
			for(i=CA[b--]-1,c=255;c>b;)
				for(t=i-CB[n=c<<8|b],CB[n]=i,i=t,j=CB[b<<8|c--];j<p;)
					SA[i--]=SA[--p]
	}return m
}
// Constructs the suffix array by using the sorted order of type B* suffixes
function constructSA(B,SA,CA,CB,n,m){
	var b,c=255,d,i,j,k,p,s;
	if(m>0)for(;c;)
		for(j=CA[c],i=CB[c+(p=--c<<8)],k=0,d=-1;i<j;){
			s=SA[--j];SA[j]=~s;
			if(s>0){b=B[--s];
			if(s>0&&B[s-1]>b)s=~s;
			if(b^d){
				if(d>-1)CB[p+d]=k;
				d=b;k=CB[p+d]}
			SA[k--]=s}}
	k=CA[d=B[n-1]];
	SA[k++]=B[n-2]<d?~n+1:n-1;
	for(i=0;i<n;i++){
		s=SA[i];
		if(s<1){SA[i]=~s;continue}
		b=B[--s];
		if(!s||B[s-1]<b)s=~s;
		if(b^d)CA[d]=k,k=CA[d=b];
		SA[k++]=s
	}
}
// Constructs the burrows-wheeler transformed string directly by using the sorted order of type B* suffixes
function constructBWT(T,SA,CA,CB,n,m){
	var i,j,k,s,b,c,d;
	if(0<m)for(c=255;c;)
		for(j=CA[c],i=CB[c|--c<<8],k=0,d=-1;i<j;)
			if(0<(s=SA[--j])){
				b=T[--s];SA[j]=~b;
				if(0<s&&T[s-1]>b)s=~s;
				if(b^d){
					if(0<=d)CB[c<<8|d]=k;
					k=CB[c<<8|(d=b)]
				}SA[k--]=s
			}else if(s)SA[j]=~s;
	k=CA[d=T[n-1]];
	SA[k++]=T[n-2]<d?~T[n-2]:n-1;
	for(i=0;i<n;++i)
		if(0<(s=SA[i])){
			b=T[--s];SA[i]=b;
			if(0<s&&T[s-1]<b)s=~T[s-1];
			if(b^d)CA[d]=k,k=CA[d=b];
			SA[k++]=s
		}else s?SA[i]=~s:c=i;
	return c
}
/* computeSA
@A: Array/Uint8Array. suffix array構築元となる配列
@return: suffix array
*/
function computeSA(A){
	let n=A.length,SA=new Int32Array(n);
	if(n<3){
		if(n==1)SA[0]=0;
		if(n>1)n=A[0]<A[1],SA[n^1]=0,SA[+n]=1;return SA
	}
	let CA=new Int32Array(256),CB=new Int32Array(65536);
	constructSA(A.buffer?A:A=Uint8Array.from(A),SA,CA,CB,n,sortTypeBstar(A,SA,CA,CB,n));
	return SA
}
/* divBWTe
@T: Array/Uint8Array. BWT構築元となる配列
@U: BWT格納配列
@return:
	@Uを省略した場合, BWT配列にprimary indexを格納して返却
	しなかった場合はprimary indexを返却
*/
function divBWTe(T,U){
	let n=T.length,b=n<257?1:n<65537?2:n<16777217?3:4,i=n,SA=new Int32Array(n);
	if(n<2){SA=U||SA;if(n)SA[0]=T[0];return U?n:(SA[1]=1,SA)}
	let CA=new Int32Array(256),CB=new Int32Array(65536),p=constructBWT(T.buffer?T:T=Uint8Array.from(T),SA,CA,CB,n,sortTypeBstar(T,SA,CA,CB,n));
	for(CA=U||new Uint8Array(n+b);i>p;)CA[--i]=SA[i]
	for(p++;i;)CA[i]=SA[--i];CA[i]=T[n-1];
	if(U)return p;
	for(;b--;p>>=8)CA[n++]=p&255;
	return CA
}
使用例
var A=Uint8Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt())
var SA=computeSA(A);
var BWT=Uint8Array(A.lnegth), p=divBWTe(A,BWT);
var bwt=divBWTe(A); //primary index格納版

性能

高速ではあるが、やはり配列の入れ子参照が多過ぎるせいか、速度が犠牲になっている印象。SAISを使った方が良いかも…

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?