divsufsortとは接尾辞配列を構築するprogramである(その手の業界では極めて高速な事で有名)。原作はC言語で書かれているが、他の言語に移植するとなると事情は変わってくるかもしれない。
v1.01辺りまでは移植は容易だったが…、ある版からpointer乱舞しだしたのだ。移植の困難さに加え、高速な処理の弊害にもなってくる可能性もある。
JavaScriptではTypedArray
とsubarray
関数が使えるようになってから実装が容易になったが、その成果はと言うと…
実装編
const SS_BLOCKSIZE=1024,SS_BLOCKSIZE2=SS_BLOCKSIZE*SS_BLOCKSIZE-1,
SS_MISORT_STACKSIZE=16,SS_SMERGE_STACKSIZE=32,TR_STACKSIZE=64,
SQQ_TABLE=function(A,B,a,b,c,d,e,f){for(;b<255;B[++b]=c+=f&(1<<e)-1,f>>=e,d-=e)for(e=b<7?5:b<65?2:1;d<e;d+=6)f|=(A.charCodeAt(a++)&63)<<d;return B}("PSiLH?njgjffYVfUVYUVUUUUz??}{{{^w]{vvnvvV[mVkVkZUkjUUUUUA",new Uint8Array(256),0,0,0,0),
LOG_TABLE=function(A,a){for(A[0]=-1;a<255;)A[++a]=A[a>>1]+1;return A}(new Int8Array(256),1);
function getIndex(a){if(a>-1)return a;return~a}
function TRBudget(chance,incVal){
this.chance=chance;
this.remain=this.incVal=incVal
}
TRBudget.prototype={count:0,
check:function(size){
if(size<=this.remain){
this.remain-=size;
return 1}
if(!this.chance){
this.count+=size;
return}
this.remain+=this.incVal-size;
return this.chance--
}};
function ssIlg(n){
if(n&0xFF00)return 8+LOG_TABLE[n>>8&0xFF];
return LOG_TABLE[n&0xFF]
}
function ssIsqrt(x){
if(x>SS_BLOCKSIZE2)return SS_BLOCKSIZE;
var e=x&0xFFFF0000?x&0xFF000000?24+LOG_TABLE[x>>>24]:16+LOG_TABLE[x>>16&0xFF]: x&0xFF00?8+LOG_TABLE[x>>8&0xFF]:LOG_TABLE[x&0xFF],y;
if(e<8)return SQQ_TABLE[x]>>4;
if(e>15){
y=SQQ_TABLE[x>>e-6-(e&1)]<<(e>>1)-7;
if(e>23)y=y+1+x/y>>1;
y=y+1+x/y>>1}
else y=(SQQ_TABLE[x>>e-6-(1&e)]>>8-(e>>1));
if(x<y*y)return--y;
return y
}
function ssCompare3(B,SA,p,q,depth){
var a=depth+SA[p],b=depth+SA[q];p=SA[p+1]+2;q=SA[q+1]+2;
if(p-a>q-b)for(;b<q&&B[a]===B[b];b++)a++;
else for(;a<p&&B[a]===B[b];b++)a++;
if(a<p){
if(b<q)return B[a]-B[b];
return 1}
if(b<q)return-1;
return 0
}
function ssCompare4(B,SA,a,b,p,depth){
var l=b+2,m=SA[p+1]+2;a+=depth;b=depth+SA[p];
if(l-a>m-b)for(;b<m&&B[a]===B[b];b++)a++;
else for(;a<l&&B[a]===B[b];b++)a++;
if(a<l){
if(b<m)return B[a]-B[b];
return 1}
if(b<m)return-1;
return 0
}
function ssInsertionSort(B,SA,P,first,last,depth){
for(var i=--last,j,r,t;i>first;SA[--j]=t){
for(j=i,t=SA[--i];0<(r=ssCompare3(B,P,t,SA[j],depth));){
for(;SA[j-1]=SA[j],j++<last&&SA[j]<0;);
if(last<j)break
}
if(!r)SA[j]=~SA[j]
}
}
function ssFixDown(A,P,S,i,size){
var v=S[i],c=A[P[v]],d,e,j,k;
for(;(j=2*i+1)<size;S[i]=S[i=k]){
d=A[P[S[k=j++]]];
e=A[P[S[j]]];
if(d<e)k=j,d=e;
if(d<=c)break
}S[i]=v
}
function ssHeapSort(A,SA,P,saIdx,size){
var m=size,i,t,S=SA.subarray(saIdx);
if(~m&1)
if(A[P[t=S[i=--m>>1]]]<A[P[S[m]]])
S[i]=S[m],S[m]=t;
for(i=m>>1;i;)ssFixDown(A,P,S,--i,m);
if(~size&1)
t=S[0],S[0]=S[m],S[m]=t,
ssFixDown(A,P,S,0,m);
for(;m;S[m]=t)
t=S[0],S[0]=S[--m],
ssFixDown(A,P,S,0,m)
}
function ssMedian3(T,SA,P,v1,v2,v3){
var t,a=T[P[SA[v1]]],b=T[P[SA[v2]]],c=T[P[SA[v3]]];
if(a>b)t=v1,v1=v2,v2=t,t=a,a=b,b=t;
if(b>c){
if(a>c)return v1;
return v3
}return v2
}
function ssMedian5(T,SA,P,v1,v2,v3,v4,v5){
var t,a=T[P[SA[v1]]],b=T[P[SA[v2]]],c=T[P[SA[v3]]],d=T[P[SA[v4]]],e=T[P[SA[v5]]];
if(b>c)t=v2,v2=v3,v3=t,t=b,b=c,c=t;
if(d>e)t=v4,v4=v5,v5=t,t=d,d=e,e=t;
if(b>d)t=v2,v2=v4,v4=t,t=b,b=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
if(a>c)t=v1,v1=v3,v3=t,t=a,a=c,c=t;
if(a>d)t=v1,v1=v4,v4=t,t=a,a=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
if(c>d)return v4;
return v3
}
function ssPivot(A,SA,P,first,last){
var t=last-first,middle=first+(t>>1);
if(t<513){
if(t<33)return ssMedian3(A,SA,P,first,middle,--last);
t>>=2;return ssMedian5(A,SA,P,first,first+t,middle,--last-t,last);
}
t>>=3;
return ssMedian3(A,SA,P,ssMedian3(A,SA,P,first,first+t,first+t*2),ssMedian3(A,SA,P,middle-t,middle,middle+t),ssMedian3(A,SA,P,--last-t*2,last-t,last))
}
function ssPartition(SA,P,first,last,depth){
for(var a=first-1,b=last,t;;SA[a]=t){
for(;++a<b&&P[t=SA[a]]+depth>P[++t];)SA[a]=~SA[a];
for(;--b>a&&P[t=SA[b]]+depth<=P[++t];);
if(b<=a)break;
t=~SA[b];SA[b]=SA[a]}
if(first<a)SA[first]=~SA[first];
return a
}
function ssMultiKeyIntroSort(B,SA,P,first,last,depth){
var a,b,c,d,e,f,p=0,s=SS_MISORT_STACKSIZE,t,v,x=0,limit=ssIlg(last-first),A,Fi=new Int32Array(s*4),La=Fi.subarray(s),De=La.subarray(s),Li=De.subarray(s);
for(;;){
if(last-first<9){
if(last-first>1)ssInsertionSort(B,SA,P,first,last,depth);
if(!p)return;
first=Fi[--p];last=La[p];depth=De[p];limit=Li[p];
continue
}
A=B.subarray(depth);
limit||ssHeapSort(A,SA,P,first,last-first);
if(--limit<0){
for(v=A[P[SA[a=first]]];++a<last;)
if((x=A[P[SA[a]]])!==v){
if(a-first>1)break;
v=x;first=a
}
if(A[P[SA[first]]-1]<v)
first=ssPartition(SA,P,first,a,depth);
if(a-first<=last-a)
if(a-first>1)
La[p]=last,Fi[p]=last=a,De[p]=depth++,Li[p++]=-1,
limit=ssIlg(a-first);
else first=a,limit=-1;
else if(last-a>1)
Fi[p]=first,La[p]=first=a,De[p]=depth+1,Li[p++]=ssIlg(a-first),
limit=-1;
else last=a,depth++,limit=ssIlg(a-first);
continue
}
v=A[P[SA[a=ssPivot(A,SA,P,first,last)]]];
t=SA[first],SA[first]=SA[a],SA[a]=t;
for(b=first;++b<last&&(x=A[P[SA[b]]])===v;);
if((a=b)<last&&x<v)
for(;++b<last&&(x=A[P[SA[b]]])<=v;)
x===v&&(t=SA[b],SA[b]=SA[a],SA[a++]=t);
for(c=last;--c>b&&(x=A[P[SA[c]]])===v;);
if(b<(d=c)&&x>v)
for(;--c>b&&(x=A[P[SA[c]]])>=v;)
x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t);
for(;b<c;){
t=SA[b],SA[b]=SA[c],SA[c]=t;
for(;++b<c&&(x=A[P[SA[b]]])<=v;)
x===v&&(t=SA[b],SA[b]=SA[a],SA[a++]=t);
for(;--c>b&&(x=A[P[SA[c]]])>=v;)
x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t)
}
if(a<=d){c=b-1;
if((s=a-first)>(t=b-a))s=t;
for(e=first,f=b-s;s-->0;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
if((s=d-c)>(t=~d+last))s=t;
for(e=b,f=last-s;s-->0;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
a=first+b-a;c+=last-d;
b=v<=A[P[SA[a]]-1]?a:ssPartition(SA,P,a,c,depth);
if(a-first<=last-c)
if(last-c<=c-b)
Fi[p]=b,La[p]=c,De[p]=depth+1,Li[p++]=ssIlg(c-b),
Fi[p]=c,La[p]=last,De[p]=depth,Li[p++]=limit,
last=a;
else if(a-first<=c-b)
Fi[p]=c,La[p]=last,De[p]=depth,Li[p++]=limit,
Fi[p]=b,La[p]=c,De[p]=depth+1,Li[p++]=ssIlg(c-b),
last=a;
else Fi[p]=c,La[p]=last,De[p]=depth,Li[p++]=limit,
Fi[p]=first,La[p]=a,De[p]=depth++,Li[p++]=limit,
first=b,last=c,limit=ssIlg(c-b);
else if(a-first<=c-b)
Fi[p]=b,La[p]=c,De[p]=depth+1,Li[p++]=ssIlg(c-b),
Fi[p]=first,La[p]=a,De[p]=depth,Li[p++]=limit,
first=c;
else if(last-c<=c-b)
Fi[p]=first,La[p]=a,De[p]=depth,Li[p++]=limit,
Fi[p]=b,La[p]=first=c,De[p]=depth+1,Li[p++]=ssIlg(c-b);
else Fi[p]=first,La[p]=a,De[p]=depth,Li[p++]=limit,
La[p]=last,Fi[p]=last=c,De[p]=depth++,Li[p++]=limit,
first=b,limit=ssIlg(c-b)
}else{
if(A[P[SA[first]]-1]<v)
first=ssPartition(SA,P,first,last,depth),
limit=ssIlg(last-first);
else++limit;
depth++
}
}
}
function ssBlockSwap(SA,a,b,n,t){
for(;n-->0;SA[b++]=t)t=SA[a],SA[a++]=SA[b]
}
function ssRotate(SA,first,middle,last){
var l=middle-first,r=last-middle,a,b,t;
for(;l>0&&r>0;){
if(l===r){ssBlockSwap(SA,first,middle,l);break}
if(l<r)for(b=middle-1,t=SA[a=last-1];;){
SA[a]=SA[b];SA[b]=SA[--a];
if(--b<first){
SA[last=a]=t;r+=~l;
if(r<=l)break;
b=middle-1;t=SA[--a]
}
}else for(b=middle,t=SA[a=first];;){
SA[a]=SA[b];SA[b]=SA[++a];
if(++b>=last){
SA[a]=t;first=a+1;l+=~r;
if(l<=r)break;
b=middle;t=SA[++a]
}
}
}
}
function ssInplaceMerge(B,SA,P,first,middle,last,depth){
var a,b,p,q,r,x,half,len;
do{
a=SA[last-1];x=a<0;p=x?~a:a;
a=first;r=-1;len=middle-first;
for(half=len>>1;len;half>>=1){
q=SA[b=a+half],q=ssCompare3(B,P,q>-1?q:~q,p,depth);
if(q<0)a=b+1,half-=~len&1;
else r=q;len=half
}
if(a<middle){
if(!r)SA[a]=~SA[a];
ssRotate(SA,a,middle,last);
last-=middle-a;middle=a;
if(first===a)break
}
--last;
if(x)for(;SA[--last]<0;);
}while(middle!==last)
}
function ssMergeForward(B,SA,P,first,middle,last,buf,depth){
ssBlockSwap(SA,buf,first,middle-first);
var bufEnd=~first+buf+middle,a=first,b=buf,c=middle,r,t=SA[a];
for(;;){
r=ssCompare3(B,P,SA[b],SA[c],depth);
if(r<0)
do{
SA[a++]=SA[b];
if(bufEnd<=b)return SA[bufEnd]=t;
SA[b++]=SA[a]
}while(SA[b]<0);
else if(r>0)
do{
SA[a++]=SA[c],SA[c++]=SA[a];
if(last<=c){
for(;b<bufEnd;SA[b++]=SA[a])SA[a++]=SA[b];
SA[a]=SA[b];SA[b]=t;
return
}
}while(SA[c]<0);
else{SA[c]=~SA[c];
do{
SA[a++]=SA[b];
if(bufEnd<=b)return SA[bufEnd]=t;
SA[b++]=SA[a]
}while(SA[b]<0);
do{
SA[a++]=SA[c],SA[c++]=SA[a];
if(last<=c){
for(;b<bufEnd;SA[b++]=SA[a])SA[a++]=SA[b];
SA[a]=SA[b];SA[b]=t;
return
}
}while(SA[c]<0)
}
}
}
function ssMergeBackward(B,SA,P,first,middle,last,buf,depth){
ssBlockSwap(SA,buf,middle,last-middle);
var x=0,a=last-1,b=~middle+buf+last,c=middle-1,t=SA[a],r,p1=SA[b],p2=SA[middle-1];
if(p1<0)p1=~p1,x|=1;
if(p2<0)p2=~p2,x|=2;
for(;;){
r=ssCompare3(B,P,p1,p2,depth);
if(r>0){
if(x&1)for(x^=1;SA[a--]=SA[b],SA[b--]=SA[a],SA[b]<0;);
SA[a--]=SA[b];
if(b<=buf){SA[buf]=t;break}
SA[b--]=SA[a];p1=SA[b];
if(p1<0)p1=~p1,x|=1
}else if(r<0){
if(x&2)for(x^=2;SA[a--]=SA[c],SA[c--]=SA[a],SA[c]<0;);
SA[a--]=SA[c];SA[c--]=SA[a];
if(c<first){
for(;buf<b;SA[b--]=SA[a])SA[a--]=SA[b];
SA[a]=SA[b];SA[b]=t;break
}
p2=SA[c];if(p2<0)p2=~p2,x|=2
}else{
if(x&1)for(x^=1;SA[a--]=SA[b],SA[b--]=SA[a],SA[b]<0;);
SA[a--]=~SA[b];
if(b<=buf){SA[buf]=t;break}
SA[b--]=SA[a];
if(x&2)for(x^=2;SA[a--]=SA[c],SA[c--]=SA[a],SA[c]<0;);
SA[a--]=SA[c],SA[c--]=SA[a];
if(c<first){
for(;buf<b;SA[b--]=SA[a])SA[a--]=SA[b];
SA[a]=SA[b];SA[b]=t;break
}
p1=SA[b];if(p1<0)p1=~p1,x|=1;
p2=SA[c];if(p2<0)p2=~p2,x|=2
}
}
}
function ssSwapMerge(B,SA,P,first,middle,last,buf,bufSize,depth){
var p=0,check=0,len,l=SS_SMERGE_STACKSIZE,r,m,lm,rm,half,next,F=new Int32Array(l*4),M=F.subarray(l),L=M.subarray(l),C=L.subarray(l);
for(;;){
if(last-middle<=bufSize){
first<middle&&middle<last&&ssMergeBackward(B,SA,P,first,middle,last,buf,depth);
if(check&1||(check&2)&&!ssCompare3(B,P,getIndex(SA[first-1]),SA[first],depth))
SA[first]=~SA[first];
if((check&4)&&!ssCompare3(B,P,getIndex(SA[last-1]),SA[last],depth))
SA[last]=~SA[last];
if(!p)return;
first=F[--p];middle=M[p];last=L[p];check=C[p];
continue
}
if(middle-first<=bufSize){
first<middle&&ssMergeForward(B,SA,P,first,middle,last,buf,depth);
if(check&1||(check&2)&&!ssCompare3(B,P,getIndex(SA[first-1]),SA[first],depth))
SA[first]=~SA[first];
if((check&4)&&!ssCompare3(B,P,getIndex(SA[last-1]),SA[last],depth))
SA[last]=~SA[last];
if(!p)return;
first=F[--p];middle=M[p];last=L[p];check=C[p];
continue
}
m=0,len=middle-first<last-middle?middle-first:last-middle;
for(half=len>>1;len>0;len=half,half>>=1)
if(ssCompare3(B,P,getIndex(SA[middle+m+half]),getIndex(SA[~m+middle-half]),depth)<0)
m-=~half,half-=1&len^1;
if(m>0){
rm=middle+m;next=0;
ssBlockSwap(SA,lm=middle-m,l=r=middle,m);
if(rm<last)
if(SA[rm]<0){
SA[rm]=~SA[rm];
if(first<lm)for(next|=4;SA[--l]<0;);
next|=1
}else if(first<lm)for(next|=2;SA[r]<0;)r++;
if(l-first<=last-r)
F[p]=r,M[p]=rm,L[p]=last,C[p++]=next&3|check&4,
middle=lm,last=l,check=check&3|next&4;
else{
if(r===middle&&(next&2))next^=6;
F[p]=first,M[p]=lm,L[p]=l,C[p++]=check&3|next&4;
first=r;middle=rm;check=next&3|check&4
}
}else{
if(!ssCompare3(B,P,getIndex(SA[middle-1]),SA[middle],depth))
SA[middle]=~SA[middle];
if(check&1||(check&2)&&!ssCompare3(B,P,getIndex(SA[first-1]),SA[first],depth))
SA[first]=~SA[first];
if((check&4)&&!ssCompare3(B,P,getIndex(SA[last-1]),SA[last],depth))
SA[last]=~SA[last];
if(!p)return;
first=F[--p];middle=M[p];last=L[p];check=C[p]
}
}
}
function ssSort(B,SA,P,first,last,buf,bufSize,depth,n,lastSuffix){
lastSuffix&&first++;
var limit=0,middle=last,a=first,b,i=0,j,k,curBuf,curBufSize;
if(bufSize<SS_BLOCKSIZE&&bufSize<last-first){
limit=ssIsqrt(last-first);
if(bufSize<limit){
if(limit>SS_BLOCKSIZE)limit=SS_BLOCKSIZE;
buf=middle=last-limit;
bufSize=limit
}else limit=0
}
for(;middle>a+SS_BLOCKSIZE;a+=SS_BLOCKSIZE){
k=SS_BLOCKSIZE;
ssMultiKeyIntroSort(B,SA,P,b=a,curBuf=a+k,depth);
curBufSize=last-curBuf;
if(curBufSize<=bufSize)curBufSize=bufSize,curBuf=buf;
for(j=i++;j&1;j>>=1)
ssSwapMerge(B,SA,P,b-k,b,b+k,curBuf,curBufSize,depth),
b-=k,k<<=1
}
ssMultiKeyIntroSort(B,SA,P,a,middle,depth);
for(k=SS_BLOCKSIZE;i;i>>=1,k<<=1)
if(i&1)ssSwapMerge(B,SA,P,a-k,a,middle,buf,bufSize,depth),a-=k;
if(limit)
ssMultiKeyIntroSort(B,SA,P,middle,last,depth),
ssInplaceMerge(B,SA,P,first,middle,last,depth);
if(lastSuffix){
b=P[i=SA[first-1]];n-=2;
for(a=first;a<last&&(SA[a]<0||ssCompare4(B,P,b,n,SA[a],depth)>0);)
SA[a-1]=SA[a++];
SA[a-1]=i
}
}
function trIlg(n){
if(n&0xFFFF0000){
if(n&0xFF000000)return 24+LOG_TABLE[n>>>24];
return 16+LOG_TABLE[n>>16&0xFF]}
if(n&0xFF00)return 8+LOG_TABLE[n>>8&0xFF];
return LOG_TABLE[n&0xFF]
}
function trInsertionSort(SA,ISd,first,last){
for(var a=first,b,r,t;b=a,++a<last;SA[b+1]=t){
for(t=SA[a];(r=ISd[t]-ISd[SA[b]])<0;){
for(;SA[b+1]=SA[b],first<b--&&SA[b]<0;);
if(b<first)break
}
if(!r)SA[b]=~SA[b]
}
}
function trFixDown(SA,ISd,I,i,size){
var v=I[i],c=ISd[v],d,e,j,k;
for(;(j=i*2+1)<size;I[i]=I[i=k]){
d=ISd[I[k=j++]];
e=ISd[I[j]];
if(d<e)k=j,d=e;
if(d<=c)break
}I[i]=v
}
function trHeapSort(SA,ISd,I,size){
var i,m=size,t;
if(~size&1)
if(ISd[t=I[i=--m>>1]]<ISd[I[m]])
I[i]=I[m],I[m]=t;
for(i=m>>1;i;)trFixDown(SA,ISd,I,--i,m);
if(~size&1)
t=I[0],I[0]=I[m],I[m]=t,
trFixDown(SA,ISd,I,0,m);
for(;m;I[m]=t)
t=I[0],I[0]=I[--m],
trFixDown(SA,ISd,I,0,m)
}
function trMedian3(SA,ISd,v1,v2,v3){
var t,a=ISd[SA[v1]],b=ISd[SA[v2]],c=ISd[SA[v3]];
if(a>b)t=v1,v1=v2,v2=t,t=a,a=b,b=t;
if(b>c){
if(a>c)return v1;
return v3
}return v2
}
function trMedian5(SA,ISd,v1,v2,v3,v4,v5){
var t,a=ISd[SA[v1]],b=ISd[SA[v2]],c=ISd[SA[v3]],d=ISd[SA[v4]],e=ISd[SA[v5]];
if(b>c)t=v2,v2=v3,v3=t,t=b,b=c,c=t;
if(d>e)t=v4,v4=v5,v5=t,t=d,d=e,e=t;
if(b>d)t=v2,v2=v4,v4=t,t=b,b=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
if(a>c)t=v1,v1=v3,v3=t,t=a,a=c,c=t;
if(a>d)t=v1,v1=v4,v4=t,t=a,a=d,d=t,t=v3,v3=v5,v5=t,t=c,c=e,e=t;
if(c>d)return v4;
return v3
}
function trPivot(SA,ISd,first,last){
var t=last-first,middle=first+(t>>1);
if(t<513){
if(t<33)return trMedian3(SA,ISd,first,middle,--last);
t>>=2;return trMedian5(SA,ISd,first,first+t,middle,--last-t,last)
}
t>>=3;
first=trMedian3(SA,ISd,first,first+t,first+t*2);
middle=trMedian3(SA,ISd,middle-t,middle,middle+t);
last=trMedian3(SA,ISd,--last-t*2,last-t,last);
return trMedian3(SA,ISd,first,middle,last)
}
function trPartition(SA,ISd,first,middle,last,v){
var a,b=middle-1,c,d,e,f,s,t,x=0;
for(;++b<last&&(x=ISd[SA[b]])===v;);
if((a=b)<last&&x<v)
for(;++b<last&&(x=ISd[SA[b]])<=v;)
x===v&&(t=SA[a],SA[a++]=SA[b],SA[b]=t);
for(c=last;--c>b&&(x=ISd[SA[c]])===v;);
if(b<(d=c)&&x>v)
for(;--c>b&&(x=ISd[SA[c]])>=v;)
x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t);
for(;b<c;){
t=SA[c],SA[c]=SA[b],SA[b]=t;
for(;++b<c&&(x=ISd[SA[b]])<=v;)
x===v&&(t=SA[a],SA[a++]=SA[b],SA[b]=t);
for(;--c>b&&(x=ISd[SA[c]])>=v;)
x===v&&(t=SA[c],SA[c]=SA[d],SA[d--]=t)
}
if(a<=d){c=b-1;
if((s=a-first)>(t=b-a))s=t;
for(e=first,f=b-s;s--;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
if((s=d-c)>(t=~d+last))s=t;
for(e=b,f=last-s;s--;SA[f++]=t)t=SA[e],SA[e++]=SA[f];
first+=b-a,last-=d-c
}SA.f=first;SA.l=last
}
function trCopy(SA,ISA,first,a,b,last,depth){
var v=b-1,c=first-1,d=a-1,e,s;
for(;c<d;)
if((s=SA[++c]-depth)>-1&&ISA[s]===v)
SA[++d]=s,ISA[s]=d;
e=d+1;c=last;
for(d=b;d>e;)
if((s=SA[--c]-depth)>-1&&ISA[s]===v)
SA[--d]=s,ISA[s]=d
}
function trPartialCopy(SA,ISA,first,a,b,last,depth){
var c=first-1,d=a-1,e,r,s,v=b-1,lastRank=-1,newRank=-1;
for(;c<d;)
if((s=SA[++c]-depth)>-1&&ISA[s]===v){
SA[++d]=s,r=ISA[s+depth];
if(lastRank!==r)lastRank=r,newRank=d;
ISA[s]=newRank
}
lastRank=-1;
for(e=d;first<=e;e--){
r=ISA[SA[e]];
if(lastRank!==r)lastRank=r,newRank=e;
if(newRank!==r)ISA[SA[e]]=newRank
}
lastRank=-1;c=last;e=d+1;
for(d=b;e<d;)
if((s=SA[--c]-depth)>-1&&ISA[s]===v){
SA[--d]=s;
r=ISA[s+depth];
if(lastRank!==r)lastRank=r,newRank=d;
ISA[s]=newRank
}
}
function trIntroSort(SA,isa,isad,first,last,budget){
var a,b,c=TR_STACKSIZE,p=0,t,v,x=0,next,incr=isad-isa,limit=trIlg(last-first),trlink=-1,ISd,ISA=SA.subarray(isa),I=new Int32Array(c*5),F=I.subarray(c),L=F.subarray(c),M=L.subarray(c),N=M.subarray(c);
for(;;){
if(limit<0){
if(limit>-2){
trPartition(SA,SA.subarray(isad-incr),first,first,last,last-1);
a=SA.f,b=SA.l;
if(a<last)
for(c=first,v=a-1;c<a;)ISA[SA[c++]]=v;
if(b<last)
for(c=a,v=b-1;c<b;)ISA[SA[c++]]=v;
if(b-a>1)
F[p]=a,L[p]=b,I[p]=M[p]=N[p++]=0,
I[p]=isad-incr,F[p]=first,L[p]=last,M[p]=-2,N[p++]=trlink,
trlink=p-2;
if(a-first<=last-b)
if(a-first>1)
I[p]=isad,F[p]=b,L[p]=last,M[p]=trIlg(last-b),N[p++]=trlink,
last=a,limit=trIlg(a-first);
else if(last-b>1)
first=b,limit=trIlg(last-b);
else{if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
else if(last-b>1)
I[p]=isad,F[p]=first,L[p]=a,M[p]=trIlg(a-first),N[p++]=trlink,
first=b,limit=trIlg(last-b);
else if(a-first>1)
last=a,limit=trIlg(a-first);
else{if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]
}
}else if(limit===-2){
a=F[--p],b=L[p];
if(!M[p])trCopy(SA,ISA,first,a,b,last,isad-isa);
else{
if(trlink>-1)M[trlink]=-1;
trPartialCopy(SA,ISA,first,a,b,last,isad-isa)
}
if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]
}else{
if(SA[first]>-1){
for(a=first;ISA[SA[a]]=a,++a<last&&SA[a]>-1;);
first=a
}
if(first<last){
for(a=first;SA[a]=~SA[a],SA[++a]<0;);
next=ISA[SA[a]]!==SA[isad+SA[a]]?trIlg(a-first+1):-1;
if(++a<last)
for(b=first,v=a-1;b<a;)ISA[SA[b++]]=v;
if(budget.check(a-first))
if(a-first<=last-a)
I[p]=isad,L[p]=last,F[p]=last=a,M[p]=-3,N[p++]=trlink,
isad+=incr,limit=next;
else if(last-a>1)
I[p]=isad+incr,F[p]=first,L[p]=first=a,M[p]=next,N[p++]=trlink,
limit=-3;
else isad+=incr,last=a,limit=next;
else{
if(trlink>-1)M[trlink]=-1;
if(last-a>1)first=a,limit=-3;
else{if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
}
}else{if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
}continue
}
ISd=SA.subarray(isad);
if(last-first<9){
trInsertionSort(SA,ISd,first,last);
limit=-3;continue
}
if(!limit--){
trHeapSort(SA,ISd,SA.subarray(first),last-first);
for(a=last-1;first<a;a=b)
for(b=a-1,x=ISd[SA[a]];first<=b&&ISd[SA[b]]===x;)
SA[b]=~SA[b--];
limit=-3;continue
}
t=SA[first],SA[first]=SA[a=trPivot(SA,ISd,first,last)],SA[a]=t;
trPartition(SA,ISd,first,first+1,last,v=ISd[SA[first]]);
a=SA.f,b=SA.l;
if(last-first!==b-a){
next=ISA[SA[a]]!==v?trIlg(b-a):-1;
for(v=a-1,c=first;c<a;)ISA[SA[c++]]=v;
if(b<last)
for(v=b-1,c=a;c<b;)ISA[SA[c++]]=v;
if(b-a>1&&budget.check(b-a)){
if(a-first<=last-b)
if(last-b<=b-a)
if(a-first>1)
I[p]=isad+incr,F[p]=a,L[p]=b,M[p]=next,N[p++]=N[p]=trlink,
I[p]=isad,F[p]=b,L[p]=last,M[p++]=limit,last=a;
else if(last-b>1)
I[p]=isad+incr,F[p]=a,L[p]=first=b,M[p]=next,N[p++]=trlink;
else isad+=incr,first=a,last=b,limit=next;
else if(a-first<=b-a)
if(a-first>1)
I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=N[p]=trlink,
I[p]=isad+incr,F[p]=last=a,L[p]=b,M[p++]=next;
else
I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=trlink,
isad+=incr,first=a,last=b,limit=next;
else
I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=N[p]=trlink,
I[p]=isad,F[p]=first,L[p]=a,M[p++]=limit,
isad+=incr,first=a,last=b,limit=next;
else if(a-first<=b-a)
if(last-b>1)
I[p]=isad+incr,F[p]=a,L[p]=b,M[p]=next,N[p++]=N[p]=trlink,
I[p]=isad,F[p]=first,L[p]=a,M[p++]=limit,first=b;
else if(a-first>1)
I[p]=isad+incr,F[p]=last=a,L[p]=b,M[p]=next,N[p++]=trlink;
else isad+=incr,first=a,last=b,limit=next;
else if(last-b<=b-a)
if(last-b>1)
I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=N[p]=trlink,
I[p]=isad+incr,F[p]=a,L[p]=first=b,M[p++]=next;
else I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=trlink,
isad+=incr,first=a,last=b,limit=next;
else I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=N[p]=trlink,
I[p]=isad,L[p]=last,F[p]=last=b,M[p++]=limit,
isad+=incr,first=a,limit=next
}else{
if(b-a>1&&trlink>-1)M[trlink]=-1;
if(a-first<=last-b)
if(a-first>1)
I[p]=isad,F[p]=b,L[p]=last,M[p]=limit,N[p++]=trlink,
last=a;
else if(last-b>1)first=b;
else{if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
else if(last-b>1)
I[p]=isad,F[p]=first,L[p]=a,M[p]=limit,N[p++]=trlink,
first=b;
else if(a-first>1)last=a;
else{if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]}
}
}else if(budget.check(last-first))
limit=trIlg(last-first),
isad+=incr;
else{
if(trlink>-1)M[trlink]=-1;
if(!p)return;
isad=I[--p],first=F[p],last=L[p],limit=M[p],trlink=N[p]
}
}
}
function trSort(SA,isa,n,depth){
var budget=new TRBudget(trIlg(n)*2/3|0,n),isad=isa+depth,first,skip,unsorted,t,last;
for(;SA[first=skip=unsorted=0]>-n;isad+=isad-isa){
do if((t=SA[first])<0)first-=t,skip+=t;
else{
if(skip)SA[first+skip]=skip,skip=0;
last=SA[isa+t]+1;
if(last-first>1)
budget.count=0,
trIntroSort(SA,isa,isad,first,last,budget),
budget.count?unsorted+=budget.count:skip=first-last;
else if(last-first===1)skip=-1;
first=last
}
while(first<n);
if(skip)SA[first+skip]=skip;
if(!unsorted)break
}
}
function sortTypeBstar(B,SA,CA,CB,n){
for(var i=n-1,j=0,m=n,t,b=B[i],c,p;~i;){
while(CA[c=b]++,i--&&(b=B[i])>=c);
if(i<0)break;SA[--m]=i;
CB[b<<8|c]++;
for(c=b;i--&&(b=B[i])<=c;c=b)
CB[c<<8|b]++
}
for(i=b=0,m=n-m;b<256;b++){
t=i+CA[b];CA[b]=i+j;
p=b<<8;i=t+CB[p+b];
for(c=b;++c<256;i+=CB[c<<8|b])
CB[p+c]=j+=CB[p+c]
}
if(m>0){p=SA.subarray(n-m);
for(i=m-1;i;)SA[--CB[B[t=p[--i]]<<8|B[++t]]]=i;
b=B[t=p[m-1]];c=B[++t];
SA[--CB[b<<8|c]]=m-1;
j=m;t=n-m-m;
for(b=254;j;b--)
for(c=255;c>b;j=i)
i=CB[b<<8|c--],
j-i>1&&ssSort(B,SA,p,i,j,m,t,2,n,SA[i]===m-1);
for(i=m;i;SA[m+SA[i]]=j){
if(SA[--i]>-1){
for(j=i;(SA[m+SA[i]]=i--)&&SA[i]>-1;);
SA[i+1]=i-j;
if(i<1)break}
for(j=i;SA[m+(SA[i]=~SA[i])]=j,SA[--i]<0;);
}
trSort(SA,m,p=j=m,1);CB[65535]=n;
for(b=B[i=n-1];~i;SA[SA[m+--j]]=!t||t-i>1?t:~t){
for(c=b;i--&&(b=B[i])>=c;)c=b;
if(i<0)break;t=i;
for(c=b;i--&&(b=B[i])<=c;)c=b
}
for(b=255;b;t=b<<8|b,CB[t+1]=i-CB[t]+1,CB[t]=i)
for(i=CA[b--]-1,c=255;c>b;)
for(t=i-CB[n=c<<8|b],CB[n]=i,i=t,j=CB[b<<8|c--];j<p;)
SA[i--]=SA[--p]
}return m
}
// Constructs the suffix array by using the sorted order of type B* suffixes
function constructSA(B,SA,CA,CB,n,m){
var b,c=255,d,i,j,k,p,s;
if(m>0)for(;c;)
for(j=CA[c],i=CB[c+(p=--c<<8)],k=0,d=-1;i<j;){
s=SA[--j];SA[j]=~s;
if(s>0){b=B[--s];
if(s>0&&B[s-1]>b)s=~s;
if(b^d){
if(d>-1)CB[p+d]=k;
d=b;k=CB[p+d]}
SA[k--]=s}}
k=CA[d=B[n-1]];
SA[k++]=B[n-2]<d?~n+1:n-1;
for(i=0;i<n;i++){
s=SA[i];
if(s<1){SA[i]=~s;continue}
b=B[--s];
if(!s||B[s-1]<b)s=~s;
if(b^d)CA[d]=k,k=CA[d=b];
SA[k++]=s
}
}
// Constructs the burrows-wheeler transformed string directly by using the sorted order of type B* suffixes
function constructBWT(T,SA,CA,CB,n,m){
var i,j,k,s,b,c,d;
if(0<m)for(c=255;c;)
for(j=CA[c],i=CB[c|--c<<8],k=0,d=-1;i<j;)
if(0<(s=SA[--j])){
b=T[--s];SA[j]=~b;
if(0<s&&T[s-1]>b)s=~s;
if(b^d){
if(0<=d)CB[c<<8|d]=k;
k=CB[c<<8|(d=b)]
}SA[k--]=s
}else if(s)SA[j]=~s;
k=CA[d=T[n-1]];
SA[k++]=T[n-2]<d?~T[n-2]:n-1;
for(i=0;i<n;++i)
if(0<(s=SA[i])){
b=T[--s];SA[i]=b;
if(0<s&&T[s-1]<b)s=~T[s-1];
if(b^d)CA[d]=k,k=CA[d=b];
SA[k++]=s
}else s?SA[i]=~s:c=i;
return c
}
/* computeSA
@A: Array/Uint8Array. suffix array構築元となる配列
@return: suffix array
*/
function computeSA(A){
let n=A.length,SA=new Int32Array(n);
if(n<3){
if(n==1)SA[0]=0;
if(n>1)n=A[0]<A[1],SA[n^1]=0,SA[+n]=1;return SA
}
let CA=new Int32Array(256),CB=new Int32Array(65536);
constructSA(A.buffer?A:A=Uint8Array.from(A),SA,CA,CB,n,sortTypeBstar(A,SA,CA,CB,n));
return SA
}
/* divBWTe
@T: Array/Uint8Array. BWT構築元となる配列
@U: BWT格納配列
@return:
@Uを省略した場合, BWT配列にprimary indexを格納して返却
しなかった場合はprimary indexを返却
*/
function divBWTe(T,U){
let n=T.length,b=n<257?1:n<65537?2:n<16777217?3:4,i=n,SA=new Int32Array(n);
if(n<2){SA=U||SA;if(n)SA[0]=T[0];return U?n:(SA[1]=1,SA)}
let CA=new Int32Array(256),CB=new Int32Array(65536),p=constructBWT(T.buffer?T:T=Uint8Array.from(T),SA,CA,CB,n,sortTypeBstar(T,SA,CA,CB,n));
for(CA=U||new Uint8Array(n+b);i>p;)CA[--i]=SA[i]
for(p++;i;)CA[i]=SA[--i];CA[i]=T[n-1];
if(U)return p;
for(;b--;p>>=8)CA[n++]=p&255;
return CA
}
使用例
var A=Uint8Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt())
var SA=computeSA(A);
var BWT=Uint8Array(A.lnegth), p=divBWTe(A,BWT);
var bwt=divBWTe(A); //primary index格納版
性能
高速ではあるが、やはり配列の入れ子参照が多過ぎるせいか、速度が犠牲になっている印象。SAISを使った方が良いかも…