PPM star編第3話。圧縮力を高めるためにPPMd等で使われている技術を組み込んでいきます。遺伝法、SEE、そして新たに二次記号推定(Secondary Symbol Estimation, SSE)の3つです。
function Suffix(){this.root=this.point={size:0,cs:-1,sum:0,o:0}}
function update(sa,s){
var a,b,e,m,n,r,t,
f=sa.fit,p=sa.max,q=sa.med,l=sa.min,
o=l,u=f?l.sum+l.cs:0,v=f?f.hit:0,
c={size:p.size+1, cs:-1, sum:0, o:0};
for(;p!==q;p=p.dad){
m=p.sum+1,n=2*v*(6+m),m+=u;
// inheritance
n=n>m*6?3+(n>m*9)+(n>m*12)+(n>m*15):(p.sum++,n>m)+(n>m*3);
t={char:s, hit:n, son:c};
p.sum+=n;p.cs++;
if(e=p.next)
if(e.hit>n)t.next=e.next,e.next=t;
else p.next=t,t.next=e;
else p.next=t
}
if(p){
s:for(n=f,m=l;p!==l;p=p.dad){
t=p.next;
do if(e=t,e.char===s){
m=p;n=e;e.hit>v&&(v=e.hit,o=p);break s
}while(t=e.next);
// inheritance
b=p.sum+1,a=2*v*(6+b),b+=u;
a=a>b*6?3+(a>b*9)+(a>b*12)+(a>b*15):(p.sum++,a>b)+(a>b*3);
e.next={char:s, hit:a, son:c};
p.sum+=a;p.cs++
}
e=n,q=e.son;
if(~p.size+q.size){
q.dad=c.dad=u=r={dad:q.dad, size:p.size+1, cs:q.cs, o:q.o},b=0;
for(t=q;t=t.next;b+=a)
u=u.next={char:t.char, hit:a=t.hit>>1, son:t.son};
for(r.sum=b;e.son=r,p=p.dad;){
for(e=p.next;e.char^s;)e=e.next;
e.hit>v&&(v=e.hit,o=p);
if(e.son!==q)break
}
}else c.dad=q;
if(m!==l)
for(p=m,e=n;;e.hit>v&&(v=e.hit,o=p)){
if(e.hit>254)// rescale
for(t=p.next,p.sum=0;p.sum+=t.hit>>=1,t=t.next;);
t=p.next;p.sum++;
if(++e.hit>t.hit)
u=e.hit, e.hit=t.hit, t.hit=u,
u=e.char, e.char=t.char, t.char=u,
u=e.son, e.son=t.son, t.son=u;
if((p=p.dad)===l)break;
for(e=p.next;e.char^s;)e=e.next
}
if(f.hit>253)// rescale
for(t=l.next,l.sum=0;l.sum+=t.hit>>=1,t=t.next;);
t=l.next;l.sum+=2;o.o++;
if((f.hit+=2)>t.hit)
u=f.hit, f.hit=t.hit, t.hit=u,
u=f.char, f.char=t.char, t.char=u,
u=f.son, f.son=t.son, t.son=u
}else(c.dad=sa.root).o++;
sa.point=c
}
// initialize SEE and SSE model
function init(D,M,SEE,SSE){
for(var a,f,o=128,p;o--;D[o]=1024/(o+40)|0)
for(p=SSE[o]=[],f=1<<24|o<<25,a=16;a;)p[--a]=f;
for(;++o<16;)
for(p=32;p;)SEE[o<<5|--p]={b:3,c:7,s:8*o+5<<3};
for(;a<256;)M[a]=Math.sqrt(a++)
}
// update SEE
function upSEE(S){if(!--S.c){
var b=S.b,i=S.s>>b;
i=7-(i>40)-(i>280)-(i>1020);
if(i<b)S.s>>=1,S.b--;
else if(i>b)S.s<<=1,S.b++;
S.c=5<<S.b}
}
/* compressor
@A :input(Array / Uint8Array)
@done: call back of last process.
done(A,a,z)
@A: compressed array of input
@a: last position
@z: compressed size
@rate: call back of progress
rate(a,z)
@a: current position
@z: last position
@return:
call with await: decompressed array of @A
without await: Promise object
*/
async function PPMe(A,done,rate){
function enc(c,f,t){
R=R/t>>>0;L+=R*c;
for(R*=f;R<16777216;R*=256,L=L<<8>>>0)
if((c=L>0xffffffff)||255>L>>>24)
for(O[o++]=255&c--+B,B=L>>>24;C;C--)O[o++]=255&c;
else++C
}
var a=0,b,d=0,o=-1,z=A.length,
L=0,R=o>>>0,B=0,C=0,
O=[],M=[],SEE=[],SSE=[],D=[],E=[],X={b:0,c:0,s:0},sa=new Suffix,st=+new Date;
const pass=typeof rate=="function"?8191:-1,fn=b=>wait0(c=>b(rate(a,z)));
for(init(D,M,SEE,SSE);;a&pass||Date.now()-st<200||await new Promise(fn,st=+new Date)){
let s=A[a++], p=sa.max=sa.point, x=E.length=0;
for(sa.med=sa.min=sa.fit=void 0;p=p.dad;){
let e,f,h,r,l,q=p,m=p,t=0,S;
// deside order by Shortest Deterministic Context and Local Order Estimation
for(;(q=q.dad)&&(q.size*2>p.size?~p.next.hit*(~q.cs*2-q.sum)<=~q.next.hit*(~p.cs*2-p.sum):q.cs===p.cs);)p=q;
q=p.next;
if(x){
for(e=0;q;q=q.next)if(!E[r=q.char])
E[r]=q, f=q.hit+1,
s===r&&(h=t,l=f), t+=f, e++
if(e){
x+=e;f=p.cs;
if(255>f)
r=p.dad?2*(2*f<p.dad.cs+x-e)+4*(p.size<p.dad.size*2):0,
S=SEE[M[f]<<5|(p.sum>++f*8)+8*(2*e<p.next.hit)+d*16+r],
e=S.s>>S.b, 2*f<++e&&(e=f+1);
else S=X,e=1;
if(q=E[s]){ // hit!
enc(h,l,t+e);d=1;
e>2&&(S.s-=e);upSEE(S);
sa.min=p;sa.med=m;sa.fit=q;
break
}
S.s+=t;d=0;
enc(t,e,t+e) // escape
}
}else{
let c=q.char, v=q.hit+1;
for(;q;q=q.next)
E[r=q.char]=q, f=q.hit+1,
s===r&&(h=t), t+=f;
t+=x=e=p.cs+1;
S=SSE[4096*v/t>>5];
r=p.dad?(p.dad.next.char===c)<<1|(p.o*2<p.sum+p.cs)<<2|b<<3|p.size<p.dad.size*2:0;
f=S[r]>>>9;
b=s===c;S[r]+=((b<<23)-f)*D[r=S[r]&127]&-128|r<127;
f=f>>11||1;q=E[s];
if(b){ // hit most probable symbol
enc(0,f,4096);
sa.min=p;sa.med=m;sa.fit=q;
break
}
enc(f,4096-f,4096);
if(e>1){t-=v;
if(q){
enc(h-v,q.hit+1,t); // hit
sa.min=p;sa.med=m;sa.fit=q;
break
}
enc(t-e,e,t) // escape
}
}
}
if(!p){ // order -1
for(p=q=s<256?s:s=256;p;)E[--p]&&q--;
enc(q,1,257-x);
if(s>255){
for(a=5;a--;L=L<<8>>>0)
if((p=L>0xffffffff)||255>L>>>24)
for(O[o++]=255&p--+B,B=L>>>24;C;C--)O[o++]=255&p;
else++C;
done(O,z,o);return O
}
}update(sa,s)
}
}
/* decompressor
@A :input(Array / Uint8Array)
@done: call back of last process.
done(A,a,z)
@A: decompressed array of input
@a: last position
@z: decompressed size
@rate: call back of progress
rate(a,z)
@a: current position
@z: last position
@return:
call with await: decompressed array of @A
without await: Promise object
*/
async function PPMd(A,done,rate){
function dec(c,f){L-=c*R;
for(R*=f;R<16777216;R*=256)L=(L<<8|A[a++])>>>0
}
var a=0,b,d=0,o=0,z=A.length,
L=0,R=-1>>>0,st=+new Date;
const C=[],E=[],D=[],M=[],O=[],SEE=[],SSE=[],X={b:0,c:0,s:0},sa=new Suffix,
pass=typeof rate=="function"?8191:-1,fn=b=>wait0(c=>b(rate(a,z)));
for(init(D,M,SEE,SSE);a<4;)L=(L<<8|A[a++])>>>0;
for(;;o&pass||Date.now()-st<200||await new Promise(fn,st=+new Date)){
let p=sa.max=sa.point, x=E.length=0, s;
sa.med=sa.min=sa.fit=s;
do if(p=p.dad){
let m=p,q=p,n=0,t=0,c,e,f,r,v,S;
for(s=-1;(q=q.dad)&&(q.size*2>p.size?~p.next.hit*(~q.cs*2-q.sum)<=~q.next.hit*(~p.cs*2-p.sum):q.cs===p.cs);)p=q;
q=p.next;
if(x){
for(;q;q=q.next)if(!E[r=q.char])
E[r]=C[n++]=q, t+=q.hit+1;
if(!t)continue;f=p.cs;x+=n;
if(255>f)
r=p.dad?2*(2*f<p.dad.cs+x-n)+4*(p.size<p.dad.size*2):0,
S=SEE[M[f]<<5|(p.sum>++f*8)+8*(2*n<p.next.hit)+d*16+r],
e=S.s>>S.b, 2*f<++e&&(e=f+1);
else S=X,e=1;
r=L/(R=R/(t+e)>>>0)|0;
if(t>r){
for(v=t=0;v<n;){
q=C[v++], s=q.char, f=q.hit+1;
if(r<t+f){
e>2&&(S.s-=e), upSEE(S), d=1;
dec(t,f), sa.min=p, sa.med=m, sa.fit=q
break
}t+=f
}
}else dec(t,e), S.s+=t, d=0
}else{
for(c=q.char, v=q.hit+1;q;q=q.next)
E[q.char]=q, t+=q.hit+1;
t+=x=e=p.cs+1;
S=SSE[4096*v/t>>5];
q=p.dad?(p.dad.next.char===c)<<1|(p.o*2<p.sum+p.cs)<<2|b<<3|p.size<p.dad.size*2:0;
f=S[q]>>>9, r=f>>11||1;
b=L/(R>>>=12)>>>0<r;
S[q]+=((b<<23)-f)*D[q=S[q]&127]&-128|q<127;
if(b)dec(0,r), sa.min=p, sa.med=m, sa.fit=E[s=c]; // hit most probable symbol
else{
dec(r,4096-r);
if(e>1){
t-=v, r=L/(R=R/t>>>0)|0;
if(t-e>r){
for(q=p.next, t=0;q;q=q.next)if(s=q.char,c^s){
f=q.hit+1;
if(r<t+f){
dec(t,f), sa.min=p, sa.med=m, sa.fit=q;
break
}t+=f
}
}else dec(t-e,e) //escape
}
}
}
}else{// order -1
p=s=-1;
for(x in E)p++;
for(p=L/(R=R/(256-p)>>>0)>>>0, x=s;E[++s]||p>++x;);
if(s>255)return done(O,z,o),O;
dec(x,1)
}
while(s<0);update(sa,O[o++]=s)
}
}
使用例
(async()=>{
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let rate=(a,b)=>{console.log((a/b*1e4|0)/100,"%")}, done=(a,b,c)=>{console.log(b,"->",c,a)};
let e=await PPMSenc(A,done,rate), d=await PPMSdec(e,done,rate);
console.log(e.length, String.fromCharCode(...d))
})()
file爆縮実験
See the Pen PPM* compressor(3) by xezz (@xezz) on CodePen.