圧縮業界で高い圧縮率を誇るPAQ系列の軽量版LPAQ5の紹介。文脈混合法により圧縮しまくっていきます。
やぶからぼうに実装
本家の方はmacro乱舞でやばいです。性能的にはLPAQ1より少し良いかもしれません。
{const TOLIMIT1=159,TOLIMIT2=175,SQUARD=2,calcprevfail=new Uint16Array(256),SQ=new Uint16Array(4096),
sqInit=d=>{
if(d>2046)return 4095;
if(d<-2046)return 0;
return 0|4096/(1+Math.exp(-d/256))
},
//Stretch
S1=new Int16Array(4096),S2=new Int32Array(4096),D=new Int16Array(1024).map((a,b)=>16384/(b+b+3)),calcfails=new Uint8Array(8192),ST=new Uint8Array([1,3,4,7,8,9,11,15,16,17,18,20,21,22,26,31,32,32,32,32,34,34,34,34,34,34,36,36,36,36,38,41,42,42,44,44,46,46,48,48,50,53,54,54,56,56,58,58,60,60,62,62,50,67,68,68,70,70,72,72,74,74,76,76,62,62,64,83,84,84,86,86,44,44,58,58,60,60,76,76,78,78,80,93,94,94,96,96,48,48,88,88,80,103,104,104,106,106,62,62,88,88,80,113,114,114,116,116,62,62,88,88,90,123,124,124,126,126,62,62,98,98,90,133,134,134,136,136,62,62,98,98,90,143,144,144,68,68,62,62,98,98,100,149,150,150,108,108,100,153,154,108,100,157,158,108,100,161,162,108,110,165,166,118,110,169,170,118,110,173,174,118,110,177,178,118,110,181,182,118,120,185,186,128,120,189,190,128,120,193,194,128,120,197,198,128,120,201,202,128,120,205,206,128,120,209,210,128,130,213,214,138,130,217,218,138,130,221,222,138,130,225,226,138,130,229,230,138,130,233,234,138,130,237,238,138,130,241,242,138,130,245,246,138,140,249,250,80,140,253,254,80,140,253,254,80,2,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,104,106,108,110,112,114,116,118,120,122,124,126,128,130,132,134,136,138,140,142,144,146,148,150,152,154,156,158,160,162,164,166,168,170,172,174,176,178,180,182,184,186,188,190,192,194,196,198,200,202,204,206,208,210,212,214,216,218,220,222,224,226,228,230,232,234,236,238,240,242,244,246,248,250,252,254,128,130,132,134,136,138,140,142,144,146,148,150,152,154,156,158,160,162,164,166,168,170,172,174,176,178,180,182,184,186,188,190,192,194,196,198,200,202,204,206,208,210,212,214,216,218,220,222,224,226,228,230,232,234,236,238,240,242,244,246,248,250,252,190,192,130,132,134,136,138,140,142,144,146,148,150,152,154,156,158,160,162,164,166,168,170,172,174,176,178,180,182,184,186,188,190,192,194,196,198,200,202,204,206,208,210,212,214,216,218,220,222,224,226,228,230,232,234,236,238,240,242,244,246,248,250,252,254,1,16,17,18,38,40,42,70,46,63,95,195,159,191,223,240,2,32,33,34,39,41,43,45,47,79,111,143,175,207,225,241,3,48,49,50,36,52,53,54,55,53,57,58,59,60,61,62,19,35,36,66,67,64,54,70,71,72,73,74,75,76,77,78,68,69,37,82,83,84,80,81,87,88,89,6,91,92,93,94,85,86,4,98,99,100,101,96,97,104,105,106,107,108,109,110,102,103,20,114,115,116,117,118,112,113,6,122,123,124,125,126,119,120,5,130,131,70,133,134,135,128,97,138,139,140,141,157,136,152,21,146,147,148,149,150,151,152,97,145,155,156,157,158,168,154,6,162,163,164,165,166,167,168,149,160,161,172,173,174,170,171,22,178,179,180,181,182,183,199,185,186,176,177,189,190,187,188,7,194,195,196,197,198,199,200,201,202,195,192,226,206,204,220,23,210,211,212,213,214,149,216,217,233,219,220,208,209,221,222,8,226,227,228,229,245,231,232,233,234,235,236,226,224,238,239,24,242,243,244,245,246,247,248,249,250,251,14,253,254,255,242,9,25,10,26,11,27,12,28,13,29,14,30,15,31,2,5,6,10,12,13,14,19,23,24,25,27,28,29,30,33,35,35,35,35,37,37,37,37,37,37,39,39,39,39,40,43,45,45,47,47,49,49,51,51,52,43,57,57,59,59,61,61,63,63,65,65,66,55,57,57,73,73,75,75,77,77,79,79,81,81,82,69,71,71,73,73,59,59,61,61,49,49,89,89,91,91,92,69,87,87,45,45,99,99,101,101,102,69,87,87,57,57,109,109,111,111,112,85,87,87,57,57,119,119,121,121,122,85,97,97,57,57,129,129,131,131,132,85,97,97,57,57,139,139,141,141,142,95,97,97,57,57,81,81,147,147,148,95,107,107,151,151,152,95,107,155,156,95,107,159,160,105,107,163,164,105,117,167,168,105,117,171,172,105,117,175,176,105,117,179,180,115,117,183,184,115,127,187,188,115,127,191,192,115,127,195,196,115,127,199,200,115,127,203,204,115,127,207,208,125,127,211,212,125,137,215,216,125,137,219,220,125,137,223,224,125,137,227,228,125,137,231,232,125,137,235,236,125,137,239,240,125,137,243,244,135,137,247,248,135,69,251,252,135,69,255,252,135,69,255,3,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,129,131,133,135,137,139,141,143,145,147,149,151,153,155,157,159,161,163,165,167,169,171,173,175,177,179,181,183,185,187,189,191,193,195,197,199,201,203,205,207,209,211,213,215,217,219,221,223,225,227,229,231,233,235,237,239,241,243,245,247,249,251,253,255,129,131,133,135,137,139,141,143,145,147,149,151,153,155,157,159,161,163,165,167,169,171,173,175,177,179,181,183,185,187,189,191,193,195,197,199,201,203,205,207,209,211,213,215,217,219,221,223,225,227,229,231,233,235,237,239,241,243,245,247,249,251,253,191,193,131,133,135,137,139,141,143,145,147,149,151,153,155,157,159,161,163,165,167,169,171,173,175,177,179,181,183,185,187,189,191,193,195,197,199,201,203,205,207,209,211,213,215,217,219,221,223,225,227,229,231,233,235,237,239,241,243,245,247,249,251,253,255,51,66,52,4,40,116,97,75,47,79,141,188,250,238,137,212,37,67,38,85,130,102,89,61,9,110,172,219,207,240,182,227,82,70,100,130,41,84,55,21,131,117,103,104,90,76,62,63,80,115,21,65,99,70,41,146,132,118,112,105,91,77,78,94,80,56,69,114,85,71,6,147,133,119,113,106,92,93,109,125,42,162,5,100,81,57,22,148,134,120,121,107,108,124,140,156,163,149,115,86,72,43,178,164,135,128,122,123,139,155,161,176,150,136,101,87,58,7,179,165,151,129,138,145,160,171,187,203,137,144,96,73,44,194,180,166,152,153,154,170,186,202,218,234,169,185,88,59,23,195,181,167,168,184,200,201,217,233,249,13,216,232,74,45,210,196,182,183,199,215,231,247,248,28,191,209,12,175,60,8,211,197,198,214,230,246,27,159,190,206,222,253,208,237,46,226,212,213,229,245,11,143,174,193,221,252,30,255,225,31,24,227,228,244,26,127,158,189,205,236,14,239,241,152,152,182,242,243,10,111,142,173,192,220,251,223,254,237,167,197,212,242,25,95,126,157,177,204,235,29,224,15,252,167,197,227]),
//StateMap
StateMap=(t=512)=>{t=new Uint32Array(t).fill(1<<31);t.a=t.c=0;return t},
SMp=(t,cx,y22)=>{
var p0=t[t.c],i=p0&1023,pr=p0>>>10;
t[t.c]=p0+=(y22-pr>>3)*D[i]&-1024|i<TOLIMIT1;
return t[t.c=t.a+cx]>>>20
},
//APM
APM=n=>{
for(var t=new Uint32Array(n),i=t.c=0,j=0;i<24;)t[i]=(sqInit(((2*i+++1<<12)/48|0)-2048))<<20|8;
for(;i<n;)t[i++]=t[j++];return t
},
APMp=(t,p,cx,y22)=>{
var p0=t[t.c],i=p0&1023,pr=p0>>>10;
t[t.c]=p0+=(y22-pr>>3)*D[i]+512&-1024|i<TOLIMIT2;
p0=p&4095;cx=cx*24+(p>>12);
t.c=cx+(p0>>11);
return(t[cx]>>>13)*(4096-p0)+(t[cx+1]>>>13)*p0>>>19
},
//Mixer
MI=8,MC=1280,
//hash
imul=Math.imul||function(a,b,c){c=65535&b;return((a>>>16)*c+(a&=65535)*(b>>>16)<<16)+a*c>>>0},
hash1=i=>imul((i=imul(i,234567891))<<21|i>>>11,765432197),
hash2=i=>imul((i=imul(i,234567891))<<19|i>>>13,654321893),
hash3=i=>imul((i=imul(i,234567891))<<21|i>>>11,543210973),
hash4=i=>imul((i=imul(i,234567891))<<19|i>>>13,432109879),
hash5=i=>imul((i=imul(i,234567891))<<21|i>>>11,987654323),
hash6=i=>imul((i=imul(i,345678941))<<19|i>>>13,876543211),
hash7=i=>imul((i=imul(i,345678941))<<21|i>>>11,765432197),
hash8=i=>imul((i=imul(i,345678941))<<19|i>>>13,654321893),
hash9=i=>imul((i=imul(i,345678941))<<21|i>>>11,543210973),
hash0=i=>imul((i=imul(i,345678941))<<19|i>>>13,432109879),
get=(t,p,n,i)=>{
var B=16,q,r;p+=i*B&n;i>>>=24;
if(t[p-1]===i)return p;q=p^B;
if(t[q-1]===i)return q;r=p^B*2;
if(t[r-1]===i)return r;
if(t[p]>t[q])p=q;
if(t[p]>t[r])p=r;
for(t[p-1]=i,i=p;--B;)t[p++]=0;return i
},
//MatchModel
MAXLEN=62,L2C=new Uint32Array(MAXLEN*2+1),L2O=new Uint32Array(MAXLEN+1);
class MatchModel{
constructor(n){
this.sm=StateMap(55<<8);
this.m=this.l=this.pos=this.h1=0;
this.B=new Uint8Array(n>>=1);this.N=n-1;
this.H=new Int32Array(n>>=2);this.HN=n-1
}
upd(c,cc,c8,c4){
var{l,m,h1,N,HN,H,B,pos}=this,h2=hash2(c4*73-c8*101+cc*421)&HN,h3=hash1(c4*59+c8*59999)&HN;
B[pos++]=c;this.pos=pos&=N;
this.h1=h1=h1*6+c+1&HN;
if(l>2)m=m+1&N,l+=l<MAXLEN;
else{
if(pos>=MAXLEN){
var p1=pos-1,p;
l=1;m=H[h1];
if(m^pos)for(p=p1;l<=MAXLEN&&B[m-l&N]===B[p];--p)++l;
if(l<3){
l=1;m=H[h2];
if(m^pos)for(p=p1;l<=MAXLEN&&B[m-l&N]===B[p];--p)++l
}
if(l<3){
l=1;m=H[h3];
if(m^pos)for(p=p1;l<=MAXLEN&&B[m-l&N]===B[p];--p)++l
}
}else{
l=1;m=H[h1];
if(m^pos)for(;l<=MAXLEN&&B[m-l&N]===B[pos-l&N];)++l;
if(l<3){
l=1;m=H[h2];
if(m^pos)for(;l<=MAXLEN&&B[m-l&N]===B[pos-l&N];)++l
}
if(l<3){
l=1;m=H[h3];
if(m^pos)for(;l<=MAXLEN&&B[m-l&N]===B[pos-l&N];)++l
}
}--l
}
this.l=l;this.bm=B[this.m=m]+256;
H[h1]=H[h2]=H[h3]=pos
}
p(bc,c0,c1,tx,y22){
if(this.l){
var b=this.bm;
if(b>>8-bc===c0)
b=b>>7-bc&1,c0=L2C[this.l*2-b]+c1;
else this.l=0
}
tx[0]=S1[SMp(this.sm,c0,y22)];
return this.l
}}
//Predictor encoder/decoder
class Coder{
constructor(i){
var j=6,e=this.t1=65537,v=2047,x=~v;
this.t2=e+1+i/2|0;this.t3=e+2+i;
this.p=this.mp=2048;this.c0=1;
this.t0c1=this.pw=this.c1=this.c4=this.c8=this.cc=this.prevfail=this.fails=this.a2o=this.mc=0;this.shift=14;
this.m1=this.m3=(i>>1)-16;this.m2=i-16;
this.tx=new Int32Array(MI);this.wx=new Int32Array(MI*MC).fill(1<<12);
this.cp=new Uint32Array(6);this.h=new Uint32Array(6);
this.t=new Uint8Array(e+2+i*2);
this.mm=new MatchModel(i);this.a1=APM(24<<16);this.a2=APM(24<<11);
for(this.sm=[];j--;)this.sm[j]=StateMap();
if(!SQ[0])for(;x<v;)
for(i=sqInit(++x),SQ[x+v]=i+SQUARD;j<i;S2[j]=(x+v)*23)S1[++j]=x;
if(!calcprevfail[1])for(i=0;i<256;calcprevfail[i++]=v){
v=(i&1)<<10;
if(i&6)v+=512;
if(i&248)v+=256
}
if(!calcfails[0])for(i=-4096;i<4096;calcfails[i+++4096]=v){
e=i,v=0;
if(e<0)e=-e;
if(e>1024)v=1;
if(e>2624)v=3
}
if(!L2C[1])for(i=2,e=MAXLEN*2;i<=e;i+=2)
v=i<32?i:(i>>3)*2+24,
L2C[i]=v*=256,
L2C[i-1]=v-256,
L2O[v=i>>1]=(5+(v>7)+(v>11)+(v>15)+(v>31))*MI
}
cpup(t,cp,y){
var p=ST.subarray(y*768);
t[cp[0]]=p[t[cp[0]]+256];
t[cp[1]]=p[t[cp[1]]];
t[cp[2]]=p[t[cp[2]]+256];
t[cp[3]]=p[t[cp[3]]+512];
t[cp[4]]=p[t[cp[4]]];
t[cp[5]]=p[t[cp[5]]+256];
return this.c0+=this.c0+y
}
upd0(y,m2,bc,sh){
this.a2o+=MI*10;
var{t,cp,h}=this,p=ST.subarray(y*768),j=y+1<<sh;
t[cp[1]]=p[t[cp[1]]];cp[1]+=j;
t[cp[2]]=p[t[cp[2]]+m2*256];cp[2]+=j;
t[cp[3]]=p[t[cp[3]]+512];cp[3]+=j;
t[cp[4]]=p[t[cp[4]]];cp[4]+=j;
t[cp[5]]=p[t[cp[5]]+256];cp[5]+=j;
t[cp[0]]=p[t[cp[0]]+256];
this.c0+=this.c0+y;this.p=this.up(y,bc,t,cp,h)
}
upd3(y){
var{t,cp,h,t1,t2,t3,m1,m2,m3,c4,c8}=this,c0=this.cpup(t,cp,y);
this.a2o+=MI*10;
cp[1]=get(t,t1,m1,hash6(c0-h[1]));
cp[2]=get(t,t1,m1,hash7(c0*23-(c4&0xffffff)*251));
cp[3]=get(t,t2,m2,hash8(c0-c4*19));
cp[4]=get(t,t3,m3,hash9(c0*31-c4*197+(c8&0xffff)*63331));
cp[5]=get(t,t2,m2,hash0(c0*37-h[5]));
this.p=this.up(y,4,t,cp,h)
}
upd7(y){
var{t,cp,h,t1,t2,t3,m1,m2,m3,cc,c1,c4,c8,fails}=this,c=this.cpup(t,cp,y)-256;
this.a2o=MI*80*((c>>6)+(c4>>4&12));
this.mm.upd(c,this.cc=cc<<8|c8>>>24,this.c8=c8=c8<<8|c4>>>24,this.c4=c4=c4<<8|c);
this.t0c1=(this.c1=c1>>4|c&240)<<8;
this.prevfail=calcprevfail[fails];this.fails=1;
h[0]=c<<8;
h[1]=(c4&0xffff)*8191;
cp[1]=get(t,t1,m1,hash1(h[1]));
cp[2]=get(t,t1,m1,hash2((c4&0xffffff)*251));
cp[3]=get(t,t2,m2,hash3(c4*127));
cp[4]=get(t,t3,m3,hash4(c4*197-(c8&0xffff)*63331));
if(c>64&&c<91)c+=32;
if(c>96&&c<123)h[5]=(h[5]+c)*382;
else this.pw=h[5]*241,h[5]=0;
cp[5]=get(t,t2,m2,hash5(h[5]-this.pw));
this.c0=1;this.p=this.up(y,0,t,cp,h)
}
up(y,bc,t,cp,h){
var{tx,wx,sm,c0,c1,t0c1,fails,prevfail}=this,pr=y*0xfff-this.mp,l=this.mc,z=y<<22,S=S1;
this.fails=fails=fails<<1&255|calcfails[pr+4096];
wx[l]+=tx[0]*pr+8192>>14;
wx[++l]+=tx[1]*pr+8192>>14;
wx[++l]+=tx[2]*pr+8192>>14;
wx[++l]+=tx[3]*pr+8192>>14;
wx[++l]+=tx[4]*pr+8192>>14;
wx[++l]+=tx[5]*pr+8192>>14;
wx[++l]+=tx[6]*pr+8192>>14;
wx[++l]+=pr+32>>6;
l=this.mm.p(bc,c0,c1,tx,z);l=l?L2O[l]:(4-!t[cp[1]]-!t[cp[2]]-!t[cp[3]]-!t[cp[4]])*MI;
pr=wx[this.mc=l+=this.a2o]*tx[0];
pr+=wx[++l]*(tx[1]=S[SMp(sm[1],t[cp[1]],z)]);
pr+=wx[++l]*(tx[2]=S[SMp(sm[2],t[cp[2]],z)]);
pr+=wx[++l]*(tx[3]=S[SMp(sm[3],t[cp[3]],z)]);
pr+=wx[++l]*(tx[4]=S[SMp(sm[4],t[cp[4]],z)]);
pr+=wx[++l]*(tx[5]=S[SMp(sm[5],t[cp[5]],z)]);
pr+=wx[++l]*(tx[6]=S[SMp(sm[0],t[cp[0]=t0c1+c0],z)]);
pr+=wx[++l]<<8;pr>>=this.shift;
if(pr<-2047)pr=-2047;
if(pr>2047)pr=2047;
this.mp=pr=SQ[pr+=2047]+3*APMp(this.a1,pr*23,h[0]+c0,z)>>2;
pr=pr*3+5*APMp(this.a2,S2[pr],fails+prevfail,z)+4>>3;
return pr+=pr<2048
}
ebits(part1,part2,c,y){
var sm=this.sm;
y=part1(7,c);this.upd0(y,0,1,0);part2();
sm[4].a=sm[5].a=256;
y=part1(6,c);this.upd0(y,0,2,1);part2();
sm[2].a=256;
y=part1(5,c);this.upd0(y,0,3,2);part2();
sm[1].a=256;
y=part1(4,c);this.upd3(y);part2();
y=part1(3,c);this.upd0(y,1,5,0);part2();
y=part1(2,c);this.upd0(y,1,6,1);part2();
y=part1(1,c);this.upd0(y,1,7,2);part2();
sm[1].a=sm[2].a=sm[4].a=sm[5].a=0;
y=part1(0,c);this.upd7(y);part2()
}}
//setImmediateもどき
(function(a){
var F=[],hit="\0";
a.wait0=function(f){
F[F.length]=f;a.postMessage(hit,"*")
};
a.onmessage=function(e){
if(e.source===a&&e.data===hit)
e.stopPropagation(),F[0]&&F.shift()()
}
})(this);
/* LPAQ5e(A,m,done,rate): compressor
@A: input(Array / Uint8Array)
@m: memory usage(0-9)
@done: call back of last process.
done(A,a,z)
@A: compressed array of input.
@a: input size
@z: compressed size
@rate: call back of progress
rate(a,z)
@a: current position
@z: last position
*/
this.LPAQ5enc=(A,m,done=a=>a,rate)=>{
function enc1(y,m){
y=m>>y&1,m=x1+(x2-x1>>>12)*e.p+((x2-x1&4095)*e.p>>12);
y?x2=m:x1=m+1;return y
}
function enc2(){for(;!((x1^x2)>>24);x2=x2<<8|255)x1<<=8,O[o++]=x2>>>24}
function _(){
var c,p,t=Date.now();
for(rate(a,z);a<z&&(a&4095||Date.now()-t<200);){
c=A[a++];
if(txt&&(c===32||c===31))c^=63;
e.ebits(enc1,enc2,c);p=e.mm.pos;
if(!(262143&p)&&txt&&(7340032===p&&16===e.shift||1048576===p&&15===e.shift||14===e.shift))
for(e.shift++,c=MI*MC;c;)e.wx[--c]*=2
}
if(a<z)return wait(_);
O[o++]=x1>>>24;done(O,z,o);
return O
}
if(!A.buffer)A=new Uint8Array(A);
m=(m&15)%10;
var a=A.length,c=a<4096?a:4096,z=a,e=new Coder(1<<m+20),o=1,O=[m*16],x1=0,x2=-1>>>0,txt,wait=wait0;
if(typeof rate!="function")rate=a=>a,wait=a=>a();
for(;a-=O[o++]=a&255;a/=256)O[0]+=2;
for(;c--&&A[c]<128;);O[0]+=txt=c<0;
return wait(_)
};
/* LPAQ5dec(A,done,rate): decompressor
@A: input(Array / Uint8Array)
@done: call back of last process.
done(A,a,z)
@A: decompressed array of input.
@a: last position
@z: decompressed size
@rate: call back of progress
rate(a,z)
@a: current position
@z: last position
*/
this.LPAQ5dec=(A,done=a=>a,rate)=>{
if(!A.buffer)A=new Uint8Array(A);
var a=1,c=A[0],n=0,e=1,o=c>>1&7,z=A.length,x,x1=0,x2=-1>>>0,txt=c&1,O=[],wait=wait0;
for(;n+=A[a++]*e,o;o--)e*=256;
e=new Coder(1<<c/16+20);
if(typeof rate!="function")rate=a=>a,wait=a=>a();
for(c=4;c--;)x=(x<<8|(a<z?A[a++]:255))>>>0;
function dec1(y,m){
m=x1+(x2-x1>>>12)*e.p+((x2-x1&4095)*e.p>>12)>>>0,y=x<=m;
y?x2=m:x1=m+1;return y
}
function dec2(){for(;!((x1^x2)>>24);x2=x2<<8|255)x1<<=8,x=(x<<8|(a<z?A[a++]:255))>>>0}
function _(){
var c,p,t=Date.now();
for(rate(a,z);o<n&&(o&4095||Date.now()-t<200);){
e.ebits(dec1,dec2);c=e.c4&255;
if(txt&&(c===32||c===31))c^=63;
O[o++]=c;p=e.mm.pos;
if(!(262143&p)&&txt&&(7340032===p&&16===e.shift||1048576===p&&15===e.shift||14===e.shift))
for(e.shift++,c=MI*MC;c;)e.wx[--c]*=2
}
if(o<n)return wait(_);
done(O,z,o);return O
}
return wait(_)
}}
使用例
(async()=>{
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let e=LPAQ5enc(A,5), d=LPAQ5dec(e);
console.log(e.length, String.fromCharCode(...d))
})()
file圧縮思考実験
See the Pen LPAQ5 compressor by xezz (@xezz) on CodePen.