今回は文脈混合法(context mixing)とよばる技術を用いた情報圧縮programを紹介します。その分野では特に有名なPAQというprogram……から派生したLPAQ1の力をまんまと見せびらかしてやろうと思っているかどうか定かではありません。
1があるって事は2やら3、9なんかもあります。なぜ1にしたのかと言うと、実装が簡単だという素晴らし過ぎる理由です。
実装編
な~るほど、こりゃ簡単だ。
const ST=new Uint8Array([1,2,3,5,4,6,7,10,8,12,9,13,11,14,15,19,16,23,17,24,18,25,20,27,21,28,22,29,26,30,31,33,32,35,32,35,32,35,32,35,34,37,34,37,34,37,34,37,34,37,34,37,36,39,36,39,36,39,36,39,38,40,41,43,42,45,42,45,44,47,44,47,46,49,46,49,48,51,48,51,50,52,53,43,54,57,54,57,56,59,56,59,58,61,58,61,60,63,60,63,62,65,62,65,50,66,67,55,68,57,68,57,70,73,70,73,72,75,72,75,74,77,74,77,76,79,76,79,62,81,62,81,64,82,83,69,84,71,84,71,86,73,86,73,44,59,44,59,58,61,58,61,60,49,60,49,76,89,76,89,78,91,78,91,80,92,93,69,94,87,94,87,96,45,96,45,48,99,48,99,88,101,88,101,80,102,103,69,104,87,104,87,106,57,106,57,62,109,62,109,88,111,88,111,80,112,113,85,114,87,114,87,116,57,116,57,62,119,62,119,88,121,88,121,90,122,123,85,124,97,124,97,126,57,126,57,62,129,62,129,98,131,98,131,90,132,133,85,134,97,134,97,136,57,136,57,62,139,62,139,98,141,98,141,90,142,143,95,144,97,144,97,68,57,68,57,62,81,62,81,98,147,98,147,100,148,149,95,150,107,150,107,108,151,108,151,100,152,153,95,154,107,108,155,100,156,157,95,158,107,108,159,100,160,161,105,162,107,108,163,110,164,165,105,166,117,118,167,110,168,169,105,170,117,118,171,110,172,173,105,174,117,118,175,110,176,177,105,178,117,118,179,110,180,181,115,182,117,118,183,120,184,185,115,186,127,128,187,120,188,189,115,190,127,128,191,120,192,193,115,194,127,128,195,120,196,197,115,198,127,128,199,120,200,201,115,202,127,128,203,120,204,205,115,206,127,128,207,120,208,209,125,210,127,128,211,130,212,213,125,214,137,138,215,130,216,217,125,218,137,138,219,130,220,221,125,222,137,138,223,130,224,225,125,226,137,138,227,130,228,229,125,230,137,138,231,130,232,233,125,234,137,138,235,130,236,237,125,238,137,138,239,130,240,241,125,242,137,138,243,130,244,245,135,246,137,138,247,140,248,249,135,250,69,80,251,140,252,249,135,250,69,80,251,140,252,0,0,0,0,0,0]),
SQ=new Uint16Array([1,2,3,6,10,16,27,45,73,120,194,310,488,747,1101,1546,2047,2549,2994,3348,3607,3785,3901,3975,4022,4050,4068,4079,4085,4089,4092,4093,4094]),
DT=new Int16Array(1024).map((a,b)=>16384/(b+b+3)|0),
stretch=function(a,b,c,d){for(;++c<2048;)for(d=squash(c);b<d;)a[++b]=c;a[++b]=c-1;return a}(new Int16Array(4096),-1,-2048),
stretchf=new Float32Array(4096).map((a,b)=>stretch[b]/4095);
function squash(d,w){
if(d>2047)return 4095;
if(d<-2047)return 0;
return SQ[w=(d>>7)+16]*(128-(d&=127))+SQ[++w]*d+64>>7
}
//////////////////////////// StateMap, APM //////////////////////////
function StateMap(i){for(var A=new Uint32Array(i);i;)A[--i]=1<<31;A.cxt=0;return A}
function APM(n){
for(var i=24,p=i,A=new Uint32Array(n*=i);i;)A[--i]=squash(((i*2+1<<12)/48|0)-2048)<<20|6;
for(A.cxt=0;p<n;)A[p++]=A[i++];
return A
}
function SMup(A,y,limit){
var c=A.cxt,i=A[c],n=i&1023,p=i>>>10;
A[c]+=((y<<22)-p>>3)*DT[n]&-1024|n<limit
}
function SMp(A,y,cx){
SMup(A,y,1023);
return A[A.cxt=cx]>>>20
}
function SMpp(A,y,pr,cx){
SMup(A,y,255),
pr=(stretch[pr]+2048)*23,
cx=cx*24+(pr>>12),
A.cxt=cx+((pr&=4095)>>11);
return(A[cx]>>>13)*(4096-pr)+(A[cx+1]>>>13)*pr>>19
}
/////////////////////////// Mixer /////////////////////////////
function Mixer(){
for(var a=567,A=new Float32Array(a);a>7;)A[--a]=1.5;
for(A.cxt=0,A.pr=.5;a;)A[--a]=.5;return A
}
function MixUp(M,y){
var i=7,e=(M.pr-y)*.28955;
if(e>-.00044&&e<.00024)return;
for(y=M.cxt*7+14;i;)M[--y]-=M[--i]*e
}
function Mixp(M){
for(var i=7,s=0,c=M.cxt*7+14;i;)s-=M[--i]*M[--c];
return(M.pr=1/(1+Math.exp(s)))*4095
}
/////////////////////////// MatchModel ////////////////////////
function MatchModel(n){
this.buf=new Uint8Array(n>>=1);this.N=n-1;
this.H=new Int32Array(n>>=2);this.HN=n-1;
this.sm=StateMap(14336)
}
MatchModel.prototype={
pos:0,mp:0,len:0,h1:0,h2:0,c0:1,bc:0,MAXLEN:62,
p:function(y,M){
var B=this.buf,c=this.c0+=this.c0+y,l=this.len,m=this.mp,p=this.pos,i;
if(++this.bc>7){
this.bc=0;
this.h1=this.h1*24+c&this.HN;
this.h2=this.h2*160+c&this.HN;
B[p++]=c&255;
c=this.c0=1,p&=this.N;
if(l)m=++m&this.N,l<this.MAXLEN&&++l;
else{
m=this.H[this.h1];
if(m^p)for(;l<this.MAXLEN&&(i=~l+m&this.N)^p&&B[i]===B[~l+p&this.N];)++l}
if(l<2){
m=this.H[this.h2];l=0;
if(m^p)for(;l<this.MAXLEN&&(i=~l+m&this.N)^p&&B[i]===B[~l+p&this.N];)++l}
this.H[this.h1]=this.H[this.h2]=this.pos=p;this.mp=m
}
if(l&&B[m]+256>>8-this.bc===c)
m=B[m]>>7-this.bc&1,
c=16>l?2*l+m:2*(l>>2)+m+24,
c=c<<8|B[p-1&this.N];
else l=0;
M[0]=stretchf[SMp(this.sm,y,c)];
return this.len=l
}};
///////////////////////////// Predictor /////////////////////////
function Predictor(m){
this.a1=APM(256),this.a2=APM(16384),this.m=Mixer(),this.mm=new MatchModel(m=1<<16+(m&15));
this.h=new Uint32Array(6);this.N=m*=2;
this.t0=new Uint8Array(m+65536);
this.t=this.t0.subarray(65536);this.sm=[];
for(m=6;m;)this.sm[--m]=StateMap(256)
}
Predictor.prototype={
cp0:0,cp1:0,cp2:0,cp3:0,cp4:0,cp5:0,pr:2048,bc:0,c0:1,c4:0,
x:function(i){
i=((i>>>16)*52503+(i&=65535)*1883<<16)+i*52503;i=i<<16|i>>>16;
i=((i>>>16)*14547+(i&=65535)*3579<<16)+i*14547;
var B=16,t=this.t,c=i>>>24,j;
if(t[i=i*B&this.N-B]===c)return i;
if(t[i^B]===c)return i^B;
if(t[i^B*2]===c)return i^B*2;
if(t[i+1]>t[i+1^B]||t[i+1]>t[i+1^B*2])i^=B;
if(t[i+1]>t[i+1^B^B*2])i^=B^B*2;
for(j=i;B--;)t[j++]=0;t[i]=c;
return i
},
update:function(y){
var T=this.t0,c=this.c0+=this.c0+y,h=this.h,m=this.m,sm=this.sm,s=stretchf,i;
T[this.cp0]=ST[T[this.cp0]<<1|y],T[this.cp1]=ST[T[this.cp1]<<1|y],
T[this.cp2]=ST[T[this.cp2]<<1|y],T[this.cp3]=ST[T[this.cp3]<<1|y],
T[this.cp4]=ST[T[this.cp4]<<1|y],T[this.cp5]=ST[T[this.cp5]<<1|y],
MixUp(m,y);
if(c>255)
i=this.c4=this.c4<<8|(c&=255),
h[this.bc=0]=c<<8,
h[1]=(i&65535)<<5|0x57000000,
h[2]=i*768,
h[3]=i*5,
h[4]=h[4]*352+c*13&0x3fffffff,
c>64&&c<91&&(c+=32),
c>96&&c<123?h[5]=(h[5]+c)*56:h[5]=0,i=65537,
this.cp1=this.x(h[c=this.c0=1])+i,this.cp2=this.x(h[2])+i,
this.cp3=this.x(h[3])+i,this.cp4=this.x(h[4])+i,this.cp5=this.x(h[5])+i;
else if(i=++this.bc^4)
this.cp1+=i=y+1<<(i&3)-1,this.cp2+=i,this.cp3+=i,this.cp4+=i,this.cp5+=i;
else i=65537,
this.cp1=this.x(h[1]+c)+i,this.cp2=this.x(h[2]+c)+i,
this.cp3=this.x(h[3]+c)+i,this.cp4=this.x(h[4]+c)+i,this.cp5=this.x(h[5]+c)+i;
this.cp0=h[0]+c;
if(i=this.mm.p(y,m))i=5+(i>7)+(i>11)+(i>15)+(i>31);
else i=4-!T[this.cp4]-!T[this.cp3]-!T[this.cp2]-!T[this.cp1];
m[1]=s[SMp(sm[0],y,T[this.cp0])],m[2]=s[SMp(sm[1],y,T[this.cp1])],
m[3]=s[SMp(sm[2],y,T[this.cp2])],m[4]=s[SMp(sm[3],y,T[this.cp3])],
m[5]=s[SMp(sm[4],y,T[this.cp4])],m[6]=s[SMp(sm[5],y,T[this.cp5])],
m.cxt=i+10*(h[0]>>13),i=0|Mixp(m),
i=i+3*SMpp(this.a1,y,i,c)>>2,
this.pr=i+3*SMpp(this.a2,y,i,c^h[0]>>2)>>2
}};
//////////////////////////// main ////////////////////////
/*compressor
@In: input(Array / Uint8Array)
@mem: memory usage(0-15). about (1<<16+m)*3b+3mb
@done: call back of last process.
done(A,a,z)
@A: compressed array of input.
@a: input size
@z: compressed size
@rate: call back of progress
rate(a,z)
@a: current position
@z: last position
*/
async function compress(In,mem,done,rate){
var a=In.length,z=a,x1=0,x2=-1,o=1,st=+new Date;
const O=[mem&=15],P=new Predictor(mem),fn=b=>setTimeout(c=>b(rate(a,z)),0);
for(;a;a>>>=8)O[o++]=a&255,O[0]+=16;// write size
for(let b,c,m,p,i;a<z;a&8191||Date.now()-st<200||await new Promise(fn,st=+new Date))
for(i=8,c=In[a++];i;){
// prediction
p=P.pr;p+=p<2048;
m=x1+(x2-x1>>>12)*p+((x2-x1&4095)*p>>12);
P.update(b=c>>--i&1);
// normalize
for(b?x2=m:x1=m+1;!((x1^x2)>>24);x2=x2<<8|255)
O[o++]=x2>>>24,x1<<=8
}
O[o++]=x1>>>24;done&&done(O,z,o);
return O
}
/*decompressor
@In: input(Array / Uint8Array)
@done: call back of last process.
done(A,a,z)
@A: decompressed array of input.
@a: last position
@z: decompressed size
@rate: call back of progress
rate(a,z)
@a: current position
@z: last position
*/
async function decompress(In,done,rate){
var a=1,v=In[0],x1,x2,z=In.length,size=0,o=v>>4,st=+new Date;
const Out=[],P=new Predictor(v&15),fn=b=>setTimeout(c=>b(rate(a,z)),0);
for(v=1;o--;v*=256)size+=In[a++]*v;
for(let b,c,m,p;++o<size;Out[o]=c&255){
for(c=1;c<256;c+=c+b){
// normalize
for(;!((x1^x2)>>24);x2=x2<<8|255)
v=(v<<8|(a<z?In[a++]:255))>>>0,x1=x1<<8>>>0;
// prediction
p=P.pr;p+=p<2048;
m=x1+(x2-x1>>>12)*p+((x2-x1&4095)*p>>12);
P.update(b=v<=m|0);
b?x2=m:x1=m+1
}o&8191||Date.now()-st<200||await new Promise(fn,st=+new Date)
}
done&&done(Out,z,o);return O
}
圧縮関数の引数mem
で圧縮力を設定します(0~15を指定)。大きいほど記憶空間消費量が増えます。続いて使用例
(async()=>{
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let rate=(a,b)=>{console.log((a/b*1e4|0)/100,"%")}, done=(a,b,c)=>{console.log(b,"->",c,a)};
let e=await compress(A,6,done,rate), d=await decompress(e,done,rate);
console.log(e.length, String.fromCharCode(...d))
})()
CodePen様の力をお借りしてみましょう
See the Pen LPAQ1 compressor by xezz (@xezz) on CodePen.
参考文献
作者公式page。PAQ等の強力な圧縮programが結集
上記より本家LPAQ
Large Text Benchmark。圧縮病患者に有益な情報山盛り