0
0

圧縮の時間です。byte単位の文脈混合法に二次記号推定(Secondary Symbol Estimation)を組み合わせていきます。

原理

PPMに似ています。文脈木を構築し、設定した次数までの文脈に現れる各記号の頻度を足し合わせます(条件によってその倍率を変えます)。
そして最高頻度の記号が符号化したい記号と一致するかどうか、1 bit算術符号化。不一致なら記号頻度と合計頻度を調整して、追加の算術符号化。いわゆるSSE。
計算量が莫大なので非常に低速。はっきり言って使い物にならない程度に便利な役立たずです。

実装編

// for context tree
function Leaf(s,a){this.s=s,this.n=a<<1|1}
Leaf.prototype={c:1,e:0,o:0};

//model table
const CNUM=256,SCALE=1<<22,MX=8,MY=10,MET=999999,
	ETT=new Int32Array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,550,0,MET,MET,MET,MET,MET,MET,110,0,MET,MET,MET,MET,MET,MET,300,150,MET,MET,MET,MET,MET,MET,330,130,0,MET,MET,MET,MET,MET,700,180,0,MET,MET,MET,MET,MET,750,190,0,MET,MET,MET,MET,MET,2600,160,0,MET,MET,MET,MET,MET]),
	EWW=new Float32Array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,.15,1,0,0,0,0,0,3,.44,3.8,7,0,0,0,0,7,1.75,99,7,7,0,0,0,14,3.5,.7,99,7,15,0,0,16,4,.8,99,99,7,99,0,25,4.5,.75,99,99,7,99,99,99,4.5,.75,99,99,99,99,99]),
	EWW1=new Float32Array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,6,0,0,0,0,0,3,1,3,6,0,0,0,0,7,2,2,4,6,0,0,0,14,4,2,4,4,8,0,0,16,6,2,4,4,4,8,0,25,6,2,4,4,4,4,12,35,6,2,4,4,4,4,12]),
	OWW=new Float32Array([0,0,0,0,0,0,0,0,.28,0,0,0,0,0,0,0,.28,.28,0,0,0,0,0,0,1,.2,.25,0,0,0,0,0,1,.2,.25,1/3.5,0,0,0,0,0,0,.15,.44,.44,0,0,0,0,0,0,.15,.3,.5,0,0,0,0,0,0,.15,.5,.15,0,0,0,0,0,.15,.5,.15,.15,.15,.15,.15,.15,.15,.15,.15,.15]),
	OWW1=new Float32Array([0,0,0,0,0,0,0,0,.28,0,0,0,0,0,0,0,.28,.28,0,0,0,0,0,0,1,.2,.25,0,0,0,0,0,1,.2,.25,1/3.5,0,0,0,0,.4,.4,.4,.4,.45,0,0,0,.5,.5,.5,.5,.5,.4,0,0,.5,.5,.5,.5,.5,.3,.2,0,.5,.5,.5,.5,.5,.5,.2,.3,.5,.5,.5,.5,999,.5,.3,.25]);

/* compressor
@A	:input(Array / Uint8Array)
@mo	:context order(0-1023)
@ml	:limit of total nodes=4e9/ml(@ml:0-4095)
@done: call back of last process.
	done(A,a,z)
	@A: (de)compressed array of input
	@a: last position
	@z: (de)compressed size
@rate: call back of progress
	rate(a,z)
	@a: current position
	@z: last position
@return:
	call with await: compressed array of @A
	without await: Promise object
*/
async function PPMYenc(A,mo,ml,done,rate){
	function enc(c,f,t,n){
		L+=n=c*R/t+1>>>0;
		for(R=R*(c+f)/t-n>>>0;R<16777216;R*=256,L=L<<8>>>0)
			if((f=L>0xffffffff)||255>L>>>24)
				for(O[o++]=255&f--+B,B=L>>>24;C;C--)O[o++]=255&f;
			else++C
	}
	mo&=1023;ml&=4095;
	var b,c=A.length,e,f,i=128,j,p,t,u,w,z=c,
		L=0,R=-1>>>0,B,C=0,o=2,O=[mo&255,mo>>8|ml<<2&255,ml>>6], // coder
		F=new Float64Array(CNUM),sseLastHit=0,SSE=[],
		root={c:1,e:0,n:0,o:0},x=new Leaf(0,0),Cx=[root], // context tree
		m=1,n,OMOpt=1,
		OMTotal=CNUM,OMProb,ix=rate?4095:-1,fn=b=>wait0(c=>b(rate(i,z))),st=+new Date;

	for(mo+=2;O[++o]=B=c&255,c>>>=8;)O[2]+=64; // write size
	for(ml=4e9/ml;i;)for(p=SSE[--i]=[],j=8;j;)p[--j]=64|i<<7;

	for(;i<z;i&ix||Date.now()-st<200||await new Promise(fn,st=+new Date)){
		if(m>mo)m=mo;c=A[i++];
		//mix in order-(-1)
		OMProb=.25/OMTotal*OMOpt/i;
		for(j=CNUM;j;)F[--j]=OMProb;
		//mix context probabilities
		for(n=0;n<m;){
			p=Cx[n];b=p.n;
			if(!b.c&&b&1)b=p.n=new Leaf(A[b>>=1],b+1),p.e++,ml--;
			t=p.c-1;u=1/t;
			e=p.e,j=Math.min(m,MY-1)*MX+Math.min(n++,MX-1);
			f=e^1?OWW[j]:OWW1[j];
			if(m<3)e=1;
			else{
				w=e^1?EWW[j]:EWW1[j];j=ETT[j];
				e=1+w*(t<j?t-e:j-e)*u;
				if(e<0)e=0;e/=1+w
			}
			for(e*=(f+p.o)/(f+t)*u;b;b=b.p)F[b.s]+=b.c*e
		}
		//count inverse total for renormalization
		for(j=CNUM,t=u=w=0;j--;)u+=F[j];
		var maxF=j,maxS;
		for(u=u>0?SCALE/u:0;++j<CNUM;t+=f){
			f=F[j]*u+1|0;
			j^c||(e=f,w=t);
			if(f>maxF)maxF=f,maxS=j;
		}
		p=SSE[4096*maxF/t>>5];
		f=p[j=n>1?(Cx[j=n-1].n.s===maxS)<<1|(Cx[j].o*2+2>Cx[j].c)<<2|sseLastHit:0];
		if(c==maxS)
			enc(0,f,0x4000),
			p[j]+=0x3fff-f>>8,sseLastHit=1;
		else{
			enc(f,0x4000-f,0x4000);
			p[j]-=f>>8,sseLastHit=0;
			t-=maxF;
			if(c>maxS)w-=maxF;
			enc(w,e,t)
		}
		//detect which context has the biggest probability for current sample
		for(f=OMProb,e=-1;n;){
			p=Cx[--n];t=p.c;
			u=t>1?1/--t:0;
			for(p=p.n;p;p=p.p)
				if(p.s===c){
					p=p.c*u;
					if(p>f)f=p,e=n;
					break
				}
		}
		//increment optimality frequency
		~e?Cx[e].o++:++OMOpt;
		up:for(j=++m;--j;Cx[j]=p){
			for(e=0,n=Cx[j-1],p=n.n;p;e=p,p=p.p)
				if(p.s===c){
					p.c++,f=p.p,e?e.p=f:n.n=f,p.p=n.n,n.n=p;
					continue up
				}
			if(ml>0)
				e=n.n,n.n=p=new Leaf(c,i),n.e++,ml--,p.p=e;
			else p=x;m=j
		}root.c++ // order 0
	}
	for(i=5;i--;L=L<<8>>>0)
		if((f=L>0xffffffff)||L>>>24<255)
			for(O[o++]=255&f--+B,B=L>>>24;C;C--)O[o++]=255&f;
		else++C;
	done&&done(O,z,o);return O
}
/* decompressor
@A	:input(Array / Uint8Array)
@done: call back of last process.
	done(A,a,z)
	@A: (de)compressed array of input
	@a: last position
	@z: (de)compressed size
@rate: call back of progress
	rate(a,z)
	@a: current position
	@z: last position
@return:
	call with await: decompressed array of @A
	without await: Promise object
*/
async function PPMYdec(A,done,rate){
	function dec(c,f,t,n){L-=n=c*R/t+1>>>0;
		for(R=R*(c+f)/t-n>>>0;R<16777216;R*=256)L=(L<<8|A[a++])>>>0
	}
	var a=3,b,c=A[2],e,f,i=128,j=1,p=A[1],t,u,v,w,y=A.length,z=0,
		L,R=-1>>>0,O=[], // coder
		sseLastHit=0,SSE=[],
		root={c:1,e:0,n:0,o:0},x=new Leaf(0,0),Cx=[root],// context tree
		F=new Float64Array(CNUM),C=new Uint32Array(CNUM),
		m=1,n,mo=(A[0]|p<<8&1023)+2,ml=4e9/(p>>2&255|c<<6&4095),OMOpt=1, // int
		OMTotal=CNUM,OMProb,ix=rate?4095:-1,fn=b=>wait0(c=>b(rate(a,y))),st=+new Date;

	for(c>>=6;z+=A[a++]*j,c--;)j*=256;
	for(j=5;--j;)L=(L<<8|A[a++])>>>0;
	for(;i;)for(p=SSE[--i]=[],j=8;j;)p[--j]=64|i<<7;

	for(;i<z;i&ix||Date.now()-st<200||await new Promise(fn,st=+new Date)){
		if(m>mo)m=mo;
		OMProb=.25/OMTotal*OMOpt/-~i;
		for(j=CNUM;j;)F[--j]=OMProb;
		for(n=0;n<m;){
			p=Cx[n];b=p.n;
			if(!b.c&&b&1)b=p.n=new Leaf(O[b>>=1],b+1),p.e++,ml--;
			t=p.c-1;u=1/t;
			e=p.e,j=Math.min(m,MY-1)*MX+Math.min(n++,MX-1);
			f=e^1?OWW[j]:OWW1[j];
			if(m<3)e=1;
			else{
				w=e^1?EWW[j]:EWW1[j];j=ETT[j];
				e=1+w*(t<j?t-e:j-e)*u;
				if(e<0)e=0;e/=1+w
			}
			for(e*=(f+p.o)/(f+t)*u;b;b=b.p)F[b.s]+=b.c*e
		}
		for(j=CNUM,t=u=0;j--;)u+=F[j];
		var maxF=j,maxS;
		for(u=u>0?SCALE/u:0;++j<CNUM;){
			t+=C[j]=p=F[j]*u+1|0;
			if(p>maxF)maxF=p,maxS=j
		}
		p=SSE[4096*maxF/t>>5];
		f=p[j=n>1?(Cx[j=n-1].n.s===maxS)<<1|(Cx[j].o*2+2>Cx[j].c)<<2|sseLastHit:0];
		if(L*0x4000/R>>>0<f)
			dec(0,f,0x4000),
			p[j]+=0x3fff-f>>8,
			sseLastHit=1,c=maxS;
		else{
			dec(f,0x4000-f,0x4000);
			p[j]-=f>>8,sseLastHit=c=w=0,t-=maxF;
			for(v=L*t/R>>>0;c<CNUM;c++)if(c^maxS){
				if(v<w+C[c])break;
				w+=C[c]
			}
			dec(w,C[c],t)
		}
		for(f=OMProb,e=-1;n;){
			p=Cx[--n];t=p.c;
			u=t>1?1/--t:0;
			for(p=p.n;p;p=p.p)
				if(p.s===c){
					p=p.c*u;
					if(p>f)f=p,e=n;
					break
				}
		}
		~e?Cx[e].o++:++OMOpt;O[i++]=c;
		up:for(j=++m;--j;Cx[j]=p){
			for(e=0,n=Cx[j-1],p=n.n;p;e=p,p=p.p)
				if(p.s===c){
					p.c++,f=p.p,e?e.p=f:n.n=f,p.p=n.n,n.n=p;
					continue up
				}
			if(ml>0)
				e=n.n,n.n=p=new Leaf(c,i),n.e++,ml--,p.p=e;
			else p=x;m=j
		}root.c++
	}done&&done(O,y,i);return O
}

PPMYencの引数の説明は上記にも書いてある通り。@Aは圧縮元配列、@moは最大次数(0-1023)、@mlは作成できる文脈木の葉の最大数(0-4095)で、小さい方が多くなる天邪鬼な性格、@doneは処理終了後のcallback関数、@rateは処理途中のcallback関数。
PPMYdecの@Aは圧縮配列、他はPPMYenc同様。

使用例
(async()=>{
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let rate=(a,b)=>{console.log((a/b*1e4|0)/100,"%")}, done=(a,b,c)=>{console.log(b,"->",c,a)};
let e=await PPMYenc(A,14,111,done,rate), d=await PPMYdec(e,done,rate);
console.log(e.length, String.fromCharCode(...d))

// call without await
PPMYenc(A,14,111,done,rate).then(a=>{
	PPMYdec(a,done,rate).then(b=>{
		console.log(b.length,"->",a.length, String.fromCharCode(...b))
	})
})
})()

codepen様到来

See the Pen PPMY compressor by xezz (@xezz) on CodePen.

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0