0
0

PPM star編第3話。圧縮力を高めるためにPPMd等で使われている技術を組み込んでいきます。遺伝法、SEE、そして新たに二次記号推定(Secondary Symbol Estimation, SSE)の3つです。

function Suffix(){this.root=this.point={size:0,cs:-1,sum:0,o:0}}

function update(sa,s){
	var a,b,e,m,n,r,t,
		f=sa.fit,p=sa.max,q=sa.med,l=sa.min,
		o=l,u=f?l.sum+l.cs:0,v=f?f.hit:0,
		c={size:p.size+1, cs:-1, sum:0, o:0};
	for(;p!==q;p=p.dad){
		m=p.sum+1,n=2*v*(6+m),m+=u;
		// inheritance
		n=n>m*6?3+(n>m*9)+(n>m*12)+(n>m*15):(p.sum++,n>m)+(n>m*3);
		t={char:s, hit:n, son:c};
		p.sum+=n;p.cs++;
		if(e=p.next)
			if(e.hit>n)t.next=e.next,e.next=t;
			else p.next=t,t.next=e;
		else p.next=t
	}
	if(p){
		s:for(n=f,m=l;p!==l;p=p.dad){
			t=p.next;
			do if(e=t,e.char===s){
				m=p;n=e;e.hit>v&&(v=e.hit,o=p);break s
			}while(t=e.next);
			// inheritance
			b=p.sum+1,a=2*v*(6+b),b+=u;
			a=a>b*6?3+(a>b*9)+(a>b*12)+(a>b*15):(p.sum++,a>b)+(a>b*3);
			e.next={char:s, hit:a, son:c};
			p.sum+=a;p.cs++
		}
		e=n,q=e.son;
		if(~p.size+q.size){
			q.dad=c.dad=u=r={dad:q.dad, size:p.size+1, cs:q.cs, o:q.o},b=0;

			for(t=q;t=t.next;b+=a)
				u=u.next={char:t.char, hit:a=t.hit>>1, son:t.son};

			for(r.sum=b;e.son=r,p=p.dad;){
				for(e=p.next;e.char^s;)e=e.next;
				e.hit>v&&(v=e.hit,o=p);
				if(e.son!==q)break
			}
		}else c.dad=q;
		if(m!==l)
			for(p=m,e=n;;e.hit>v&&(v=e.hit,o=p)){
				if(e.hit>254)// rescale
					for(t=p.next,p.sum=0;p.sum+=t.hit>>=1,t=t.next;);
				t=p.next;p.sum++;
				if(++e.hit>t.hit)
					u=e.hit, e.hit=t.hit, t.hit=u,
					u=e.char, e.char=t.char, t.char=u,
					u=e.son, e.son=t.son, t.son=u;
				if((p=p.dad)===l)break;
				for(e=p.next;e.char^s;)e=e.next
			}
		if(f.hit>253)// rescale
			for(t=l.next,l.sum=0;l.sum+=t.hit>>=1,t=t.next;);

		t=l.next;l.sum+=2;o.o++;
		if((f.hit+=2)>t.hit)
			u=f.hit, f.hit=t.hit, t.hit=u,
			u=f.char, f.char=t.char, t.char=u,
			u=f.son, f.son=t.son, t.son=u
	}else(c.dad=sa.root).o++;
	sa.point=c
}
// initialize SEE and SSE model
function init(D,M,SEE,SSE){
	for(var a,f,o=128,p;o--;D[o]=1024/(o+40)|0)
		for(p=SSE[o]=[],f=1<<24|o<<25,a=16;a;)p[--a]=f;
	for(;++o<16;)
		for(p=32;p;)SEE[o<<5|--p]={b:3,c:7,s:8*o+5<<3};
	for(;a<256;)M[a]=Math.sqrt(a++)
}
// update SEE
function upSEE(S){if(!--S.c){
	var b=S.b,i=S.s>>b;
	i=7-(i>40)-(i>280)-(i>1020);
	if(i<b)S.s>>=1,S.b--;
	else if(i>b)S.s<<=1,S.b++;
	S.c=5<<S.b}
}
/* compressor
@A	:input(Array / Uint8Array)
@done: call back of last process.
	done(A,a,z)
	@A: compressed array of input
	@a: last position
	@z: compressed size
@rate: call back of progress
	rate(a,z)
	@a: current position
	@z: last position
@return:
	call with await: decompressed array of @A
	without await: Promise object
*/
async function PPMe(A,done,rate){
	function enc(c,f,t){
		R=R/t>>>0;L+=R*c;
		for(R*=f;R<16777216;R*=256,L=L<<8>>>0)
			if((c=L>0xffffffff)||255>L>>>24)
				for(O[o++]=255&c--+B,B=L>>>24;C;C--)O[o++]=255&c;
			else++C
	}
	var a=0,b,d=0,o=-1,z=A.length,
		L=0,R=o>>>0,B=0,C=0,
		O=[],M=[],SEE=[],SSE=[],D=[],E=[],X={b:0,c:0,s:0},sa=new Suffix,st=+new Date;

	const pass=typeof rate=="function"?8191:-1,fn=b=>wait0(c=>b(rate(a,z)));

	for(init(D,M,SEE,SSE);;a&pass||Date.now()-st<200||await new Promise(fn,st=+new Date)){
		let s=A[a++], p=sa.max=sa.point, x=E.length=0;

		for(sa.med=sa.min=sa.fit=void 0;p=p.dad;){
			let e,f,h,r,l,q=p,m=p,t=0,S;

			// deside order by Shortest Deterministic Context and Local Order Estimation
			for(;(q=q.dad)&&(q.size*2>p.size?~p.next.hit*(~q.cs*2-q.sum)<=~q.next.hit*(~p.cs*2-p.sum):q.cs===p.cs);)p=q;
			q=p.next;
			if(x){
				for(e=0;q;q=q.next)if(!E[r=q.char])
					E[r]=q, f=q.hit+1,
					s===r&&(h=t,l=f), t+=f, e++

				if(e){
					x+=e;f=p.cs;
					if(255>f)
						r=p.dad?2*(2*f<p.dad.cs+x-e)+4*(p.size<p.dad.size*2):0,
						S=SEE[M[f]<<5|(p.sum>++f*8)+8*(2*e<p.next.hit)+d*16+r],
						e=S.s>>S.b, 2*f<++e&&(e=f+1);
					else S=X,e=1;
					if(q=E[s]){ // hit!
						enc(h,l,t+e);d=1;
						e>2&&(S.s-=e);upSEE(S);
						sa.min=p;sa.med=m;sa.fit=q;
						break
					}
					S.s+=t;d=0;
					enc(t,e,t+e) // escape
				}
			}else{
				let c=q.char, v=q.hit+1;
				for(;q;q=q.next)
					E[r=q.char]=q, f=q.hit+1,
					s===r&&(h=t), t+=f;

				t+=x=e=p.cs+1;
				S=SSE[4096*v/t>>5];
				r=p.dad?(p.dad.next.char===c)<<1|(p.o*2<p.sum+p.cs)<<2|b<<3|p.size<p.dad.size*2:0;
				f=S[r]>>>9;
				b=s===c;S[r]+=((b<<23)-f)*D[r=S[r]&127]&-128|r<127;
				f=f>>11||1;q=E[s];

				if(b){ // hit most probable symbol
					enc(0,f,4096);
					sa.min=p;sa.med=m;sa.fit=q;
					break
				}
				enc(f,4096-f,4096);
				if(e>1){t-=v;
					if(q){
						enc(h-v,q.hit+1,t); // hit
						sa.min=p;sa.med=m;sa.fit=q;
						break
					}
					enc(t-e,e,t) // escape
				}
			}
		}
		if(!p){ // order -1
			for(p=q=s<256?s:s=256;p;)E[--p]&&q--;
			enc(q,1,257-x);
			if(s>255){
				for(a=5;a--;L=L<<8>>>0)
					if((p=L>0xffffffff)||255>L>>>24)
						for(O[o++]=255&p--+B,B=L>>>24;C;C--)O[o++]=255&p;
					else++C;
				done(O,z,o);return O
			}
		}update(sa,s)
	}
}
/* decompressor
@A	:input(Array / Uint8Array)
@done: call back of last process.
	done(A,a,z)
	@A: decompressed array of input
	@a: last position
	@z: decompressed size
@rate: call back of progress
	rate(a,z)
	@a: current position
	@z: last position
@return:
	call with await: decompressed array of @A
	without await: Promise object
*/
async function PPMd(A,done,rate){
	function dec(c,f){L-=c*R;
		for(R*=f;R<16777216;R*=256)L=(L<<8|A[a++])>>>0
	}
	var a=0,b,d=0,o=0,z=A.length,
		L=0,R=-1>>>0,st=+new Date;

	const C=[],E=[],D=[],M=[],O=[],SEE=[],SSE=[],X={b:0,c:0,s:0},sa=new Suffix,
		pass=typeof rate=="function"?8191:-1,fn=b=>wait0(c=>b(rate(a,z)));

	for(init(D,M,SEE,SSE);a<4;)L=(L<<8|A[a++])>>>0;
	for(;;o&pass||Date.now()-st<200||await new Promise(fn,st=+new Date)){
		let p=sa.max=sa.point, x=E.length=0, s;

		sa.med=sa.min=sa.fit=s;
		do if(p=p.dad){
				let m=p,q=p,n=0,t=0,c,e,f,r,v,S;

				for(s=-1;(q=q.dad)&&(q.size*2>p.size?~p.next.hit*(~q.cs*2-q.sum)<=~q.next.hit*(~p.cs*2-p.sum):q.cs===p.cs);)p=q;
				q=p.next;
				if(x){
					for(;q;q=q.next)if(!E[r=q.char])
						E[r]=C[n++]=q, t+=q.hit+1;
					if(!t)continue;f=p.cs;x+=n;
					if(255>f)
						r=p.dad?2*(2*f<p.dad.cs+x-n)+4*(p.size<p.dad.size*2):0,
						S=SEE[M[f]<<5|(p.sum>++f*8)+8*(2*n<p.next.hit)+d*16+r],
						e=S.s>>S.b, 2*f<++e&&(e=f+1);
					else S=X,e=1;
					r=L/(R=R/(t+e)>>>0)|0;
					if(t>r){
						for(v=t=0;v<n;){
							q=C[v++], s=q.char, f=q.hit+1;
							if(r<t+f){
								e>2&&(S.s-=e), upSEE(S), d=1;
								dec(t,f), sa.min=p, sa.med=m, sa.fit=q
								break
							}t+=f
						}
					}else dec(t,e), S.s+=t, d=0
				}else{
					for(c=q.char, v=q.hit+1;q;q=q.next)
						E[q.char]=q, t+=q.hit+1;

					t+=x=e=p.cs+1;
					S=SSE[4096*v/t>>5];
					q=p.dad?(p.dad.next.char===c)<<1|(p.o*2<p.sum+p.cs)<<2|b<<3|p.size<p.dad.size*2:0;
					f=S[q]>>>9, r=f>>11||1;
					b=L/(R>>>=12)>>>0<r;
					S[q]+=((b<<23)-f)*D[q=S[q]&127]&-128|q<127;

					if(b)dec(0,r), sa.min=p, sa.med=m, sa.fit=E[s=c]; // hit most probable symbol
					else{
						dec(r,4096-r);
						if(e>1){
							t-=v, r=L/(R=R/t>>>0)|0;
							if(t-e>r){
								for(q=p.next, t=0;q;q=q.next)if(s=q.char,c^s){
									f=q.hit+1;
									if(r<t+f){
										dec(t,f), sa.min=p, sa.med=m, sa.fit=q;
										break
									}t+=f
								}
							}else dec(t-e,e) //escape
						}
					}
				}
			}else{// order -1
				p=s=-1;
				for(x in E)p++;
				for(p=L/(R=R/(256-p)>>>0)>>>0, x=s;E[++s]||p>++x;);
				if(s>255)return done(O,z,o),O;
				dec(x,1)
			}
		while(s<0);update(sa,O[o++]=s)
	}
}
使用例
(async()=>{
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let rate=(a,b)=>{console.log((a/b*1e4|0)/100,"%")}, done=(a,b,c)=>{console.log(b,"->",c,a)};
let e=await PPMSenc(A,done,rate), d=await PPMSdec(e,done,rate);
console.log(e.length, String.fromCharCode(...d))
})()

file爆縮実験

See the Pen PPM* compressor(3) by xezz (@xezz) on CodePen.

過去編

1話
2話
二次記号推定について軽く触れた記事

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0