0
0

GRZipII編第2話。

原理

BlockSort -> WFC -> RLE -> Range符号 という流れで圧縮。MTFより強力なWeighted Frequency Coding(WFC)搭載で圧縮率向上となりやすい(MTFより低速)。

実装

function compare(R,a,b,i,n,r){
	for(n-=a<b?b:a;i<n&&!(r=R[a+i]-R[b+i]);)i+=2;return r||b-a
}
function subsort(R,A,l,h,d,n){
	if(10<h-l)
		for(var i=l+2,j,m=i,t;i<h;A[j]=t)
			for(t=A[j=++i];m<j&&compare(R,t,A[j-3],d,n)<0;)A[j]=A[j-=3];
	for(i=l;i<h;A[j]=t)
		for(t=A[j=++i];l<j&&compare(R,t,A[j-1],d,n)<0;)A[j--]=A[j]
}
function median(R,A,a,b,c,d,z){
	var r=A[a]+d,s=A[b]+d;
	r=r<z?R[r]:-1,s=s<z?R[s]:-1,d+=A[c],d=d<z?R[d]:-1;
	return r<s?s<d?b:r<d?c:a:r<d?a:s<d?c:b
}
function getPivot(R,A,r,l,h,d,z){
	if(r<65)var m=l+(r>>1),p;
	else r>>=3,
		l=median(R,A,l,p=l+r,p+=r,d,z),
		m=median(R,A,p+=r,p+=r,p+=r,d,z),
		h=median(R,A,p+=r,p+=r,h,d,z);
	p=A[m=median(R,A,l,m,h,d,z)];A[m]=A[l];A[l]=p;
	return R[p+d]
}
function mqsort(R,I,l,h,d,n,L,H,D){
	for(var a,b,i,j,o,p,r,s=0,t;;){
		if(h-l<20){
			for(l<h&&subsort(R,I,l,h,d,n);l<=h;)R[I[l]]=l++;
			if(!s)break;
			l=L[--s];h=H[s];d=D[s];
			continue}
		p=getPivot(R,I,h-l+1,l,h,d,n);
		for(i=a=l,j=b=h;;I[j--]=t){
			for(;i<=j&&(r=((t=I[i]+d)<n?R[t]:-1))<=p;i++)
				r^p||(I[i]=I[a],I[a++]=t-d);
			for(;i<=j&&(r=((t=I[j]+d)<n?R[t]:-1))>=p;j--)
				r^p||(I[j]=I[b],I[b--]=t-d);
			if(i>j)break;
			t=I[i],I[i++]=I[j]}
		if((p=a-l)<(r=i-a))r=p;o=j;
		for(p=l;r--;I[o--]=t)t=I[p],I[p++]=I[o];
		if((p=h-b)<(r=b-j))r=p;o=h;
		for(p=i;r--;I[o--]=t)t=I[p],I[p++]=I[o];
		i+=l-a;j-=~h+b;
		if(j<=h)L[s]=j,H[s]=h,D[s++]=d;
		if(i<j)L[s]=i,H[s]=j-1,D[s++]=d+2;
		h=i-1}
}
//sort type B*
function sortTypeBstr(B,I,C,R,n){
	for(var A=C.slice(),p=1,r,i=n-1,j=0|Math.sqrt(n*2)+1,L=new Int32Array(j*3+3),H=L.subarray(j+1),D=H.subarray(j+1);i--;)
		if(r=B[i]-B[i+1])
			r<0&&p>0&&(I[A[B[i]<<8|B[i+1]]++]=i),p=r;
	for(;i<255;)
		for(j=++i;j<255;p-r>1&&mqsort(R,I,r,--p,2,n,L,H,D))
			p=A[r=i<<8|++j],r=C[r]
}
//set type A,B
function setTypeAB(B,I,C,n){
	var i=255,j,p,A=new Uint32Array(256),E=A.slice();
	for(A[i]=n;j=i;A[i]=E[i]){
		for(;j;)E[--j]=C[j<<8|i];
		for(j=C[i--<<8];j>E[i];)
			if((p=I[--j])>0&&B[p]>=B[--p])
				I[--E[B[p]]]=p}
	for(I[C[B[n-1]<<8]++]=n-1;j<n;)
		if((i=I[j++])>0&&B[i]<=B[--i]&&C[p=B[i]<<8|B[i+1]]<A[B[i]])
			I[C[p]++]=i
}
/* suffix sort
@T: input (Uint8Array / Array)
@A: suffix array (Int32Array / Array / null)
@n: input size (Number / null)
*/
function I2SRsort(T,A,n){n=n||T.length;
	var I=A||new Int32Array(n);
	if(n<2)return n?I[0]=0:0,I;
	if(n<3)return I[n=0|T[0]<T[1]]=1,I[1-n]=0,I;
	if(n<512&&!A)return I.map((a,b)=>b).sort((a,b,c,d)=>{for(c=a<b?n-b:n-a;c--;)if(d=T[a++]-T[b++])return d;return b-a});
	for(var i=n,c,C=new Uint32Array(65537),R=I.slice();i;)C[c|(c=T[--i])<<8]++;
	for(n=0;i<65537;n+=c)c=C[i],C[i++]=n;
	for(i=n,c=0;i;)R[--i]=C[(c|(c=T[i])<<8)+1]-1;
	sortTypeBstr(T,I,C,R,n);setTypeAB(T,I,C,n);
	return I
}
/* BWT
@T: input (Uint8Array / Array)
@U: output (Uint8Array / Array / null)
@n: input size (Number / null)
*/
function BWTe(T,U,n){n=n||T.length;
	var i=0,a=n<257?1:n<65537?2:n<16777217?3:4,p,SA=I2SRsort(T,0,n),B=U||new Uint8Array(n+a);
	for(B[0]=T[n-1];p=SA[i++];)B[i]=T[--p];
	for(p=i;i<n;)B[i]=T[SA[i++]-1];
	if(U)return p;--p;
	for(;a--;p>>=8)B[i++]=p&255;
	return B
}
//setImmediateもどき
(function(a){
	var F=[],hit="\0";
	a.wait0=function(f){
		F[F.length]=f;a.postMessage(hit,"*")
	};
	a.onmessage=function(e){
		if(e.source===a&&e.data===hit)
			e.stopPropagation(),F[0]&&F.shift()()
	}
})(this);
const R2N=new Uint8Array(256),R2M=new Uint8Array(256),R2P=new Uint8Array(257);
// initialize
!function(N,M,P,a,b,c){for(P[4]=1;b<255;N[++b]=c-2)for(c=0;b>>++c;);for(;--b>1;M[b+1]=1<<c-2)for(c=0;b>>++c;);for(;b<7;)for(c=256>>b++;c;)P[a--]=--c}(R2N,R2M,R2P,256,4);

function update03(M,V,U,r){
	M[r]+=2,U[r]+=2,V[r]+=2;
	if((M[4]+=2)>58)M[4]=(M[0]-=M[0]>>1)+(M[1]-=M[1]>>1)+(M[2]-=M[2]>>1)+(M[3]-=M[3]>>1);
	if((V[4]+=2)>62)V[4]=(V[0]>>=1)+(V[1]>>=1)+(V[2]>>=1)+(V[3]>>=1);
	if((U[4]+=2)>204)U[4]=(U[0]>>=1)+(U[1]>>=1)+(U[2]>>=1)+(U[3]>>=1)
}
/* compressor
@A: input(Uint8Array)
@bs: block size(32768 - 0x7fffffff)
@done: call back of last process.
	done(A,a,z)
	@A: compressed array of input.
	@a: input size
	@z: compressed size
@rate: call back of progress
	rate(a,z)
	@a: current position
	@z: last position
*/
function GRZ2WFCenc(A,bs,done=a=>a,rate){
	function encode(t,f,c){L+=c*(R=R/t>>>0);
		for(R*=f;R<16777216;R*=256,L=L<<8>>>0)
			if((c=L>0xffffffff)||255>L>>>24)
				for(O[o++]=255&c--+C,C=L>>>24;N;N--)O[o++]=255&c;
			else++N;
	}
	// EndOfMark
	function EOM(){
		var V=S0[c],U=S1[c0<<2|cr&3],i=S[3]+U[3]+V[3],j=S[4]+U[4]+V[4],n;
		encode(j,i,j-i>>>0),update03(S,V,U,3);
		V=S2,U=S3[c1];
		for(i=j=0;i<cb;)
			encode(max,max-(n=V[i]+U[i]>>1),n),
			V[i]-=V[i]>>4,U[i]-=U[i++]>>6;
		for(V=S4[i];j<1<<cb;V[j]-=V[j]>>7)
			encode(max,max-V[j+=++j],V[j])
	}
	function block(){
		var b,e=E,i,j,l,n,m,p,r,d=Date.now(),U,V;
		for(rate(a,z);a<y;){
			if(!--e&&Date.now(e=E)-d>200)return wait(block);
			V=S0[c],U=S1[c0<<2|cr&3];
			r=CI[c=B[w]=A[a]]-1&255,CW[c]+=131072;
			for(l=1;c===A[++a]&&a<y;)l++;
			if(i=CI[c])for(;i;)CI[M[i]=M[--i]]=i+1;
			for(CI[M[0]=c]=0;i<12;i++)
				if(w>=(p=1<<i)){n=CW[b=B[w-p]]-=W[i];
					if(b^c){
						for(j=p=CI[b];CW[M[++j]]>n;)CI[M[j-1]=M[j]]=p++;
						CI[M[p]=b]=p}}w++;
			if(r<3){
				for(i=n=0;i<r;)n+=U[i]+V[i]+S[i++];
				encode(S[4]+U[4]+V[4],S[r]+U[r]+V[r],n);
				update03(S,V,U,r)
			}else{
				encode(j=S[4]+U[4]+V[4],i=S[3]+U[3]+V[3],j-i);
				update03(S,V,U,3);
				V=S2,U=S3[c1],
				m=R2M[r],n=c1=R2N[r],p=R2P[r];
				for(i=0;i<n;)
					encode(max,max-(j=V[i]+U[i]>>1),j),
					V[i]-=V[i]>>4,U[i]-=U[i++]>>6;
				if(n<cb)
					encode(max,V[i]+U[i]>>1,0),
					V[i]+=max-V[i]>>4,U[i]+=max-U[i]>>6;
				V=S4[n];
				for(j=1;m;m>>=1)
					if(p&m)
						encode(max,max-V[j],V[j]),
						V[j]-=V[j]>>7,j+=j+1;
					else
						encode(max,V[j],0),
						V[j]+=max-V[j]>>7,j+=j
			}
			if(r>3)r=3;
			c0=c0<<2&255|r;
			if(l<2)b=0;
			else for(b=m=1;l>>b>1;b++)m<<=1;
			V=R0[cr|r<<4],U=R1,j=c*32;
			for(i=0;i<b;)
				encode(max,max-(n=V[i]+U[j]>>1),n),
				V[i]-=V[i++]>>6,U[j]-=U[j++]>>3;
			encode(max,V[i]+U[j]>>1,0);
			V[i]+=max-V[i]>>6,U[j]+=max-U[j]>>3;
			V=R2[b],cr=cr<<1&15|b>1;
			for(i=0;m;m>>=1)
				if(l&m)encode(max,max-V[i],V[i]),
					V[i]-=V[i++]>>6;
				else encode(max,V[i],0),
					V[i]+=max-V[i++]>>6
		}
		EOM();
		// write pidx
		for(i=c=c0=c1=cr=0;x>>++i;);
		if(x>1)for(;i;)encode(max,1024,(bp>>--i&1)<<10);
		return wait(head)
	}
	function head(){
		if(a>=z){EOM();
			for(L+=R>>>1,x=6;--x;L=L<<8>>>0)
				if((c=L>0xffffffff)||255>L>>>24)
					for(O[o++]=255&c--+C,C=L>>>24;N;N--)O[o++]=255&c;
				else++N;
			done(O,z,o);return O
		}
		var U=A.subarray(a,a+bs);y+=x=U.length;
		bp=BWTe(U.slice(),U);
		return wait(block)
	}
	A instanceof Uint8Array||(A=new Uint8Array(A));
	var E="function"!=typeof rate?0:4096,bp,cb=6,a=1024,o=257,w=0,x,y=1<<15,z=A.length,
		B=new Uint8Array(z),W=new Uint32Array([114688,7272,4240,2364,1263,649,320,153,70,31,14,8]),M=new Int32Array(o),CI=M.slice(),CW=M.slice(),
		R0=[],R1=new Uint16Array(8192).fill(1024),R2=[],S=new Uint8Array([1,1,1,1,4]),S0=[],S1=[],S2=R1.slice(-6),S3=[],S4=[],
		L=0,R=-1>>>0,N=0,C=0,O=[],
		c=0,c0,c1=0,cr,max=2048,wait=wait0;

	if(!E)rate=a=>a,wait=a=>a();
	bs&=-1>>>1;if(bs<y)bs=y;if(bs>z)bs=z;y=0;
	for(;o--;)S0[M[o]=CI[o]=o]=new Uint8Array(5);
	for(CW[256]=-1;a;)S1[--a]=new Uint8Array(5);
	for(;a<8;S3[a++]=R1.slice(-6))S4[a]=R1.slice(-128);
	for(a=64;a;)R0[--a]=R1.slice(-32);
	for(a=24;a;)R2[--a]=R1.slice(-32);
	return head()
}
/* decompressor
@A: input (Uint8Array)
@done: call back of last process.
	done(A,a,z)
	@A: decompressed array of input.
	@a: input size
	@z: decompressed size
@rate: call back of progress.
	rate(a,z)
	@a: current position
	@z: last position
*/
function GRZ2WFCdec(A,done=a=>a,rate){
	function decode(f,c){
		for(L-=R*c,R*=f;R<1<<24;R*=256)L=(L<<8|A[a++])>>>0
	}
	function block(){
		var b,e=E,f,i,j,l,n,r,U,V,d=Date.now();
		for(rate(a,z);;){
			if(!--e&&Date.now(e=E)-d>200)return wait(block);
			V=S0[c],U=S1[t<<2|s&3];
			b=L/(R=R/(S[4]+U[4]+V[4])>>>0)>>>0;
			for(n=r=0;b>=n;)n+=S[r]+U[r]+V[r++];
			decode(b=S[--r]+U[r]+V[r],n-b);

			S[r]+=2,U[r]+=2,V[r]+=2;
			if((S[4]+=2)>58)S[4]=(S[0]-=S[0]>>1)+(S[1]-=S[1]>>1)+(S[2]-=S[2]>>1)+(S[3]-=S[3]>>1);
			if((V[4]+=2)>62)V[4]=(V[0]>>=1)+(V[1]>>=1)+(V[2]>>=1)+(V[3]>>=1);
			if((U[4]+=2)>204)U[4]=(U[0]>>=1)+(U[1]>>=1)+(U[2]>>=1)+(U[3]>>=1);

			if(r>2){
				V=S2,U=S3[u];
				for(i=r=0;i<cb;){
					if(L/(R>>>=11)>>>0<(c=V[i]+U[i]>>1)){
						decode(c,0);
						V[i]+=2048-V[i]>>4;
						U[i]+=2048-U[i]>>6;
						break}
					decode(2048-c,c);
					V[i]-=V[i]>>4;
					U[i]-=U[i++]>>6}
				V=S4[u=i],i=2<<i;
				for(f=1;f<i;)
					if(L/(R>>>=11)>>>0<(c=V[f]))
						decode(c,0),
						V[f]+=2048-c>>7,
						f+=f,r+=r;
					else
						decode(2048-c,c),
						V[f]-=c>>7,
						f+=f+1,r+=r+1;
				if(u==cb&&r>cs)break;
				r+=2<<u|1
			}
			CW[c=B[w]=M[r+1&255]]+=131072;
			if(i=CI[c])while(i)CI[M[i]=M[--i]]=i+1;
			for(CI[M[0]=c]=0;i<12;i++)
				if(w>=(V=1<<i)){n=CW[b=B[w-V]]-=W[i];
					if(b^c){
						for(j=V=CI[b];CW[M[++j]]>n;)CI[M[j-1]=M[j]]=V++;
						CI[M[V]=b]=V}}w++;

			if(r>3)r=3;
			t=t<<2&255|r,V=R0[s|r<<4],U=R1,f=c*32;
			for(i=0;L/(R>>>=11)>>>0>=(n=V[i]+U[f]>>1);U[f]-=U[f++]>>3)
				decode(2048-n,n),V[i]-=V[i++]>>6;
			decode(n,0);
			V[i]+=2048-V[i]>>6;
			U[f]+=2048-U[f]>>3;
			V=R2[i],s=s<<1&15|i>1;
			for(f=l=0;f<i;)
				if(L/(R>>>=11)>>>0<(n=V[f]))
					decode(n,0),
					V[f++]+=2048-n>>6,l+=l;
				else decode(2048-n,n),
					V[f++]-=n>>6,l+=++l;
			for(l+=1<<f;l--;)P[p]=N[T[p++]=c]++
		}
		if(!p)return done(O,z,o);
		for(i=r=f=0;r<p;r+=c)c=N[f],N[f++]=r;
		for(c=0;p>>++c;);
		if(p>1)for(;c--;)
			if(L/(R>>>=11)>>>0<1024)decode(1024,0);
			else decode(1024,1024),i|=1<<c;
		for(c=0;p;c<i&&c++)c=P[c]+N[O[o+--p]=T[c]];
		o+=r;B.fill(s=t=u=c=0);
		return wait(block)
	}
	var E="function"!=typeof rate?0:4096,cb=6,cs=(2<<cb)-2,a=1024,c=0,w=0,P=[],o=257,p=0,z=A.length,O=[],T=[],N=new Uint32Array(a),
		B=[],W=new Uint32Array([114688,7272,4240,2364,1263,649,320,153,70,31,14,8]),M=new Int32Array(o),CI=M.slice(),CW=M.slice(),
		R0=[],R1=new Uint16Array(8192).fill(1024),R2=[],S=new Uint8Array([1,1,1,1,4]),S0=[],S1=[],S2=R1.slice(-6),S3=[],S4=[],
		L,R=-1>>>0,s,t,u=0,wait=wait0;

	if(!E)rate=a=>a,wait=a=>a();

	for(;o;)S0[M[--o]=CI[o]=o]=new Uint8Array(5);
	for(CW[256]=-1;a;)S1[--a]=new Uint8Array(5);
	for(;a<8;S3[a++]=R1.slice(-6))S4[a]=R1.slice(-128);
	for(a=64;a;)R0[--a]=R1.slice(-32);
	for(a=24;a;)R2[--a]=R1.slice(-32);
	for(;a<4;)L=(L<<8|A[a++])>>>0;
	return block()
}
使用例
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let c=GRZ2WFCenc(A,A.length),d=GRZ2WFCdec(c)
console.log(c.length, String.fromCharCode(...d))

file compression test

See the Pen GRZipII(wfc) compressor by xezz (@xezz) on CodePen.

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0