0
0

BWT系圧縮でかつて猛威を振るわなかったGRZipIIというprogramの紹介。と言うか模倣しようかなという企画第1弾。

原理

BlockSort -> MTF -> RLE -> Range符号 という流れで圧縮。MTFにより小さい値だらけになったら、まず記号を2以下か3以上かで分けます。2以下ならそのまま符号化。3以上なら3を符号化した後、その記号を0~126に変換してGamma符号化。記号の後は連長をGamma符号化。本家はまだLZP等で加工していますが省略。

実装

function compare(R,a,b,i,n,r){
	for(n-=a<b?b:a;i<n&&!(r=R[a+i]-R[b+i]);)i+=2;return r||b-a
}
function subsort(R,A,l,h,d,n){
	if(10<h-l)
		for(var i=l+2,j,m=i,t;i<h;A[j]=t)
			for(t=A[j=++i];m<j&&compare(R,t,A[j-3],d,n)<0;)A[j]=A[j-=3];
	for(i=l;i<h;A[j]=t)
		for(t=A[j=++i];l<j&&compare(R,t,A[j-1],d,n)<0;)A[j--]=A[j]
}
function median(R,A,a,b,c,d,z){
	var r=A[a]+d,s=A[b]+d;
	r=r<z?R[r]:-1,s=s<z?R[s]:-1,d+=A[c],d=d<z?R[d]:-1;
	return r<s?s<d?b:r<d?c:a:r<d?a:s<d?c:b
}
function getPivot(R,A,r,l,h,d,z){
	if(r<65)var m=l+(r>>1),p;
	else r>>=3,
		l=median(R,A,l,p=l+r,p+=r,d,z),
		m=median(R,A,p+=r,p+=r,p+=r,d,z),
		h=median(R,A,p+=r,p+=r,h,d,z);
	p=A[m=median(R,A,l,m,h,d,z)];A[m]=A[l];A[l]=p;
	return R[p+d]
}
function mqsort(R,I,l,h,d,n,L,H,D){
	for(var a,b,i,j,o,p,r,s=0,t;;){
		if(h-l<20){
			for(l<h&&subsort(R,I,l,h,d,n);l<=h;)R[I[l]]=l++;
			if(!s)break;
			l=L[--s];h=H[s];d=D[s];
			continue}
		p=getPivot(R,I,h-l+1,l,h,d,n);
		for(i=a=l,j=b=h;;I[j--]=t){
			for(;i<=j&&(r=((t=I[i]+d)<n?R[t]:-1))<=p;i++)
				r^p||(I[i]=I[a],I[a++]=t-d);
			for(;i<=j&&(r=((t=I[j]+d)<n?R[t]:-1))>=p;j--)
				r^p||(I[j]=I[b],I[b--]=t-d);
			if(i>j)break;
			t=I[i],I[i++]=I[j]}
		if((p=a-l)<(r=i-a))r=p;o=j;
		for(p=l;r--;I[o--]=t)t=I[p],I[p++]=I[o];
		if((p=h-b)<(r=b-j))r=p;o=h;
		for(p=i;r--;I[o--]=t)t=I[p],I[p++]=I[o];
		i+=l-a;j-=~h+b;
		if(j<=h)L[s]=j,H[s]=h,D[s++]=d;
		if(i<j)L[s]=i,H[s]=j-1,D[s++]=d+2;
		h=i-1}
}
//sort type B*
function sortTypeBstr(B,I,C,R,n){
	for(var A=C.slice(),p=1,r,i=n-1,j=0|Math.sqrt(n*2)+1,L=new Int32Array(j*3+3),H=L.subarray(j+1),D=H.subarray(j+1);i--;)
		if(r=B[i]-B[i+1])
			r<0&&p>0&&(I[A[B[i]<<8|B[i+1]]++]=i),p=r;
	for(;i<255;)
		for(j=++i;j<255;p-r>1&&mqsort(R,I,r,--p,2,n,L,H,D))
			p=A[r=i<<8|++j],r=C[r]
}
//set type A,B
function setTypeAB(B,I,C,n){
	var i=255,j,p,A=new Uint32Array(256),E=A.slice();
	for(A[i]=n;j=i;A[i]=E[i]){
		for(;j;)E[--j]=C[j<<8|i];
		for(j=C[i--<<8];j>E[i];)
			if((p=I[--j])>0&&B[p]>=B[--p])
				I[--E[B[p]]]=p}
	for(I[C[B[n-1]<<8]++]=n-1;j<n;)
		if((i=I[j++])>0&&B[i]<=B[--i]&&C[p=B[i]<<8|B[i+1]]<A[B[i]])
			I[C[p]++]=i
}
/* suffix sort
@T: input (Uint8Array / Array)
@A: suffix array (Int32Array / Array / null)
@n: input size (Number / null)
*/
function I2SRsort(T,A,n){n=n||T.length;
	var I=A||new Int32Array(n);
	if(n<2)return n?I[0]=0:0,I;
	if(n<3)return I[n=0|T[0]<T[1]]=1,I[1-n]=0,I;
	if(n<512&&!A)return I.map((a,b)=>b).sort((a,b,c,d)=>{for(c=a<b?n-b:n-a;c--;)if(d=T[a++]-T[b++])return d;return b-a});
	for(var i=n,c,C=new Uint32Array(65537),R=I.slice();i;)C[c|(c=T[--i])<<8]++;
	for(n=0;i<65537;n+=c)c=C[i],C[i++]=n;
	for(i=n,c=0;i;)R[--i]=C[(c|(c=T[i])<<8)+1]-1;
	sortTypeBstr(T,I,C,R,n);setTypeAB(T,I,C,n);
	return I
}
/* BWT
@T: input (Uint8Array / Array)
@U: output (Uint8Array / Array / null)
@n: input size (Number / null)
*/
function BWTe(T,U,n){n=n||T.length;
	var i=0,a=n<257?1:n<65537?2:n<16777217?3:4,p,SA=I2SRsort(T,0,n),B=U||new Uint8Array(n+a);
	for(B[0]=T[n-1];p=SA[i++];)B[i]=T[--p];
	for(p=i;i<n;)B[i]=T[SA[i++]-1];
	if(U)return p;--p;
	for(;a--;p>>=8)B[i++]=p&255;
	return B
}
//setImmediateもどき
(function(a){
	var F=[],hit="\0";
	a.wait0=function(f){
		F[F.length]=f;a.postMessage(hit,"*")
	};
	a.onmessage=function(e){
		if(e.source===a&&e.data===hit)
			e.stopPropagation(),F[0]&&F.shift()()
	}
})(this);
const R2N=new Uint8Array(256),R2M=new Uint8Array(256),R2P=new Uint8Array(257);
// initialize
!function(N,M,P,a,b,c){for(P[4]=1;b<255;N[++b]=c-2)for(c=0;b>>++c;);for(;--b>1;M[b+1]=1<<c-2)for(c=0;b>>++c;);for(;b<7;)for(c=256>>b++;c;)P[a--]=--c}(R2N,R2M,R2P,256,4);

function update03(M,V,U,r){
	M[r]+=2,U[r]+=2,V[r]+=2;
	if((M[4]+=2)>58)M[4]=(M[0]-=M[0]>>1)+(M[1]-=M[1]>>1)+(M[2]-=M[2]>>1)+(M[3]-=M[3]>>1);
	if((V[4]+=2)>62)V[4]=(V[0]>>=1)+(V[1]>>=1)+(V[2]>>=1)+(V[3]>>=1);
	if((U[4]+=2)>204)U[4]=(U[0]>>=1)+(U[1]>>=1)+(U[2]>>=1)+(U[3]>>=1)
}
/* compressor
@A: input(Uint8Array)
@bs: block size(32768 - 0x7fffffff)
@done: call back of last process.
	done(A,a,z)
	@A: compressed array of input.
	@a: input size
	@z: compressed size
@rate: call back of progress
	rate(a,z)
	@a: current position
	@z: last position
*/
function GRZ2MTFenc(A,bs,done=a=>a,rate){
	function encode(t,f,c){
		L+=c*(R=R/t>>>0);c=L>0xffffffff;
		for(R*=f;R<16777216;R*=256,L=L<<8>>>0)
			if(255>L>>>24||c){
				for(O[o++]=255&c--+B,B=L>>>24;N;N--)O[o++]=255&c;c=0
			}else++N
	}
	A instanceof Uint8Array||(A=new Uint8Array(A));
	var E="function"!=typeof rate?0:4096,bp,cb=6,a=1024,o=256,x,y=1<<15,z=A.length,M=new Uint8Array(o),
		R1=new Uint16Array(8192).fill(1024),R0=[],R2=[],S=new Uint8Array([1,1,1,1,4]),S0=[],S1=[],S2=R1.slice(-6),S3=[],S4=[],
		L=0,R=-1>>>0,N=0,B=0,O=[],
		c=0,c0,c1=0,cr,max=2048,wait=wait0;

	if(!E)rate=a=>a,wait=a=>a();
	bs&=-1>>>1;if(bs<y)bs=y;if(bs>z)bs=z;y=0;
	// init models
	for(;o--;)S0[M[o]=o]=new Uint8Array(5);
	for(;a;)S1[--a]=new Uint8Array(5);
	for(;a<8;S3[a++]=R1.slice(-6))S4[a]=R1.slice(-128);
	for(a=64;a;)R0[--a]=R1.slice(-32);
	for(a=24;a;)R2[--a]=R1.slice(-32);

	// EndOfMark
	function EOM(){
		var V=S0[c],U=S1[c0<<2|cr&3],i=S[3]+U[3]+V[3],j=S[4]+U[4]+V[4],n;
		encode(j,i,j-i),update03(S,V,U,3),
		V=S2,U=S3[c1];
		for(i=j=0;i<cb;)
			encode(max,max-(n=V[i]+U[i]>>1),n),
			V[i]-=V[i]>>4,U[i]-=U[i++]>>6;
		for(V=S4[i];j<1<<cb;V[j]-=V[j]>>7)
			encode(max,max-V[j+=++j],V[j])
	}
	function block(){
		var b,e=E,i,j,l,n,m,p,r,d=Date.now(),U,V;
		for(rate(a,z);a<y;){
			if(!--e&&Date.now(e=E)-d>200)return wait(block);
			V=S0[c],U=S1[c0<<2|cr&3],c=A[a];
			for(l=1;A[++a]===c&&a<y;)l++;
			for(r=0;M[r]^c;)r++;
			if(i=r--){for(;i;)M[i]=M[--i];M[0]=c}
			// encode rank
			if((r&=255)<3){
				for(n=0;i<r;)n+=U[i]+V[i]+S[i++];
				encode(S[4]+U[4]+V[4],S[r]+U[r]+V[r],n),update03(S,V,U,r)}
			else{
				encode(j=S[4]+U[4]+V[4],i=S[3]+U[3]+V[3],j-i),update03(S,V,U,3),
				V=S2,U=S3[c1],
				m=R2M[r],n=c1=R2N[r],p=R2P[r];
				for(i=0;i<n;)
					encode(max,max-(j=V[i]+U[i]>>1),j),
					V[i]-=V[i]>>4,U[i]-=U[i++]>>6;
				if(n<cb)
					encode(max,V[i]+U[i]>>1,0),
					V[i]+=max-V[i]>>4,U[i]+=max-U[i]>>6;
				V=S4[n];
				for(j=1;m;m>>=1)
					if(p&m)
						encode(max,max-V[j],V[j]),
						V[j]-=V[j]>>7,j+=j+1;
					else
						encode(max,V[j],0),
						V[j]+=max-V[j]>>7,j+=j
			}
			if(r>3)r=3;
			c0=c0<<2&255|r;
			if(l<2)b=0;
			else for(b=m=1;l>>b>1;b++)m<<=1;

   			// encode run length
   			V=R0[cr|r<<4],U=R1,j=c*32;
			for(i=0;i<b;)
				encode(max,max-(n=V[i]+U[j]>>1),n),
				V[i]-=V[i++]>>6,U[j]-=U[j++]>>3;
			encode(max,V[i]+U[j]>>1,0),
			V[i]+=max-V[i]>>6,U[j]+=max-U[j]>>3,
			V=R2[b],cr=cr<<1&15|b>1;
			for(i=0;m;m>>=1)
				if(l&m)encode(max,max-V[i],V[i]),
					V[i]-=V[i++]>>6;
				else encode(max,V[i],0),
					V[i]+=max-V[i++]>>6
		}
		EOM();
		// write pidx
		for(i=c=c0=c1=cr=0;x>>++i;);
		if(x>1)for(;i;)encode(max,1024,(bp>>--i&1)<<10);
		return wait(head)
	}
	function head(){
		if(a>=z){EOM();
			for(L+=R>>>1,c=L>0xffffffff,x=6;--x;L=L<<8>>>0)
				if(255>L>>>24||c){
					for(O[o++]=255&c--+B,B=L>>>24;N;N--)O[o++]=255&c;c=0
				}else++N;
			done(O,z,o);
			return O
		}
		var U=A.subarray(a,a+bs);y+=x=U.length;
		bp=BWTe(U.slice(),U);
		return wait(block)
	}return head()
}
/* decompressor
@A: input (Uint8Array)
@done: call back of last process.
	done(A,a,z)
	@A: decompressed array of input.
	@a: input size
	@z: decompressed size
@rate: call back of progress.
	rate(a,z)
	@a: current position
	@z: last position
*/
function GRZ2MTFdec(A,done=a=>a,rate){
	function decode(f,c){
		for(L-=R*c,R*=f;R<16777216;R*=256)L=(L<<8|A[a++])>>>0
	}
	function block(){
		var b,e=E,f,i,l,n,r,U,V,d=Date.now();
		for(rate(a,z);;){
			if(!--e&&Date.now(e=E)-d>200)return wait(block);
			V=S0[c],U=S1[t<<2|s&3],
			b=L/(R=R/(S[4]+U[4]+V[4])>>>0)>>>0;
			for(n=r=0;b>=n;)n+=S[r]+U[r]+V[r++];
			decode(b=S[--r]+U[r]+V[r],n-b),

			S[r]+=2,U[r]+=2,V[r]+=2;
			if((S[4]+=2)>58)S[4]=(S[0]-=S[0]>>1)+(S[1]-=S[1]>>1)+(S[2]-=S[2]>>1)+(S[3]-=S[3]>>1);
			if((V[4]+=2)>62)V[4]=(V[0]>>=1)+(V[1]>>=1)+(V[2]>>=1)+(V[3]>>=1);
			if((U[4]+=2)>204)U[4]=(U[0]>>=1)+(U[1]>>=1)+(U[2]>>=1)+(U[3]>>=1);

			if(r>2){// additional rank decoding
				V=S2,U=S3[u];
				for(i=r=0;i<cb;){
					if(L/(R>>>=11)>>>0<(c=V[i]+U[i]>>1)){
						decode(c,0);
						V[i]+=2048-V[i]>>4;
						U[i]+=2048-U[i]>>6;
						break}
					decode(2048-c,c);
					V[i]-=V[i]>>4;
					U[i]-=U[i++]>>6
				}
				V=S4[u=i],i=2<<i;
				for(f=1;f<i;)
					if(L/(R>>>=11)>>>0<(c=V[f]))
						decode(c,0),
						V[f]+=2048-c>>7,
						f+=f,r+=r;
					else
						decode(2048-c,c),
						V[f]-=c>>7,
						f+=f+1,r+=r+1;
				if(u===cb&&r>cs)break;
				r+=2<<u|1
			}
			// move to front
			for(n=r,c=M[i=++r&255];i;)M[i]=M[--i];M[i]=c;
			if(n>3)n=3;

			// decode run length
			t=t<<2&255|n,V=R0[s|n<<4],U=R1;
			for(f=c*32;L/(R>>>=11)>>>0>=(n=V[i]+U[f]>>1);U[f]-=U[f++]>>3)
				decode(2048-n,n),V[i]-=V[i++]>>6;
			decode(n,0),
			V[i]+=2048-V[i]>>6,
			U[f]+=2048-U[f]>>3;
			V=R2[i],s=s<<1&15|i>1;
			for(f=l=0;f<i;)
				if(L/(R>>>=11)>>>0<(n=V[f]))
					decode(n,0),
					V[f++]+=2048-n>>6,l+=l;
				else decode(2048-n,n),
					V[f++]-=n>>6,l+=l+1;
			for(l+=1<<f;l--;)P[p]=F[T[p++]=c]++
		}
		if(!p)return done(O,z,o);
		for(i=r=f=0;r<p;r+=c)c=F[f],F[f++]=r;
		for(c=0;p>>++c;);
		if(p>1)for(;c--;)
			if(L/(R>>>=11)>>>0<1024)decode(1024,0);
			else decode(1024,1024),i|=1<<c;
		for(c=0;p;c<i&&c++)c=P[c]+F[O[o+--p]=T[c]];
		o+=r;F.fill(s=t=u=c=0);
		return wait(block)
	}
	var E="function"!=typeof rate?0:4096,cb=6,cs=(2<<cb)-2,a=256,o=1024,p=0,z=A.length,M=[],P=[],O=[],T=[],F=new Uint32Array(a),
		R1=new Uint16Array(8192).fill(1024),R0=[],R2=[],S=new Uint8Array([1,1,1,1,4]),S0=[],S1=[],S2=R1.slice(-6),S3=[],S4=[],
		L,R=-1>>>0,c=0,s,t,u=0,wait=wait0;

	if(!E)rate=a=>a,wait=a=>a();
	// init models
	for(;o;)S1[--o]=new Uint8Array(5);
	for(;a;)S0[M[--a]=a]=new Uint8Array(5);
	for(;a<8;S3[a++]=R1.slice(-6))S4[a]=R1.slice(-128);
	for(a=64;a;)R0[--a]=R1.slice(-32);
	for(a=24;a;)R2[--a]=R1.slice(-32);

	for(;a<4;)L=(L<<8|A[a++])>>>0;
	return block()
}
使用例
let A=Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt());
let c=GRZ2MTFe(A,A.length),d=GRZ2MTFd(c)
console.log(c.length, String.fromCharCode(...d))

file爆縮実験

See the Pen GRZipII(mtf) compressor by xezz (@xezz) on CodePen.

本家

GRZipII

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0