0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

PPM圧縮と部分展開(2)

Last updated at Posted at 2025-08-24

今回は先日投稿した部分展開可能な静的PPM圧縮の続編となります。本家の不具合修正と改良を行い、JavaScriptに移植した問題作のようなものを書き散らかしていきます。

本家の不具合

  • 圧縮対象fileの1 byte目が他の場所に現れない場合、圧縮失敗
  • 小さいfileは概ね圧縮失敗
  • 特定のbyte列で圧縮失敗
  • 引数の有効範囲検査が不十分
  • 分割間隔がある条件を満たすと展開に失敗
  • 1MB超のfileを圧縮したものは展開にほぼ失敗

等が挙げられます。これらのうち最後の不具合は解消できていません。部分展開機能を排除すれば解消できそう…。

本家の改良点

  • 分割地点の符号化
    動画で言う所のP frameの場所。本家だとかなり非効率ですが、最適化を施しました
  • 文字候補が1個しかない文脈では符号化/復号を省略
    確実に次の文字を予測できる事を意味するので、当然符号化するまでもない。まあ省略しなくても100%の確率を用いて符号化するので圧縮率には殆ど影響しない

未実装機能

展開用の引数をfileから読み込む機能

実装編

const RICE_MASK=3,R_SHIFT=14,MAXBUF=1<<20,MAXDEPTH=10;

function rightbits(len,c){return(1<<len)-1&c}
// bit encoder
class riceEncode{
constructor(fp){
	this.outfp=fp;this.c=32;this.b=0;this.p=fp.length
}
putbits(c,len){
	for(;len>=this.c;this.c=32)
		len-=this.c,
		this.b|=rightbits(this.c,c>>>len),
		this.outfp[this.p++]=this.b&255,
		this.outfp[this.p++]=this.b>>8&255,
		this.outfp[this.p++]=this.b>>16&255,
		this.outfp[this.p++]=this.b>>>24,
		this.b=0;
	this.b|=rightbits(len,c)<<(this.c-=len);
}
putbit(bit){
	this.c--;
	if(bit)this.b|=(1<<this.c);
	if(!this.c)
		this.outfp[this.p++]=this.b&255,
		this.outfp[this.p++]=this.b>>8&255,
		this.outfp[this.p++]=this.b>>16&255,
		this.outfp[this.p++]=this.b>>>24,
		this.b=0,this.c=32
}
code(n,mask){
	this.putbit(n<0);
	if(n<0)n=~n;
	for(var b=0;n>>++b;);
	this.unsignedcode(b-1,mask);
	b>1?this.putbits(n,b-1):this.putbit(n>0);
}
unsignedcode(n,mask){
	this.putbits(0,n>>>mask);this.putbit(1);this.putbits(n,mask)
}
flush(){this.putbits(0,31)}
}
// bit decoder
class riceDecode{
constructor(fp){this.infp=fp;this.p=fp.p>>>0;this.c=this.b=0}
getbit(){
	if(--this.c>=0)return this.b>>>this.c&1;
	this.c=31;
	this.b=this.infp[this.p++]|this.infp[this.p++]<<8|this.infp[this.p++]<<16|this.infp[this.p++]<<24;
	return this.b>>>31&1;
}
getbits(n,x){
	for(;n>this.c;this.c=32)
		n-=this.c,
		x|=rightbits(this.c,this.b)<<n,
		this.b=this.infp[this.p++]|this.infp[this.p++]<<8|this.infp[this.p++]<<16|this.infp[this.p++]<<24;
	return x|rightbits(n,this.b>>>(this.c-=n))
}
decode(mask){
	let m=this.getbit(),n=this.unsigneddecode(mask);
	n=n?1<<n|this.getbits(n):this.getbit();
	return m?~n:n
}
unsigneddecode(mask){
	for(var n=0;!this.getbit();)n++;
	return(n<<mask)+this.getbits(mask)
}}
//range coder
class rangeEncoder{
constructor(outfp,o){
	this.o=o||outfp.length;this.setup(this.outfp=outfp)
}
pc(c){this.outfp[this.o++]=c}

setup(){this.low=this.count=0;this.range=-1>>>0}

flush(i){for(i=4;i--;this.low<<=8)this.pc(this.low>>>24)}
// normal coding
encode(cumFreq,freq,totFreq){
	let r=this.range/totFreq>>>0,l=this.low+r*cumFreq;
	if(!r)throw"range error";
	for(r*=freq;!((l^l+r)>>24)||r<65536&&(r=-l&65535);r*=256)this.pc(l>>>24),l<<=8;
	this.range=r;this.low=l
}
// using shift instead of dividing
encodeshift(cumFreq,freq,totShift){
	let r=this.range>>>totShift,l=this.low+r*cumFreq;
	if(!r)throw"range error";
	for(r*=freq;!((l^l+r)>>24)||r<65536&&(r=-l&65535);r*=256)this.pc(l>>>24),l<<=8;
	this.range=r;this.low=l
}
// coding number lower than maxnumber
encodeNumber(number,maxnumber){this.encode(number,1,maxnumber)}
}
class rangeDecoder{
setup(In,p){this.infp=In;this.p=In.p>>>0;this.init()}
init(){
	this.range=-1>>>0;this.low=0;
	for(let i=4;i--;)this.code=this.code<<8|this.infp[this.p++];
}
// normalize range and low
normalize(){
	for(;!((this.low^this.low+this.range)>>24)||this.range<65536&&(this.range=-this.low&65535);this.range*=256)
		this.code=this.code<<8|this.infp[this.p++],this.low<<=8;
}
// get next accumurate prob using shift
getfreqshift(totshift){
	let t=(this.code-this.low>>>0)/(this.range>>>=totshift)>>>0;
	if(t>=1<<totshift)throw"totfreq error";
	return t
}
// get next accumurate prob
getfreq(totFreq){
	let t=(this.code-this.low>>>0)/(this.range=this.range/totFreq>>>0)>>>0;
	if(t>=totFreq)throw"totfreq error";
	return t
}
basic_getCharacter(freq,cumFreq,n){
	let tmp=(this.code-this.low>>>0)/this.range>>>0,i=0,j=n,k;
	if(tmp>=cumFreq[n])throw"basic_getCharacter error";
	for(;i<j;cumFreq[k+1]>tmp?j=k:i=k+1)k=i+j>>1;
	let cf=cumFreq[i],fr=cumFreq[i+1]-cf;
	this.low+=cf*this.range;
	this.range*=fr;
	this.normalize();
	return i
}
basic_getCharacter2(cumFreq,n){
	let tmp=(this.code-this.low>>>0)/this.range>>>0,i=0,j=n,k;
	if(tmp>=cumFreq[n])throw"basic_getCharacter error";
	for(;i<j;cumFreq[k+1]>tmp?j=k:i=k+1)k=i+j>>1;
	let cf=cumFreq[i],fr=cumFreq[i+1]-cf;
	this.low+=cf*this.range;
	this.range*=fr;
	this.normalize();
	return i
}
getCharacterShift2(cumFreq,n,totShift){
	this.range>>>=totShift;
	return this.basic_getCharacter2(cumFreq,n);
}
getCharacter(freq,cumFreq,n){
	this.range=this.range/cumFreq[n]>>>0;
	return this.basic_getCharacter(freq,cumFreq,n)
}
getCharacterShift(freq,cumFreq,n,totShift){
	this.range>>>=totShift;
	return this.basic_getCharacter(freq,cumFreq,n);
}
decodeNumber(maxNumber){
	let tmp=(this.code-this.low)/(this.range=this.range/maxNumber)>>>0;
	this.low+=tmp*this.range;
	this.normalize();
	return tmp
}
decode(cumfreq,freq){
	this.low+=cumfreq*this.range;
	this.range*=freq;
	this.normalize()
}}
//range coder util
function normalize(freq,shift){
	let freqMax=1<<shift,total=0,l=freq.length,i=l;
	for(;i;)total+=freq[--i];
	if(!total)return 0;
	let total2=0,t;
	for(i=l;i;)
		if(t=freq[--i])
			t*=freqMax,total2+=freq[i]=t<total?1:t/total>>>0;
	if(total2>freqMax){
		//Decreasing each frequency
		for(t=total2-freqMax;t;)
			for(i=0;i<l;i++)
				if(freq[i]>1){
					--freq[i];
					if(!--t)break
				}
	}else if(total2<freqMax)
		//Increasing each frequency
		for(t=freqMax-total2;t;)
			for(i=0;i<l;i++)
				if(freq[i]){
					++freq[i];
					if(!--t)break
				}
	return total
}
function normalize2(freq,shift,size){
	let freqMax=1<<shift,total=0,i=size;
	for(;i;)total+=freq[--i];
	if(!total)return 0;
	let total2=0,kind=0,t;
	for(;i<size;kind++)t=freq[i]*freqMax,total2+=freq[i++]=t<total?1:t/total>>>0;
	if(total2>freqMax){
		for(t=total2-freqMax;t;)
			for(i=0;i<size;i++)
				if(freq[i]>1){--freq[i];if(!--t)break}
	}else if(total2<freqMax)
		for(t=freqMax-total2;t;)
			for(i=0;i<size;i++)
				if(freq[i]){++freq[i];if(!--t)break}
	return total
}
function makeCumFreq_dec(cumFreq,shift){
	let freqMax=1<<shift,total=0,l=cumFreq.length,i=l;
	for(cumFreq[l-1]=0;i;)total+=cumFreq[--i];
	if(!total)return;
	let total2=0,kind=0,t;
	for(i=l--;i;)if(t=cumFreq[--i])
		t*=freqMax,kind++,
		total2+=cumFreq[i]=t<total?1:t/total>>>0;
	if(total2>freqMax){
		for(t=total2-freqMax;t;)
			for(i=0;i<l;i++)
				if(cumFreq[i]>1){
					--cumFreq[i];
					if(!--t)break
				}
	}else if(total2<freqMax)
		for(t=freqMax-total2;t;)
			for(i=0;i<l;i++)
				if(cumFreq[i]){
					++cumFreq[i];
					if(!--t)break
				}
	for(i=t=0,l++;i<l;t+=kind)kind=cumFreq[i],cumFreq[i++]=t
}
function makeCumFreq_dec1(freq,cumFreq){
	for(let i=0,c=cumFreq[0],size=freq.length;i<size;)cumFreq[i+1]=c+=freq[i++]
}
function makeCumFreq_dec2(freq,cumFreq,size){
	for(let i=cumFreq[0]=0,c=0;i<size;)cumFreq[i+1]=c+=freq[i++]
}
function makeCumFreq(freq,cumFreq){
	for(let i=0,c=cumFreq[0],size=freq.length;++i<size;)cumFreq[i]=c+=freq[i-1]
}
function makeCumFreq2(freq,cumFreq,size){
	for(let i=cumFreq[0]=0,c=0;++i<size;)cumFreq[i]=c+=freq[i-1]
}
//spNode
class node{
constructor(i_freq,d,par,c){
	this.totalFreq=this.oriTotalFreq=this.toBit=this.kl_div=0,
	this.used=this.mearged=this.initialized=false,
	this.meargedNumber=-1,this.depth=d,this.parent=par,this.ch=c;
	let i=256,min=-1>>>1,size=0,f;
	for(this.children=[];i;)if(f=i_freq[--i]){
		size++;
		this.oriTotalFreq+=f;
		if(f<min)min=f
	}
	this.nc=new Uint8Array(size);
	this.freq=new Uint32Array(size);
	for(i=size=0;i<256;i++)if(f=i_freq[i]){
		this.nc[size]=i;
		if(f>2){
			let k=0;
			for(;f>>++k;);
			f>>=k-=k>>1;
			i_freq[i]=f<<=k
		}
		this.totalFreq+=this.freq[size++]=f
	}
	this.size=size
}
kl_divergence(par){
	let{freq,nc,totalFreq}=this,kl=0,pf=par.freq,pn=par.nc,pt=par.totalFreq;
	for(let i=0,j=0,l=freq.length,m=pf.length,c;i<l;kl+=c*Math.log(c/(pf[j]/pt))){
		//search parent
		for(c=nc[i];pn[j]!==c&&j<m;)j++;
		if(j===m)throw"kl_dirvergence error";
		c=freq[i++]/totalFreq
	}
	return(kl/Math.LN2)*this.oriTotalFreq
}
riceUnsignedN(n,mask){return(n>>mask)+mask+1}
checkBitN(parent){
	let{freq,nc}=this,i=0,n0=nc.length,bitcount=this.riceUnsignedN(n0,RICE_MASK),count=0,dif=0,pn=parent.nc,total=pn.length,ltotal=total&&Math.log(total),ln0=n0&&Math.log(n0),ln1=total>n0&&Math.log(total-n0);
	bitcount+=(total*ltotal-n0*ln0-(total-n0)*ln1)/Math.LN2>>>0;
	if(n0>1)for(;i<total&&count<n0;)
		if(pn[i++]===nc[count]){
			let t=freq[count]-1,k=0;
			for(;t;k++)t>>=1;
			bitcount+=this.riceUnsignedN(k,2);
			if(k)bitcount+=k>>1;
			count++
		}
	return++bitcount
}
checkBitN3(){
	let bitcount=0,p=0,t=this.totalFreq,i=this.nc.length,freq=this.freq;
	for(;t;p++)t>>=1;p>>=1;
	if(i^1)bitcount+=this.riceUnsignedN(p,RICE_MASK);
	for(;i;)bitcount+=this.riceUnsignedN(freq[--i]-1,p);
	return bitcount
}
checkBitN2(){
	let n0=this.children.length,ln0=n0&&Math.log(n0),ln1=256>n0&&Math.log(256-n0),b=this.riceUnsignedN(n0,RICE_MASK);
	return b+=(1419.565425786768-n0*ln0-(256-n0)*ln1)/Math.LN2>>>0;
}
getSize(){
	let i,n=1;
	for(i of this.children)n+=i.getSize();
	return n
}
free(){
	for(let i of this.children)i.free();delete this.children
}
getNonMeargeSize(){
	let i,n=this.mearged&1;
	for(i of this.children)n+=i.getNonMeargeSize();
	return n
}
getUsedSize(){
	let i,n=this.used&1;
	for(i of this.children)n+=i.getUsedSize();
	return n
}
searchPath(buf,p,historyLimit){
	if(p>=historyLimit)//liner search
		for(let cur=buf[p],S=this.children,left=0,right=S.length,center;left<right;S[center].ch<cur?left=center+1:right=center)
			if(S[center=left+right>>1].ch===cur)return S[center].searchPath(buf,p-1,historyLimit);
	//p<0 or not found
	return this
}
outputRice(rc,m){
	let{nc,oriFreq,children}=this,n=nc.length,l=this.used;
	rc.unsignedcode(n-1,RICE_MASK);
	rc.putbit(l);
	if(l&&n>1)for(let i=0;i<n;i++){
		let t=oriFreq[i],k=0;
		for(;t>>++k;);
		rc.unsignedcode(k-1,2);
		if(k>2)l=k>>1,rc.putbits(t>>l+(k&1),l)
	}
	if(m)return this.meargedNumber=m_number++;
	n=children.length;rc.putbit(n>0);
	if(n){
		rc.unsignedcode(n-1,RICE_MASK);//about 10bit
		for(let i=0;i<n;)children[i++].outputRice(rc);
	}
}
outputRice4(dad,ra,m){
	let{nc,children}=this,pn=dad.nc,n=nc.length,sum=pn.length,i=0,c=0;
	for(;c<n;)
		if(pn[i++]===nc[c])ra.encode(0,n,sum),c++;
		else ra.encode(n,sum-n,sum);
	if(m)return this.meargedNumber=m_number++;
	if(n=children.length)
		for(i=c=0;i<256&&c<n;)
			if(children[c].ch===i++)
				ra.encodeshift(0,n,8),
				children[c++].outputRice4(this,ra);
			else ra.encodeshift(n,256-n,8)
}
checkLog2(n){
	if(n<0)throw"error";
	for(var t=0;n;t++)n>>>=1;
	return t
}
mearge(){
	let c=[],cs=0,i;
	for(i of this.children)i.children.length?i.mearge():c[cs++]=i;
	//greedy stratedy
	let cf=new Uint32Array(256),new_c=[];
	for(;cs>1;){
		let mn=i=0;
		for(;i<cs;i++)
			for(let j=i;++j<cs;){
				let a=c[i],b=c[j];
				while(a.mearged)a=a.meargedNode;
				while(b.mearged)b=b.meargedNode;
				if(a===b)continue;
				cf.fill(0);
				let F=a.freq,N=a.nc,k=F.length,l=k;
				for(;k;)cf[N[--k]]+=F[k];
				F=b.freq,N=b.nc,l+=k=F.length;
				for(;k;)cf[N[--k]]+=F[k];
				let t=new node(cf,a.depth,a.parent,a.ch),gain=a.kl_divergence(t)+b.kl_divergence(t);
				t.toBit=t.checkBitN3();
				let bitCount=a.toBit+b.toBit-t.toBit+l-t.freq.length+1;
				//check the increasing cost and benefit
				if(gain<bitCount){
					//mearge
					a.mearged=b.mearged=true;
					a.meargedNode=b.meargedNode=t;
					t.toBit=Math.min(a.toBit,b.toBit);
					new_c.push(t);mn++
				}
			}
		for(i=0;i<cs;i++)c[i].mearged||new_c.push(c[i]);
		i=c;c=new_c;new_c=i;i.length=0;cs=c.length;
		if(!mn)break
	}
}
set_ncSkip2(){
	if(this.initialized)return;
	let{freq,children,ch,nc,depth,parent}=this,i=nc.length,cs=children.length,ncSkip=this.ncSkip=[];
	this.initialized=1;
	this.oriFreq=freq.slice();
	normalize(freq,R_SHIFT);
	makeCumFreq(freq,this.cumFreq=new Uint32Array(i));
	if(depth)
		a:for(let pn=parent.nc,ps=parent.ncSkip,p=pn.length,t,c;i;ncSkip[i]=t){
			for(t=this,c=nc[--i];p;)if(pn[--p]===c){
				t=ps[p];
				if(t.depth===depth)//find the children whose has ch
					for(let S=t.children,l=0,r=S.length,m;l<r;c<ch?l=m+1:r=m)
						if((c=S[m=l+r>>1].ch)===ch){
							t=S[m];continue a
						}
				break
			}
	}else for(;i;){
		ncSkip[--i]=this;//既定値=自身
		//対応する子nodeを探す
		for(let c=nc[i],l=0,r=cs,m;l<r;children[m].ch<c?l=m+1:r=m)
			if(children[m=l+r>>1].ch===c){
				ncSkip[i]=children[m];break
			}
	}
	for(;cs;)children[--cs].set_ncSkip2()
}}
//spDNode
class dNode{
constructor(){}
inputRice(rc,d){
	let size=rc.unsigneddecode(RICE_MASK)+1,S=this.children=[];
	this.depth=d;
	this.No=totalNumber++;
	this.nc=new Uint8Array(this.size=size);
	if(rc.getbit()){
		let cf=this.cumFreq=new Uint32Array(size+1);
		if(size==1)cf[0]=1;
		else for(let i=0,k,b;i<size;cf[i++]=k>2?rc.getbits(b=k>>1)<<b+(k&1):k)
			k=rc.unsigneddecode(2)+1;
		makeCumFreq_dec(cf,R_SHIFT)
	}
	if(rc.getbit())
		for(let i=0,c=rc.unsigneddecode(RICE_MASK)+1;i<c;)
			(S[i++]=new dNode).inputRice(rc,d+1)
}
inputRice4(parent,rd){
	this.parent=parent;
	let{nc,children}=this,i=0,count=0,pn=parent.nc,total=pn.length,n=nc.length,freq=new Uint32Array([n,total-n]),cumFreq=new Uint32Array(3);
	makeCumFreq_dec1(freq,cumFreq);
	for(;count<n;i++)
		if(!rd.getCharacter(freq,cumFreq,2))nc[count++]=pn[i];
	if(n=children.length){
		freq[0]=n;freq[1]=256-n;
		makeCumFreq_dec1(freq,cumFreq);
		for(i=count=0;i<256&&count<n;i++)
			if(!rd.getCharacterShift(freq,cumFreq,2,8))
				children[count].ch=i,
				children[count++].inputRice4(this,rd)
	}
}
searchPath(buf,p,historyLimit){
	if(p>=Math.max(historyLimit,0)){
		//binary search
		let S=this.children,b=buf[p],left=0,right=S.length,center,c;
		for(;left<right;c<b?left=center+1:right=center)
			if((c=S[center=left+right>>1].ch)===b)
				return S[center].searchPath(buf,p-1,historyLimit)
	}return this//p<0 or not found
}
set_ncSkip2(){
	let{children,depth,parent,nc,ch}=this,ncSkip=this.ncSkip=[],i=nc.length,cs=children.length;
	if(depth)
		a:for(let pn=parent.nc,p=pn.length,ps=parent.ncSkip,t,c;i;ncSkip[i]=t){
			for(t=this,c=nc[--i];p;)if(pn[--p]===c){
				t=ps[p];
				if(t.depth===depth)//find the children whose has ch
					for(let S=t.children,l=0,r=S.length,m;l<r;c<ch?l=m+1:r=m)
						if((c=S[m=l+r>>1].ch)===ch){
							t=S[m];continue a
						}
				break
			}
	}else for(;i;){
		ncSkip[--i]=this;//既定値=自身
		//対応する子nodeを探す
		for(let c=nc[i],l=0,r=cs,m;l<r;children[m].ch<c?l=m+1:r=m)
			if(children[m=l+r>>1].ch===c){
				ncSkip[i]=children[m];break
			}
	}
	for(;cs;)children[--cs].set_ncSkip2()
}
debugSize(){
	let{children}=this,l=children.length,i=0,s=1;
	for(;i<l;)s+=children[i++].debugSize();
	return s
}
allocateSize(){
	let{nc,children,cumFreq,ncSkip}=this,s=nc.length+(cumFreq.length+ncSkip.length+7<<2),i=children.length;
	for(;i;)s+=children[--i].allocateSize();
	return s
}
freeChildren(){
	for(var{children}=this,i=children.length;i;)children[--i].freeChildren();
	children.length=0
}
freeAll(){
	for(let{children}=this,i=children.length;i;delete children[i])
		children[--i].freeAll()
}}
//spEModel
let m_number,totalNumber;
class eModel{
constructor(fileSize,bufSize,op={interval:256}){
	if((op.interval>>>=0)<256)op.interval=256;
	if(op.interval>fileSize)op.interval=fileSize;
	if(op.interval>1<<20)op.interval=1<<20;
	this.fileSize=fileSize;this.interval=op.interval;
	this.pos=new Uint32Array(this.bufSize=bufSize);
	this.freq=new Uint32Array(256)
}
makeModel(buf){
	var{freq,pos,bufSize}=this,i=0,c=buf[0],freq2=new Uint32Array(65536);
	for(freq.fill(0),freq2[c<<8]++;++i<bufSize;freq[c]++)freq2[c|(c=buf[i])<<8]++;
	let max=c=freq2[i=0];
	for(this.buf=buf;c<bufSize;c=freq2[i]+=c)
		if(freq2[++i]>max)max=freq2[i];
	for(i=bufSize;i>1;)
		pos[--freq2[buf[--i]<<8|buf[i-1]]]=i;
	freq[buf[pos[--freq2[buf[0]<<8]]=0]]++;
	let root=new node(freq,0,null,0);
	this.makeTree(root,0,bufSize,0,new Uint32Array(max));
	return root
}
makeTree(parent,start,last,depth,work){
	let{buf,freq,pos,bufSize}=this,i=start;
	pos[i]<depth&&start++;
	for(let cur=buf[pos[i]+1];++i<last&&cur===buf[pos[i]+1];);
	if(i==last)return;//everything is same.we don't have to check more.
	if(depth>1){//We already sort depth 0,1
		let r=last-start,j=0;i=start;
		if(r<32)//Insertion sort
			for(;++i<last;pos[j]=r)
				for(r=pos[j=i];j>start&&buf[pos[j-1]-depth]>buf[r-depth];)
					pos[j]=pos[--j];
		else{//Bucket sort
			for(freq.fill(0);i<last;)freq[buf[pos[i++]-depth]]++;
			for(let c=freq[0];c<r;)c=freq[++j]+=c;
			for(;i>start;)work[--freq[buf[pos[--i]-depth]]]=pos[i];
			for(j=0;i<last;)pos[i++]=work[j++]
		}
	}//Check whether likelihood is increased
	let children=[],range=[],bitCount=0,gain=0;
	for(i=start;i<last;){
		let j=i,p=pos[i];
		if(p<depth)throw-1;
		if(p-depth===bufSize){i++;continue}
		let cur=buf[p-depth];
		//check how many character is same
		for(freq.fill(0);++p<bufSize&&++freq[buf[p]],++i<last&&cur===buf[(p=pos[i])-depth];);
		//j,j+1,...,i-1 is concerned
		let st=new node(freq,depth+1,parent,cur);
		if(!st.totalFreq)continue;
		children.push(st);range.push([j,i]);
		gain+=st.kl_div=st.kl_divergence(parent);
		bitCount+=st.checkBitN(parent)
	}
	bitCount+=parent.checkBitN2();
	if(gain<bitCount)return;
	parent.children=children;
	for(i=children.length;i;)
		this.makeTree(children[--i],range[i][0],range[i][1],depth+1,work)
}
compress(infp,outfp){
	let{fileSize,interval,freq}=this,i=fileSize,remainSize=i,split=i/interval>>>0,n=63,o=0,p;
	for(;i-=outfp[o++]=i&n;n=255)i/=n+1,outfp[0]+=64;
	for(i=interval,n=63,p=o;i-=outfp[o++]=i&n;n=255)i/=n+1,outfp[p]+=64;
	let inPos=0,treePos=[],e=new rangeEncoder(outfp,o),out_pos=new Uint32Array(split+1);//Position at Compressed Data;
	for(out_pos[0]=o;i<256;)freq[i++]=1;
	let base=new node(freq,-1,null,0);
	for(p=0;;){
		let shouldReadSize=remainSize>MAXBUF?MAXBUF:remainSize,A=infp.subarray(p,p+=shouldReadSize),readSize=A.length;
		if(!readSize)break;
		remainSize-=this.bufSize=readSize;
		let t=this.makeModel(A),root=t,step=0;
		t.set_ncSkip2();
		e.setup();
		for(i=0;i<readSize;){
			let c=A[i++];
			t.used=true;
			//Binary Search
			let left=0,right=t.nc.length,p=0;
			if(right>1){
				for(;left<right&&t.nc[p=left+right>>1]!==c;)t.nc[p]<c?left=p+1:right=p;
				if(left===right)throw"cannot found nc";
				e.encodeshift(t.cumFreq[p],t.freq[p],R_SHIFT)
			}
			if(++step===interval){
				step=0;e.flush();e.setup();
				out_pos[++inPos]=outfp.length;
				t=root;continue
			}
			t=t.ncSkip[p]
		}
		e.flush();
		treePos.push(outfp.length);
		let rc=new riceEncode(outfp);
		root.outputRice(rc);
		rc.flush();
		rc=new rangeEncoder(outfp);
		root.outputRice4(base,rc);
		rc.flush();
		m_number=0;
		out_pos[inPos]=outfp.length
	}
	//contents of footer
	let nowPos=outfp.length,rc=new riceEncode(outfp);
	rc.unsignedcode(out_pos[0],RICE_MASK);
	let x=1/0,v=(out_pos[0]-out_pos[i=inPos])/~i>>>0,min=x,avg=v;
	for(;i;){
		let n=out_pos[i]-=out_pos[--i];
		if(n<min)min=n
	}
	//find best avg
	for(;min<v--;){
		let c=i=0;
		for(;i<inPos;){
			let n=out_pos[++i]-v,b=0;
			if(n<0)n=~n;
			for(;n>>++b;);
			c+=(b>>2)+(b>1?b+3:5)
		}
		if(c<x)x=c,avg=v
	}
	if(fileSize>interval)
		for(i=0,rc.code(avg,RICE_MASK);i<inPos;)rc.code(out_pos[++i]-avg,2);
	rc.flush(i=0);out_pos=[];
	for(x of treePos)
		for(n=63,p=i;x-=out_pos[i++]=x&n;n=255)x/=n+1,out_pos[p]+=64;
	for(n=63,p=i;nowPos-=out_pos[i++]=nowPos&n;n=255)nowPos/=n+1,out_pos[p]+=64;
	for(o=outfp.length;i;)outfp[o++]=out_pos[--i];
	return outfp
}}
let CACHE;
function readTree(infp,filePos,base){
	infp.p=filePos;
	let rc=new riceDecode(infp),t=new dNode;
	totalNumber=0;
	t.inputRice(rc,0);
	let rd=new rangeDecoder;
	rd.setup(infp,infp.p=rc.p);
	t.inputRice4(base,rd);
	t.set_ncSkip2();
	if(CACHE)t.freeChildren();
	return t
}
function SPPMd(infp,opt={}){
	let a=0,b=infp[a++],c=64,fileSize=b&63,z=infp.length,start=opt.start>>>0,last=opt.last>>>0,o=0,outfp=[],outPos,treePos=[];
	for(b>>=6;b--;c*=256)fileSize+=infp[a++]*c;
	b=infp[a++],c=64;
	let interval=b&63,rootGsize=(fileSize+MAXBUF-1)/MAXBUF>>>0;
	for(b>>=6;b--;c*=256)interval+=infp[a++]*c;
	if(start>fileSize)return[];
	if(start>=last)last=fileSize;
	if(interval>fileSize)interval=fileSize;
	if(interval>MAXBUF)interval=MAXBUF;
	{
		let i=rootGsize,d;a=z;
		for(;i--;treePos.push(d)){
			b=infp[--a],c=64,d=b&63;
			for(b>>=6;b--;c*=256)d+=c*infp[--a]
		}
		b=infp[--a],c=64,d=b&63;
		for(b>>=6;b--;c*=256)d+=c*infp[--a];
		infp.p=d;
		let rc=new riceDecode(infp);
		outPos=new Uint32Array(c=fileSize/interval+1>>>0);
		outPos[i=0]=a=rc.unsigneddecode(RICE_MASK);
		if(interval<fileSize)for(b=rc.decode(RICE_MASK);++i<c;)outPos[i]=a+=rc.decode(2)+b;
		else outPos[1]=d+a
	}
	let base=new dNode,d=new rangeDecoder;
	base.nc=Uint8Array.from(Array(base.size=256),(a,b)=>b);
	let beforeRoot=-1,root,nowBlock=start/interval>>>0,startOffset=start%interval,
		step=0,started=0,remain=last-start,currentRoot=start/MAXBUF>>>0;

	if(currentRoot^beforeRoot)root=readTree(infp,treePos[beforeRoot=currentRoot],base);
	infp.p=outPos[nowBlock];
	d.setup(infp);
	for(let t=root;;){
		let c=t.size,cp=t.nc[c=c>1?d.getCharacterShift2(t.cumFreq,c,R_SHIFT):0];
		step++;
		if(!started){
			if(step!=startOffset+1){t=t.ncSkip[c];continue}
			started=1
		}
		outfp[o++]=cp;
		if(!--remain)break;
		if(step===interval){
			nowBlock++;step=0;currentRoot=(last-remain)/MAXBUF>>>0;
			infp.p=d.p;
			if(currentRoot^beforeRoot){
				if(!CACHE)root.freeAll();
				root=readTree(infp,treePos[beforeRoot=currentRoot],base);
				infp.p=outPos[nowBlock]
			}
			d.setup(infp);t=root;continue
		}
		t=t.ncSkip[c]
	}return outfp
}
function SPPMe(A,interval){
	let z=A.length,e=new eModel(z,z,{interval}),O=[];
	e.compress(A.buffer?A:new Uint8Array(A),O);
	return O
}
仕様
/*圧縮
SPPMe(A,interval)
@A: Array/Uint8Array。圧縮元配列
@interval: 分割間隔(256~1<<20)。
	小さいと途中復号する場合に無駄な処理が減る。大きいと圧縮率良い
@return: Array
*/
/*復号
SPPMd(infp,opt)
@infp: Array/Uint8Array
@opt: Object。形式は{start:number, last:number}となる
	start: 復号開始地点。省略すると0
	last: 復号終端。省略すると最後まで復号
*/
使用例
let A=Uint8Array.from("That that is is that that is not is not is that it it is",a=>a.charCodeAt()),
	e=SPPMe(A,256),//圧縮
	d=SPPMd(e),//全体復元
	d2=SPPMd(e,{start:A.length>>1});//半分以降復元

document.write(A.length,"->",e.length,"<br>",
	String.fromCharCode(...d),"<br>",
	String.fromCharCode(...d2),"<br>")

余談になりますが本家同様、使われていない関数も何個かそのまんま移植してあります。searchPathを使うと圧縮率が良くなりますが、速度が犠牲になりそう。

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?