WebAssembly を使って「SIMD-oriented Fast Mersenne Twister」アルゴリズムを使用した乱数生成を実装してみました。砂嵐は毎回生成しています。名前の下の"数ms"は生成時間を performance.now() で計測した最良値です。(精度不足な感じですが…)
See the Pen テレビの中の砂嵐(常時生成版) by Ikiuo (@ikiuo) on CodePen.
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="utf-8">
<title>テレビの中の砂嵐(常時生成版)</title>
</head>
<body>
<table>
<tr><th>SFMT19937<br><span id="tagSFMT19937Perf" style="font-size: xx-small;"></span></th><td> <canvas id="tagSFMT19937"></canvas></td></tr>
<tr><th>MT19937<br><span id="tagMT19937Perf" style="font-size: xx-small;"></span></th><td> <canvas id="tagMT19937"></canvas></td></tr>
</table>
<script>
/*
* メルセンヌツイスタ
*/
class MT19937 {
constructor(seed = 5489) {
seed = seed >>> 0;
const state = new Uint32Array(624);
state[0] = seed;
for (let i = 1; i < 624; i++) {
const x = (seed ^ (seed >>> 30)) >>> 0;
const y = (BigInt(x) * 0x6c078965n) & 0xffffffffn;
state[i] = seed = (Number(y) + i) >>> 0;
}
this.state = state;
this.index = 0;
}
generate1() {
const state = this.state;
const cidx = this.index;
const nidx = (cidx < 623) ? cidx + 1 : 0;
const xidx = (cidx < 227) ? cidx + 397 : cidx - 227;
this.index = nidx;
const sc = state[cidx];
const sn = state[nidx];
const sx = state[xidx];
const sy = ((sc & 0x80000000) | (sn & 0x7fffffff)) >>> 1;
const sz = (sn & 1) ? 0x9908b0df : 0;
const ss = (sx ^ sy ^ sz) >>> 0;
state[cidx] = ss;
const t1 = ss ^ (ss >>> 11);
const t2 = t1 ^ ((t1 << 7) & 0x9d2c5680);
const t3 = t2 ^ ((t2 << 15) & 0xefc60000);
const tr = t3 ^ (t3 >>> 18);
return tr >>> 0;
}
generate(count) {
const r = new Uint32Array(count * 4);
for (let i = 0; i < r.length; ++i)
r[i] = this.generate1();
return r;
}
}
/*
* SFMT19937 : SIMD 版メルセンヌツイスタ
*/
/*
* WebAssembly の SIMD コードを使用して 16 バイト単位で生成する。
* (生成するバイト列はリトル エンディアン)
*
* 大量の乱数を一気に生成する目的に特化した。
* 生成の度に内部状態を 32bit × (624+2) 個をコピーしているので
* ちびちび生成すると激遅になる。
*/
class SFMT19937L {
// sfmt19937.S -> .wasm -> deflate -> Base64
static #wasmBinary = Uint8Array.from(atob([
'eNpVjrFOw0AQRHf3DsfJOYkVNzRIZ1d8AXWOjiYiX0AickIymEixKVAKF/A3iJ4u',
'aYGGH0CkoqWjQFrMxUqQcsVIo52beTDOMwQA9GkEMMISqIdeNp3cXFkvs9l0dkuA',
'Qjhp7FPz3M6Ks9zaCYB/Ya/tbFxYwNYTUYo4N8AgNQfrwu27/3yOeJAAB36zpYJ2',
'pwtIQu55De5o4CEfwPwY705l34CBQ9IG8yPSyrll5OqAlQQTmrd/o15cQbleS0iD',
'kUUao+hr0lQngk39di50O+tLaHr8gDysjU4kByHsRjfBZdcofkQOvqrqo3z9fa+q',
'n8V3VS144HCHWjpVchVpMmHqEPCkDaoGMasoTdDpZUwGYuGoxIbKcWvh8rHY+ab+',
'AEnIXMg=',
].join('')), c => c.charCodeAt(0));
static #wasmStream(memory) {
const S = SFMT19937L;
const stream = new Blob([S.#wasmBinary]).stream();
const deflate = new DecompressionStream('deflate');
const option = {headers: {"Content-Type": "application/wasm"}}
return new Response(stream.pipeThrough(deflate), option);
}
#S;
#seed;
#memory;
#state;
#module;
#exports;
static async Instantiate(seed, memory) {
return await new SFMT19937L(seed, memory).#instantiate();
}
constructor(seed, memory) {
this.#S = SFMT19937L;
this.#seed = seed;
this.#memory = memory ?? new WebAssembly.Memory({initial: 1});
this.#state = new Uint32Array(this.#memory.buffer, 32, 624);
}
async #instantiate() {
const res = this.#S.#wasmStream(this.#memory);
const imports = {module: {memory: this.#memory}}
return await WebAssembly.instantiateStreaming(res, imports)
.then(m => this.#instantiated(m));
}
#instantiated(mod) {
const wasm = mod.instance.exports;
this.#module = mod;
this.#exports = wasm;
const seed = this.#seed ?? 5489;
if (seed instanceof Array)
this.#init_array(seed);
else
this.#init_scalar(seed)
wasm.cert_seed ();
return this;
}
#init_scalar(seed) {
const state = this.#state;
let s = seed;
state[0] = s;
for (let i = 1; i < 624; ++i) {
const x = BigInt((s ^ (s >>> 30)) >>> 0);
const n = Number((x * 1812433253n) & 0xffffffffn);
state[i] = s = (n + i) >>> 0;
}
}
#init_array(seed) {
throw Error("未実装");
}
generate(count) {
const value_size = count * 16;
const value_start = (8 + 624) * 4;
const value_end = value_start + value_size;
const memsize = (value_end + 0xffff) >>> 16;
if ((memsize << 16) > this.#memory.buffer.byteLength)
this.#memory.grow(memsize);
this.#exports.generate(count);
return new Uint32Array(this.#memory.buffer, value_start, value_size >> 2);
}
}
/*
* 以下、砂嵐画面処理
*/
class SandStorm
{
constructor(tag, rand) {
const width = 720;
const height = 480;
tag.width = width;
tag.height = height;
this.tag = tag;
this.rand = rand;
this.ctx = this.tag.getContext('2d');
this.width = this.ctx.canvas.width;
this.height = this.ctx.canvas.height;
this.image = this.ctx.createImageData(width, height);
this.pixel = this.image.data;
this.enable = true;
this.tag.onclick = (() => this.onclick ());
this.best = null;
}
update() {
if (!this.enable)
return;
const pixel = this.pixel;
const gcnt = (pixel.length + 15) >> 4;
const p_s = performance.now();
const val = this.rand.generate(gcnt);
const p_e = performance.now() - p_s;
if (!this.best || this.best > p_e)
this.best = p_e;
let c = 0;
for (let i = 0; i < pixel.length; i += 4, c >>= 8) {
if (!(i & 0x0c))
c = val[i >> 2];
const g = c & 255;
pixel[i + 0] = g;
pixel[i + 1] = g;
pixel[i + 2] = g;
pixel[i + 3] = 255;
}
this.ctx.putImageData(this.image, 0, 0);
}
onclick() {
this.enable = !this.enable;
}
}
async function test() {
const rsfmt = await SFMT19937L.Instantiate();
const rmt = new MT19937();
const ssfmt = new SandStorm(tagSFMT19937, rsfmt);
const smt = new SandStorm(tagMT19937, rmt);
const render = (() => {
window.requestAnimationFrame(render);
ssfmt.update();
smt.update();
tagSFMT19937Perf.innerText = ('' + ssfmt.best).slice(0, 8) + 'ms';
tagMT19937Perf.innerText = ('' + smt.best).slice(0, 8) + 'ms';
});
window.requestAnimationFrame(render);
}
window.onload = test;
</script>
</body>
</html>
WebAssembly 用ソース
アセンブルは自作アセンブラを使用します。
sfmt19937.S
;; -*- WASM -*-
;;
;; 定数 - SFMT19937 用パラメータ
;;
MEXP .alias 19937
POS1 .alias 122
NP1 .alias (POS1 << 4)
SL1 .alias 18
SL2 .alias 1
SR1 .alias 11
SR2 .alias 1
MSK1 .alias 0xdfffffef
MSK2 .alias 0xddfecb7f
MSK3 .alias 0xbffaffff
MSK4 .alias 0xbffffff6
PAR1 .alias 0x00000001
PAR2 .alias 0x00000000
PAR3 .alias 0x00000000
PAR4 .alias 0x13c9e684
NST .alias ((MEXP >> 7) + 1) ; 156 = ((19937 >> 7) + 1)
NDW .alias (NST << 2) ; 624 = 156 << 2
NBD .alias (NST << 4) ; 2496 = 156 << 4
STATE_P .alias 0
STATE .alias (STATE_P + 32)
;;
;; 作業領域
;;
.import.memory "module", "memory", 1
;;
;; 初期化補助 - cert_seed()
;;
$xpar .defmacro par, offs ; [STATE+offs] ^= par & -par
u32.const offs ; (*1)
u32.const offs
u32.load STATE ; t1 = [STATE+offs]
u32.const par & -par ; t2 = par & -par
u32.xor ; t3 = t1 ^ t2
u32.store STATE ; (*1) [STATE+offs] = t3
.endmacro
.export cert_seed
cert_seed .code
tmp .local v128
u32.const 0 ; tmp = [STATE].u32x4 & [PAR1,PAR2,PAR3,PAR4]
v128.load STATE
u32x4.const PAR1, PAR2, PAR3, PAR4
v128.and
local.tee tmp
u8x16.const *([8:16]+[0:8]) ; [8..15, 0..7]
u8x16.swizzle
local.get tmp
v128.xor ; t1 = tmp.u128 ^ rotr(tmp.u128, 64)
u64x2.extract_lane 0 ; t2 = t1.u64[0]
u64.popcnt ; t3 = popcnt(t2)
u64.const 1 ; t4 = t3 & 1
u64.and
u64.eqz ; cond = (t4 == 0)
if ; if (cond) {
@if PAR1
$xpar PAR1, 0*4 ; [STATE+0*4] ^= PAR1 & -PAR1
@elif PAR2
$xpar PAR2, 1*4 ; [STATE+1*4] ^= PAR2 & -PAR2
@elif PAR3
$xpar PAR3, 2*4 ; [STATE+2*4] ^= PAR3 & -PAR3
@elif PAR4
$xpar PAR4, 3*4 ; [STATE+3*4] ^= PAR4 & -PAR4
@endif
end.if ; }
; [STATE_P+0] = [STATE+NBD-32].v128
u32.const STATE_P
u32.const STATE + NBD - 32
v128.load
v128.store
; [STATE_P+16] = [STATE+NBD-16].v128
u32.const STATE_P + 16
u32.const STATE + NBD - 16
v128.load
v128.store
end
;;
;; 生成関数 - generate(cnt)
;;
.export generate
generate .code
cnt .param i32
pend .local i32
offs .local i32
dest .local i32
cur .local v128
u32.const STATE_P ; offs = STATE_P
local.tee offs
; local.get offs
local.get cnt ; pend = STATE_P + cnt * 16
u32.const 4
u32.shl
u32.add
local.set pend
$genlp: loop ; loop {
local.get offs ; (*1)
local.get offs ; sr2 = [offs+0].u128 >> (SR2*8)
v128.load 0
u8x16.const *([0:16][SR2:16]+[16]*16)[0:16]
u8x16.swizzle
local.get offs ; sl1 = [offs+16].u32x4 << SL1
v128.load 16
u32.const SL1
u32x4.shl
v128.xor ; x1 = sr2 ^ sl1
local.get offs ; sl2 = (cur = [offs+32]).u128 << (SL2*8)
v128.load 32
local.tee cur
u8x16.const *([16]*SL2+[0:16])[0:16]
u8x16.swizzle
local.get offs ; sr1 = [offs+NP1+32].u32x4 >> SR1
v128.load NP1+32
u32.const SR1
u32x4.shr
u32x4.const MSK1, MSK2, MSK3, MSK4
u32x4.and ; sr1 &= { MSK1, MSK2, MSK3, MSK4 }
v128.xor ; x2 = sl2 ^ sr1
v128.xor ; x3 = x1 ^ x2
local.get cur ; x4 = x3 ^ cur
v128.xor
v128.store NBD+32 ; (*1) [offs+NBD+32] = x4
local.get offs ; offs += 16
u32.const 16
u32.add
local.tee offs
local.get pend ; if (offs < pend) goto $genlp
u32.lt $genlp
end.loop ; } -> $genlp
;;
local.get cnt ; pend = STATE + NBD + (cnt * 16)
u32.const 4
u32.shl
u32.const STATE+NBD
u32.add
local.tee pend
; local.get pend ; offs = pend - (NBD + (STATE - STATE_P))
u32.const NBD+32
u32.sub
local.set offs
u32.const STATE_P ; dest = STATE_P
local.set dest
$elp: loop ; loop {
local.get dest ; [dest] = [offs].v128
local.get offs
v128.load
v128.store
local.get dest ; dest += 16
u32.const 16
u32.add
local.set dest
local.get offs ; offs += 16
u32.const 16
u32.add
local.tee offs
local.get pend ; if (offs < pend) goto $elp
u32.lt $elp
end.loop ; } -> $elp
; call prep
end
;; end: sfmt19937.S
アセンブル
% wasmgen --deflate --base64+w=64,q=1,c=1 sfmt19937.S -
'eNpVjrFOw0AQRHf3DsfJOYkVNzRIZ1d8AXWOjiYiX0AickIymEixKVAKF/A3iJ4u',
'aYGGH0CkoqWjQFrMxUqQcsVIo52beTDOMwQA9GkEMMISqIdeNp3cXFkvs9l0dkuA',
'Qjhp7FPz3M6Ks9zaCYB/Ya/tbFxYwNYTUYo4N8AgNQfrwu27/3yOeJAAB36zpYJ2',
'pwtIQu55De5o4CEfwPwY705l34CBQ9IG8yPSyrll5OqAlQQTmrd/o15cQbleS0iD',
'kUUao+hr0lQngk39di50O+tLaHr8gDysjU4kByHsRjfBZdcofkQOvqrqo3z9fa+q',
'n8V3VS144HCHWjpVchVpMmHqEPCkDaoGMasoTdDpZUwGYuGoxIbKcWvh8rHY+ab+',
'AEnIXMg=',