0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

テレビの中の砂嵐(SIMD版メルセンヌツイスタを使用した常時生成)

Posted at

WebAssembly を使って「SIMD-oriented Fast Mersenne Twister」アルゴリズムを使用した乱数生成を実装してみました。砂嵐は毎回生成しています。名前の下の"数ms"は生成時間を performance.now() で計測した最良値です。(精度不足な感じですが…)

See the Pen テレビの中の砂嵐(常時生成版) by Ikiuo (@ikiuo) on CodePen.

<!DOCTYPE html>
<html lang="ja">
  <head>
    <meta charset="utf-8">
    <title>テレビの中の砂嵐(常時生成版)</title>
  </head>
  <body>
    <table>
      <tr><th>SFMT19937<br><span id="tagSFMT19937Perf" style="font-size: xx-small;"></span></th><td> <canvas id="tagSFMT19937"></canvas></td></tr>
      <tr><th>MT19937<br><span id="tagMT19937Perf" style="font-size: xx-small;"></span></th><td> <canvas id="tagMT19937"></canvas></td></tr>
    </table>

    <script>

     /*
      * メルセンヌツイスタ
      */

     class MT19937 {
         constructor(seed = 5489) {
             seed = seed >>> 0;

             const state = new Uint32Array(624);
             state[0] = seed;
             for (let i = 1; i < 624; i++) {
                 const x = (seed ^ (seed >>> 30)) >>> 0;
                 const y = (BigInt(x) * 0x6c078965n) & 0xffffffffn;
                 state[i] = seed = (Number(y) + i) >>> 0;
             }
             this.state = state;
             this.index = 0;
         }

         generate1() {
             const state = this.state;

             const cidx = this.index;
             const nidx = (cidx < 623) ? cidx + 1 : 0;
             const xidx = (cidx < 227) ? cidx + 397 : cidx - 227;
             this.index = nidx;

             const sc = state[cidx];
             const sn = state[nidx];
             const sx = state[xidx];
             const sy = ((sc & 0x80000000) | (sn & 0x7fffffff)) >>> 1;
             const sz = (sn & 1) ? 0x9908b0df : 0;
             const ss = (sx ^ sy ^ sz) >>> 0;
             state[cidx] = ss;

             const t1 = ss ^ (ss >>> 11);
             const t2 = t1 ^ ((t1 << 7) & 0x9d2c5680);
             const t3 = t2 ^ ((t2 << 15) & 0xefc60000);
             const tr = t3 ^ (t3 >>> 18);
             return tr >>> 0;
         }

         generate(count) {
             const r = new Uint32Array(count * 4);

             for (let i = 0; i < r.length; ++i)
                 r[i] = this.generate1();
             return r;
         }
     }

     /*
      * SFMT19937 : SIMD 版メルセンヌツイスタ
      */
     /*
      * WebAssembly の SIMD コードを使用して 16 バイト単位で生成する。
      * (生成するバイト列はリトル エンディアン)
      *
      * 大量の乱数を一気に生成する目的に特化した。
      * 生成の度に内部状態を 32bit × (624+2) 個をコピーしているので
      * ちびちび生成すると激遅になる。
      */

     class SFMT19937L {

         // sfmt19937.S -> .wasm -> deflate -> Base64
         static #wasmBinary = Uint8Array.from(atob([
             'eNpVjrFOw0AQRHf3DsfJOYkVNzRIZ1d8AXWOjiYiX0AickIymEixKVAKF/A3iJ4u',
             'aYGGH0CkoqWjQFrMxUqQcsVIo52beTDOMwQA9GkEMMISqIdeNp3cXFkvs9l0dkuA',
             'Qjhp7FPz3M6Ks9zaCYB/Ya/tbFxYwNYTUYo4N8AgNQfrwu27/3yOeJAAB36zpYJ2',
             'pwtIQu55De5o4CEfwPwY705l34CBQ9IG8yPSyrll5OqAlQQTmrd/o15cQbleS0iD',
             'kUUao+hr0lQngk39di50O+tLaHr8gDysjU4kByHsRjfBZdcofkQOvqrqo3z9fa+q',
             'n8V3VS144HCHWjpVchVpMmHqEPCkDaoGMasoTdDpZUwGYuGoxIbKcWvh8rHY+ab+',
             'AEnIXMg=',
         ].join('')), c => c.charCodeAt(0));

         static #wasmStream(memory) {
             const S = SFMT19937L;
             const stream = new Blob([S.#wasmBinary]).stream();
             const deflate = new DecompressionStream('deflate');
             const option = {headers: {"Content-Type": "application/wasm"}}
             return new Response(stream.pipeThrough(deflate), option);
         }

         #S;
         #seed;
         #memory;
         #state;
         #module;
         #exports;

         static async Instantiate(seed, memory) {
             return await new SFMT19937L(seed, memory).#instantiate();
         }

         constructor(seed, memory) {
             this.#S = SFMT19937L;
             this.#seed = seed;
             this.#memory = memory ?? new WebAssembly.Memory({initial: 1});
             this.#state = new Uint32Array(this.#memory.buffer, 32, 624);
         }

         async #instantiate() {
             const res = this.#S.#wasmStream(this.#memory);
             const imports = {module: {memory: this.#memory}}
             return await WebAssembly.instantiateStreaming(res, imports)
                                     .then(m => this.#instantiated(m));
         }

         #instantiated(mod) {
             const wasm = mod.instance.exports;

             this.#module = mod;
             this.#exports = wasm;

             const seed = this.#seed ?? 5489;

             if (seed instanceof Array)
                 this.#init_array(seed);
             else
                 this.#init_scalar(seed)
             wasm.cert_seed ();
             return this;
         }

         #init_scalar(seed) {
             const state = this.#state;
             let s = seed;

             state[0] = s;
             for (let i = 1; i < 624; ++i) {
                 const x = BigInt((s ^ (s >>> 30)) >>> 0);
                 const n = Number((x * 1812433253n) & 0xffffffffn);
                 state[i] = s = (n + i) >>> 0;
             }
         }

         #init_array(seed) {
             throw Error("未実装");
         }

         generate(count) {
             const value_size = count * 16;
             const value_start = (8 + 624) * 4;
             const value_end = value_start + value_size;
             const memsize = (value_end + 0xffff) >>> 16;

             if ((memsize << 16) > this.#memory.buffer.byteLength)
                 this.#memory.grow(memsize);
             this.#exports.generate(count);
             return new Uint32Array(this.#memory.buffer, value_start, value_size >> 2);
         }
     }

     /*
      * 以下、砂嵐画面処理
      */

     class SandStorm
     {
         constructor(tag, rand) {
             const width = 720;
             const height = 480;
             tag.width = width;
             tag.height = height;

             this.tag = tag;
             this.rand = rand;
             this.ctx = this.tag.getContext('2d');
             this.width = this.ctx.canvas.width;
             this.height = this.ctx.canvas.height;
             this.image = this.ctx.createImageData(width, height);
             this.pixel = this.image.data;

             this.enable = true;
             this.tag.onclick = (() => this.onclick ());
             this.best = null;
         }
         update() {
             if (!this.enable)
                 return;

             const pixel = this.pixel;
             const gcnt = (pixel.length + 15) >> 4;

             const p_s = performance.now();
             const val = this.rand.generate(gcnt);
             const p_e = performance.now() - p_s;

             if (!this.best || this.best > p_e)
                 this.best = p_e;

             let c = 0;
             for (let i = 0; i < pixel.length; i += 4, c >>= 8) {
                 if (!(i & 0x0c))
                     c = val[i >> 2];
                 const g = c & 255;
                 pixel[i + 0] = g;
                 pixel[i + 1] = g;
                 pixel[i + 2] = g;
                 pixel[i + 3] = 255;
             }

             this.ctx.putImageData(this.image, 0, 0);
         }
         onclick() {
             this.enable = !this.enable;
         }
     }

     async function test() {
         const rsfmt = await SFMT19937L.Instantiate();
         const rmt = new MT19937();

         const ssfmt = new SandStorm(tagSFMT19937, rsfmt);
         const smt = new SandStorm(tagMT19937, rmt);

         const render = (() => {
             window.requestAnimationFrame(render);
             ssfmt.update();
             smt.update();

             tagSFMT19937Perf.innerText = ('' + ssfmt.best).slice(0, 8) + 'ms';
             tagMT19937Perf.innerText = ('' + smt.best).slice(0, 8) + 'ms';

         });
         window.requestAnimationFrame(render);
     }

     window.onload = test;

    </script>
  </body>
</html>

WebAssembly 用ソース

アセンブルは自作アセンブラを使用します。

sfmt19937.S
;; -*- WASM -*-

;;
;; 定数 - SFMT19937 用パラメータ
;;

MEXP        .alias          19937
POS1        .alias          122
NP1         .alias          (POS1 << 4)

SL1         .alias          18
SL2         .alias          1
SR1         .alias          11
SR2         .alias          1

MSK1        .alias          0xdfffffef
MSK2        .alias          0xddfecb7f
MSK3        .alias          0xbffaffff
MSK4        .alias          0xbffffff6

PAR1        .alias          0x00000001
PAR2        .alias          0x00000000
PAR3        .alias          0x00000000
PAR4        .alias          0x13c9e684

NST         .alias          ((MEXP >> 7) + 1)   ;  156 = ((19937 >> 7) + 1)
NDW         .alias          (NST << 2)          ;  624 = 156 << 2
NBD         .alias          (NST << 4)          ; 2496 = 156 << 4

STATE_P     .alias          0
STATE       .alias          (STATE_P + 32)

;;
;; 作業領域
;;

            .import.memory  "module", "memory", 1

;;
;; 初期化補助 - cert_seed()
;;

$xpar       .defmacro       par, offs   ; [STATE+offs] ^= par & -par
            u32.const       offs        ; (*1)
            u32.const       offs
            u32.load        STATE       ; t1 = [STATE+offs]
            u32.const       par & -par  ; t2 = par & -par
            u32.xor                     ; t3 = t1 ^ t2
            u32.store       STATE       ; (*1) [STATE+offs] = t3
            .endmacro

            .export         cert_seed
cert_seed   .code
tmp         .local          v128

            u32.const       0           ; tmp = [STATE].u32x4 & [PAR1,PAR2,PAR3,PAR4]
            v128.load       STATE
            u32x4.const     PAR1, PAR2, PAR3, PAR4
            v128.and
            local.tee       tmp
            u8x16.const     *([8:16]+[0:8]) ; [8..15, 0..7]
            u8x16.swizzle
            local.get       tmp
            v128.xor                    ; t1 = tmp.u128 ^ rotr(tmp.u128, 64)
            u64x2.extract_lane 0        ; t2 = t1.u64[0]
            u64.popcnt                  ; t3 = popcnt(t2)
            u64.const       1           ; t4 = t3 & 1
            u64.and
            u64.eqz                     ; cond = (t4 == 0)
            if                          ; if (cond) {
@if PAR1
            $xpar           PAR1, 0*4   ; [STATE+0*4] ^= PAR1 & -PAR1
@elif PAR2
            $xpar           PAR2, 1*4   ; [STATE+1*4] ^= PAR2 & -PAR2
@elif PAR3
            $xpar           PAR3, 2*4   ; [STATE+2*4] ^= PAR3 & -PAR3
@elif PAR4
            $xpar           PAR4, 3*4   ; [STATE+3*4] ^= PAR4 & -PAR4
@endif
            end.if                      ; }

            ; [STATE_P+0] = [STATE+NBD-32].v128
            u32.const       STATE_P
            u32.const       STATE + NBD - 32
            v128.load
            v128.store

            ; [STATE_P+16] = [STATE+NBD-16].v128
            u32.const       STATE_P + 16
            u32.const       STATE + NBD - 16
            v128.load
            v128.store

            end

;;
;; 生成関数 - generate(cnt)
;;

            .export         generate
generate    .code
cnt         .param          i32
pend        .local          i32
offs        .local          i32
dest        .local          i32
cur         .local          v128

            u32.const       STATE_P     ; offs = STATE_P
            local.tee       offs

            ; local.get     offs
            local.get       cnt         ; pend = STATE_P + cnt * 16
            u32.const       4
            u32.shl
            u32.add
            local.set       pend

$genlp:     loop                        ; loop {

            local.get       offs        ; (*1)

            local.get       offs        ; sr2 = [offs+0].u128 >> (SR2*8)
            v128.load       0
            u8x16.const     *([0:16][SR2:16]+[16]*16)[0:16]
            u8x16.swizzle

            local.get       offs        ; sl1 = [offs+16].u32x4 << SL1
            v128.load       16
            u32.const       SL1
            u32x4.shl

            v128.xor                    ; x1 = sr2 ^ sl1

            local.get       offs        ; sl2 = (cur = [offs+32]).u128 << (SL2*8)
            v128.load       32
            local.tee       cur
            u8x16.const     *([16]*SL2+[0:16])[0:16]
            u8x16.swizzle

            local.get       offs        ; sr1 = [offs+NP1+32].u32x4 >> SR1
            v128.load       NP1+32
            u32.const       SR1
            u32x4.shr

            u32x4.const     MSK1, MSK2, MSK3, MSK4
            u32x4.and                   ; sr1 &= { MSK1, MSK2, MSK3, MSK4 }

            v128.xor                    ; x2 = sl2 ^ sr1
            v128.xor                    ; x3 = x1 ^ x2

            local.get       cur         ; x4 = x3 ^ cur
            v128.xor

            v128.store      NBD+32      ; (*1) [offs+NBD+32] = x4

            local.get       offs        ; offs += 16
            u32.const       16
            u32.add
            local.tee       offs
            local.get       pend        ; if (offs < pend) goto $genlp
            u32.lt          $genlp

            end.loop                    ; } -> $genlp

            ;;

            local.get       cnt         ; pend = STATE + NBD + (cnt * 16)
            u32.const       4
            u32.shl
            u32.const       STATE+NBD
            u32.add
            local.tee       pend
            ; local.get     pend        ; offs = pend - (NBD + (STATE - STATE_P))
            u32.const       NBD+32
            u32.sub
            local.set       offs

            u32.const       STATE_P     ; dest = STATE_P
            local.set       dest

$elp:       loop                        ; loop {

            local.get       dest        ; [dest] = [offs].v128
            local.get       offs
            v128.load
            v128.store

            local.get       dest        ; dest += 16
            u32.const       16
            u32.add
            local.set       dest

            local.get       offs        ; offs += 16
            u32.const       16
            u32.add
            local.tee       offs
            local.get       pend        ; if (offs < pend) goto $elp
            u32.lt          $elp

            end.loop                    ; } -> $elp

;            call            prep

            end

;; end: sfmt19937.S
アセンブル
% wasmgen  --deflate --base64+w=64,q=1,c=1 sfmt19937.S -
'eNpVjrFOw0AQRHf3DsfJOYkVNzRIZ1d8AXWOjiYiX0AickIymEixKVAKF/A3iJ4u',
'aYGGH0CkoqWjQFrMxUqQcsVIo52beTDOMwQA9GkEMMISqIdeNp3cXFkvs9l0dkuA',
'Qjhp7FPz3M6Ks9zaCYB/Ya/tbFxYwNYTUYo4N8AgNQfrwu27/3yOeJAAB36zpYJ2',
'pwtIQu55De5o4CEfwPwY705l34CBQ9IG8yPSyrll5OqAlQQTmrd/o15cQbleS0iD',
'kUUao+hr0lQngk39di50O+tLaHr8gDysjU4kByHsRjfBZdcofkQOvqrqo3z9fa+q',
'n8V3VS144HCHWjpVchVpMmHqEPCkDaoGMasoTdDpZUwGYuGoxIbKcWvh8rHY+ab+',
'AEnIXMg=',
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?