LoginSignup
10
6

More than 5 years have passed since last update.

quick sort with wasm(wast)

Last updated at Posted at 2016-05-17

概要

WebAssemblyのwastでクイックソートを書いてwasmに変換し、デフォルトのソートとベンチマークをとって比較してみました(実際に動作しているものはqsort benchを参照)。

wast

これを移植。wasmへの変換はWebAssembly/wabt: The WebAssembly Binary Toolkitを使用。

(module
  (import "js" "mem" (memory 256))

  (func $med3_f64 (param $x f64) (param $y f64) (param $z f64) (result f64)
    get_local $x
    get_local $y
    f64.lt
    if f64
      get_local $y
      get_local $z
      f64.lt
      if
        get_local $y
        return
      end
      get_local $x
      get_local $z
      get_local $z
      get_local $x
      f64.lt
      select
    else
      get_local $z
      get_local $y
      f64.lt
      if
        get_local $y
        return
      end
      get_local $x
      get_local $z
      get_local $x
      get_local $z
      f64.lt
      select
    end)

  (func $med3_i32 (param $x i32) (param $y i32) (param $z i32) (result i32)
    get_local $x
    get_local $y
    i32.lt_s
    if i32
      get_local $y
      get_local $z
      i32.lt_s
      if
        get_local $y
        return
      end
      get_local $x
      get_local $z
      get_local $z
      get_local $x
      i32.lt_s
      select
    else
      get_local $z
      get_local $y
      i32.lt_s
      if
        get_local $y
        return
      end
      get_local $x
      get_local $z
      get_local $x
      get_local $z
      i32.lt_s
      select
    end)

  (func $qsort_f64 (param $left i32) (param $right i32)
    (local $i i32)
    (local $j i32)
    (local $pivot f64)

    get_local $left
    get_local $right
    i32.lt_s
    if
      get_local $left
      tee_local $i
      f64.load
      get_local $right
      tee_local $j
      f64.load
      get_local $i
      get_local $j
      get_local $i
      i32.sub
      i32.const 1
      i32.shr_u
      i32.add
      i32.const 0xfffffff8
      i32.and
      f64.load
      call $med3_f64
      set_local $pivot
      loop $cont
        loop
          get_local $i
          f64.load
          get_local $pivot
          f64.lt
          if
            get_local $i
            i32.const 8
            i32.add
            set_local $i
            br 1
          end
        end
        loop
          get_local $pivot
          get_local $j
          f64.load
          f64.lt
          if
            get_local $j
            i32.const 8
            i32.sub
            set_local $j
            br 1
          end
        end
        get_local $i
        get_local $j
        i32.lt_s
        if
          get_local $j
          get_local $i
          f64.load
          get_local $i
          get_local $j
          f64.load
          f64.store
          f64.store
          get_local $i
          i32.const 8
          i32.add
          set_local $i
          get_local $j
          i32.const 8
          i32.sub
          set_local $j
          br $cont
        end
      end
      get_local $left
      get_local $i
      i32.const 8
      i32.sub
      call $qsort_f64
      get_local $j
      i32.const 8
      i32.add
      get_local $right
      call $qsort_f64
    end)

  (func (export "qsort_f64") (param i32 i32)
    get_local 0
    i32.const 3
    i32.shl
    get_local 1
    i32.const 1
    i32.sub
    i32.const 3
    i32.shl
    call $qsort_f64)

  (func $qsort_i32 (param $left i32) (param $right i32)
    (local $i i32)
    (local $j i32)
    (local $pivot i32)

    get_local $left
    get_local $right
    i32.lt_s
    if
      get_local $left
      tee_local $i
      i32.load
      get_local $right
      tee_local $j
      i32.load
      get_local $i
      get_local $j
      get_local $i
      i32.sub
      i32.const 1
      i32.shr_u
      i32.add
      i32.const 0xfffffffc
      i32.and
      i32.load
      call $med3_i32
      set_local $pivot
      loop $cont
        loop
          get_local $i
          i32.load
          get_local $pivot
          i32.lt_s
          if
            get_local $i
            i32.const 4
            i32.add
            set_local $i
            br 1
          end
        end
        loop
          get_local $pivot
          get_local $j
          i32.load
          i32.lt_s
          if
            get_local $j
            i32.const 4
            i32.sub
            set_local $j
            br 1
          end
        end
        get_local $i
        get_local $j
        i32.lt_s
        if
          get_local $j
          get_local $i
          i32.load
          get_local $i
          get_local $j
          i32.load
          i32.store
          i32.store
          get_local $i
          i32.const 4
          i32.add
          set_local $i
          get_local $j
          i32.const 4
          i32.sub
          set_local $j
          br $cont
        end
      end
      get_local $left
      get_local $i
      i32.const 4
      i32.sub
      call $qsort_i32
      get_local $j
      i32.const 4
      i32.add
      get_local $right
      call $qsort_i32
    end)

  (func (export "qsort_i32") (param i32 i32)
    get_local 0
    i32.const 2
    i32.shl
    get_local 1
    i32.const 1
    i32.sub
    i32.const 2
    i32.shl
    call $qsort_i32)
)

Benchmark

Benchmark用コード

const mem = new WebAssembly.Memory({ initial: 256 });
let f64arr = new Float64Array(mem.buffer);
let i32arr = new Int32Array(mem.buffer);

// see http://indiegamr.com/generate-repeatable-random-numbers-in-js/
// the initial seed
Math.seed = 6;

// in order to work 'Math.seed' must NOT be undefined,
// so in any case, you HAVE to provide a Math.seed
Math.seededRandom = (max, min) => {
  max = max || 1;
  min = min || 0;

  Math.seed = (Math.seed * 9301 + 49297) % 233280;
  const rnd = Math.seed / 233280;

  return min + rnd * (max - min);
};

function randomizeArray(arr, max, min) {
  Math.seed = 6;
  arr.forEach((x, i) => arr[i] = Math.seededRandom(max, min));
}

const randomizeF64Array = () => randomizeArray(f64arr, 1, -1);
const randomizeI32Array = () => randomizeArray(i32arr, 100000000,-100000000);

function test(arr) {
  for (let i = 0, n = arr.length - 1; i < n; ++i) {
    if (arr[i] > arr[i + 1]) {
      console.log("fail");
      return;
    }
  }
  console.log("ok");
}

function med3(x, y, z) {
  if (x < y) {
    if (y < z) return y; else if (z < x) return x; else return z;
  } else {
    if (z < y) return y; else if (x < z) return x; else return z;
  }
}

function _qsort_64(left, right) {
  if (left < right) {
    let i = left, j = right, tmp, pivot = med3(f64arr[i], f64arr[j], f64arr[i + ((j - i) >>> 1)]);
    while (1) {
      while (f64arr[i] < pivot) i++;
      while (pivot < f64arr[j]) j--;
      if (i >= j) break;
      tmp = f64arr[i]; f64arr[i] = f64arr[j]; f64arr[j] = tmp;
      i++; j--;
    }
    _qsort_64(left, i - 1);
    _qsort_64(j + 1, right);
  }
}

function qsort_f64(start, end) {
  return _qsort_64(start, end - 1);
}

function _qsort_i32(left, right) {
  if (left < right) {
    let i = left, j = right, tmp, pivot = med3(i32arr[i], i32arr[j], i32arr[i + ((j - i) >>> 1)]);
    while (1) {
      while (i32arr[i] < pivot) i++;
      while (pivot < i32arr[j]) j--;
      if (i >= j) break;
      tmp = i32arr[i]; i32arr[i] = i32arr[j]; i32arr[j] = tmp;
      i++; j--;
    }
    _qsort_i32(left, i - 1);
    _qsort_i32(j + 1, right);
  }
}

function qsort_i32(start, end) {
  return _qsort_i32(start, end - 1);
}

fetch('qsort.wasm').then(res => res.arrayBuffer()).then(buffer => {
  return WebAssembly.instantiate(buffer, {
    js: { mem }
  });
}).then(({ module, instance }) => {
  [100, 100000, undefined].forEach(x => {
    f64arr = new Float64Array(mem.buffer, 0, x);
    i32arr = new Int32Array(mem.buffer, 0, x);

    new Benchmark.Suite()
    .add(`[js qsort_f64 ${f64arr.length}items]`, () => qsort_f64(0, f64arr.length))
    .add(`[wasm qsort_f64 ${f64arr.length}items]`, () => instance.exports.qsort_f64(0, f64arr.length))
    .on('start', randomizeF64Array)
    .on('cycle', event => {
      result.appendChild(document.createTextNode(String(event.target) + '\n'));
    })
    .run();
    result.appendChild(document.createTextNode('--------------------------------\n'));
    new Benchmark.Suite()
    .add(`[js qsort_i32 ${i32arr.length}items]`, () => qsort_i32(0, i32arr.length))
    .add(`[wasm qsort_i32 ${i32arr.length}items]`, () => instance.exports.qsort_i32(0, i32arr.length))
    .on('start', randomizeI32Array)
    .on('cycle', event => {
      result.appendChild(document.createTextNode(String(event.target) + '\n'));
    })
    .run();
    result.appendChild(document.createTextNode('--------------------------------\n'));
  });
});

結果

  • OS: OS X El Capitan 10.11.4
  • CPU: 2.4CHz Intel Core i5
  • Browser: 57.0.2987.110 (64-bit)
[js qsort_f64 100items] x 428,842 ops/sec ±3.04% (56 runs sampled)
[wasm qsort_f64 100items] x 687,530 ops/sec ±1.25% (47 runs sampled)
--------------------------------
[js qsort_i32 100items] x 483,102 ops/sec ±1.30% (57 runs sampled)
[wasm qsort_i32 100items] x 692,450 ops/sec ±3.32% (55 runs sampled)
--------------------------------
[js qsort_f64 100000items] x 236 ops/sec ±1.99% (56 runs sampled)
[wasm qsort_f64 100000items] x 383 ops/sec ±0.91% (57 runs sampled)
--------------------------------
[js qsort_i32 100000items] x 267 ops/sec ±1.44% (50 runs sampled)
[wasm qsort_i32 100000items] x 386 ops/sec ±2.57% (48 runs sampled)
--------------------------------
[js qsort_f64 2097152items] x 7.98 ops/sec ±3.78% (24 runs sampled)
[wasm qsort_f64 2097152items] x 12.16 ops/sec ±2.44% (33 runs sampled)
--------------------------------
[js qsort_i32 4194304items] x 4.47 ops/sec ±1.00% (15 runs sampled)
[wasm qsort_i32 4194304items] x 6.40 ops/sec ±1.17% (20 runs sampled)
--------------------------------

jsでほぼ同条件になるように実装してみたら結構速いですね。JITが効いてるのかな?

10
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
10
6