More than 5 years have passed since last update.

SHA-256 をつくる

Last updated at Posted at 2017-12-15

暗号の世界はわかりづらい。3DES は DES より強力だが 2DES は大した強度にならないことが知られていたりする。

Wikipedia によると「SHA-2は、Secure Hash Algorithmシリーズの暗号学的ハッシュ関数」となっている。計算方法は単純なので Python での実装はそう難しくない。


from polyphony import testbench, module, is_worker_running
from polyphony import rule

# Table of 64 32-bit round constants for SHA-256.  _sha256() adds _k[i]
# into t1 during round i of its 64-round compression loop.
_k = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
      0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
      0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
      0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
      0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
      0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
      0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
      0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
      0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]

# Initial hash state: eight 32-bit words.  sha256() copies these into the
# caller's state list before the first block is compressed, so this table
# itself is never modified.
_h = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
      0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19]

def rotr(x, y):
    # Rotate x right by y bit positions within a 32-bit word.
    # The bits shifted out at the bottom re-enter at the top; the final
    # mask discards everything the left shift pushed above bit 31.
    # x is expected to already fit in 32 bits.
    low_part = x >> y
    wrapped_part = x << (32 - y)
    return (low_part | wrapped_part) & 0xFFFFFFFF

def _sha256(msg, _h, w):
    # Compress one 16-word block into the running hash state.
    #
    #   msg -- sequence whose first 16 entries are the 32-bit words of the block
    #   _h  -- list of eight 32-bit state words, updated in place
    #   w   -- scratch list with at least 64 slots; overwritten on every call
    #
    # Nothing is returned: the result is left in _h.
    mask = 0xFFFFFFFF

    # Message schedule: words 0..15 are the block itself ...
    for t in range(16):
        w[t] = msg[t]

    # ... and words 16..63 are derived from four earlier schedule words.
    for t in range(16, 64):
        older = w[t - 15]
        newer = w[t - 2]
        sigma0 = rotr(older, 7) ^ rotr(older, 18) ^ (older >> 3)
        sigma1 = rotr(newer, 17) ^ rotr(newer, 19) ^ (newer >> 10)
        w[t] = (w[t - 16] + sigma0 + w[t - 7] + sigma1) & mask

    # Working registers start out as a copy of the current state.
    a, b, c, d = _h[0], _h[1], _h[2], _h[3]
    e, f, g, h = _h[4], _h[5], _h[6], _h[7]

    for t in range(64):
        big_sigma1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)
        choose = (e & f) ^ ((~e) & g)
        temp1 = h + big_sigma1 + choose + _k[t] + w[t]
        big_sigma0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22)
        majority = (a & b) ^ (a & c) ^ (b & c)
        temp2 = big_sigma0 + majority

        # Shift the registers down by one; e and a receive the new values.
        # temp1/temp2 are left unmasked on purpose: only the sums stored
        # into e and a are reduced to 32 bits.
        h, g, f = g, f, e
        e = (d + temp1) & mask
        d, c, b = c, b, a
        a = (temp1 + temp2) & mask

    # Fold the working registers back into the state, modulo 2**32.
    final = [a, b, c, d, e, f, g, h]
    for idx in range(8):
        _h[idx] = (_h[idx] + final[idx]) & mask

def sha256(msg, h):
    # Compute the SHA-256 digest of a single 64-byte message.
    #
    #   msg -- the message as 16 32-bit words (only msg[0..15] are read)
    #   h   -- list that receives the eight 32-bit digest words
    #
    # The padding block below is hard-coded for a 512-bit message (length
    # word 0x200), so this entry point is only valid for exactly one block
    # of input.
    for idx in range(len(_h)):
        h[idx] = _h[idx]

    schedule = [None] * 64
    _sha256(msg, h, schedule)

    # Second block: the 0x80 end marker, zero fill, and the bit length.
    padding = [0] * 16
    padding[15] = 0x00000200
    padding[0] = 0x80000000
    _sha256(padding, h, schedule)

def test():
    # Hash one 64-byte block made of the word 0x61616161 repeated 16 times
    # and print the eight resulting digest words.
    msg_lst = [0x61616161] * 16
    h = [None] * 8
    sha256(msg_lst, h)
    for i in h:
        # The loop previously had no executable body (its only line was
        # commented out), which is a syntax error.  Print the raw word;
        # when running under plain Python, the formatted variant below is
        # handier for comparing against a reference digest.
        # print('R   {:08x}'.format(i))
        print(i)


sha256 を実際に作ってみてわかるのは、並列計算ができないという点だ。1つのブロックを全部計算してからでないと次のブロックの計算ができない。暗号に関連するコードだけに、そのように設計されているようだ。したがって、処理時間は単純にブロック数に比例するだろう。

ソース上は 32bit の配列(リスト)をつかうようになっている。Polyphony では 32bit より広いビット数の値を扱うことが出来る。デフォルトでは 128bit まで扱えるが、無理矢理、コンパイラの中身をちょっと変えることで 256bit や 512bit も扱うことが出来る。

from polyphony import testbench, module, is_worker_running
from polyphony.typing import bit, bit512, bit256, bit32, uint3, uint4, List
from polyphony.io import Port, Queue
from polyphony.timing import clksleep, clkfence, wait_rising, wait_falling
from polyphony import rule

# Table of 64 32-bit round constants for SHA-256.  The compression loop in
# sha256.process_sha256 adds k[i] into t1 during round i.
k = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
     0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
     0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
     0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
     0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
     0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
     0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
     0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
     0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
     0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
     0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
     0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
     0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
     0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
     0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
     0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]

# Initial hash state: eight 32-bit words.  process_sha256 copies these into
# its private state list at the start of every message, so this table is
# read-only.
h = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
     0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19]

def bit32x8_bit256(lst:List[bit32])->bit256:
    # Pack lst[0..7] into one 256-bit value, with lst[0] ending up in the
    # most significant 32 bits and lst[7] in the least significant.
    packed = 0
    for idx in range(8):
        packed = (packed << 32) | lst[idx]
    return packed

def bit32x16_bit512(lst:List[bit32], start_i = 0)->bit512:
    # Pack the 16 consecutive words lst[start_i .. start_i + 15] into one
    # 512-bit value; lst[start_i] becomes the most significant 32 bits.
    packed = 0
    for offset in range(16):
        packed = (packed << 32) | lst[start_i + offset]
    return packed

def rotr(x, y):
    # 32-bit rotate right: the low y bits of x wrap around to the top.
    # x is expected to already fit in 32 bits; the mask only removes the
    # overflow produced by the left shift.
    mask32 = 0xFFFFFFFF
    rotated = (x << (32 - y)) | (x >> y)
    return rotated & mask32

@module
class sha256:
    # SHA-256 engine fed with 512-bit blocks over Polyphony queues.
    #
    # Protocol on data_in: first one entry giving the number of 512-bit
    # blocks that follow, then that many blocks.  The blocks are hashed
    # exactly as received (no padding is added here).  After the last
    # block has been consumed the 256-bit digest is written to data_out.

    def __init__(self):
        self.data_in = Queue(bit512, 'in')
        self.data_out = Queue(bit256, 'out')
        # Register the worker; without this process_sha256 never runs.
        self.append_worker(self.process_sha256)

    def process_sha256(self):
        # Worker loop: hashes one message per iteration of the outer while.
        work = [0] * 64   # type: List[bit32]
        _h = [0] * 8    # type: List[bit32]
        regs = [0] * 8  # type: List[bit32]

        while is_worker_running():
            # Every message starts from the initial hash values.
            for i in range(8):
                _h[i] = h[i]

            # The first queue entry is the block count, not message data.
            block_len512:bit512 = self.data_in.rd()
            block_len32 = block_len512
            count = 0

            while count < block_len32:
                count += 1
                d512 = self.data_in.rd()
                shift_n = 480

                # Split the block into 16 words, most significant first.
                with rule(unroll='full'):
                    for i in range(16):
                        work[i] = (d512 >> shift_n) & 0xFFFFFFFF
                        shift_n -= 32

                # Extend the 16 block words to the 64-word message schedule.
                for i in range(16, 64):
                    wi_15 = work[i - 15]
                    s0 = rotr(wi_15, 7) ^ rotr(wi_15, 18) ^ (wi_15 >> 3)
                    wi_2 = work[i - 2]
                    s1 = rotr(wi_2, 17) ^ rotr(wi_2, 19) ^ (wi_2 >> 10)
                    wi_16 = work[i - 16]
                    wi_7 = work[i - 7]
                    work[i] = (wi_16 + s0 + wi_7 + s1) & 0xFFFFFFFF

                # regs[0..7] play the role of the working variables a..h.
                for i in range(8):
                    regs[i] = _h[i]

                for i in range(64):
                    s0 = rotr(regs[0], 2) ^ rotr(regs[0], 13) ^ rotr(regs[0], 22)
                    maj = (regs[0] & regs[1]) ^ (regs[0] & regs[2]) ^ (regs[1] & regs[2])
                    t2 = s0 + maj
                    s1 = rotr(regs[4], 6) ^ rotr(regs[4], 11) ^ rotr(regs[4], 25)
                    ch = (regs[4] & regs[5]) ^ ((~regs[4]) & regs[6])
                    t1 = regs[7] + s1 + ch + k[i] + work[i]

                    # Shift the registers down; order matters because each
                    # assignment reads a slot that has not been overwritten yet.
                    regs[7] = regs[6]
                    regs[6] = regs[5]
                    regs[5] = regs[4]
                    regs[4] = (regs[3] + t1) & 0xFFFFFFFF
                    regs[3] = regs[2]
                    regs[2] = regs[1]
                    regs[1] = regs[0]
                    regs[0] = (t1 + t2) & 0xFFFFFFFF

                # Fold this block's result into the running state.
                with rule(unroll='full'):
                    for i in range(8):
                        _h[i] = (_h[i] + regs[i]) & 0xFFFFFFFF

            # All blocks consumed: publish the digest.  Previously nothing
            # was ever written to data_out, so a reader would wait forever.
            self.data_out.wr(bit32x8_bit256(_h))


@testbench
def test(m):
    # Testbench: pads a word-aligned message, streams it to the sha256
    # module (block count first, then the 512-bit blocks) and prints the
    # digest read back from data_out.
    lst = [0x61616161] * 16
    blen = len(lst)
    # Padding needs at least 9 more bytes: the 0x80 marker byte plus the
    # 64-bit length field.  Round the total up to whole 64-byte blocks.
    blocks = ((blen * 4 + 9) + 63) // 64
    print("blocks", blocks)

    # The module reads the number of blocks before any message data.
    m.data_in.wr(blocks)

    total_words = blocks * 16
    bit_len = blen * 32
    block = [0] * 16
    pos = 0  # index of the current word within the padded message
    for i in range(blocks):
        for j in range(16):
            if pos < blen:
                block[j] = lst[pos]
            elif pos == blen:
                # End-of-message marker goes right after the last data word.
                block[j] = 0x80000000
            elif pos == total_words - 2:
                block[j] = (bit_len >> 32) & 0xFFFFFFFF
            elif pos == total_words - 1:
                # Message length in bits (not the block count).
                block[j] = bit_len & 0xFFFFFFFF
            else:
                block[j] = 0
            pos += 1
        v512 = bit32x16_bit512(block)
        print('v512', v512)
        m.data_in.wr(v512)

    v256:bit256 = m.data_out.rd()
    print('sha256', v256)
    # str.format is not available when compiling with Polyphony; enable the
    # line below when running under plain Python to compare with a
    # reference SHA-256 digest.
    #print('R   {:064x}'.format(v256))


今度のソースはちゃんとブロックも考慮するようにした。現状では Python の format が使えないのでその部分はコメントアウトされている。Python でのデバッグ時に有効にすれば、実際の SHA-256 と比較出来て便利だろう。

# おまけ 256bit と 512bit 追加

256bit と 512bit 用のパッチをおまけとしてつけておく。

diff --git a/polyphony/_internal/_typing.py b/polyphony/_internal/_typing.py
index c43b648..2cb800f 100644
--- a/polyphony/_internal/_typing.py
+++ b/polyphony/_internal/_typing.py
@@ -52,6 +52,8 @@ __all__ = [
     'uint121', 'uint122', 'uint123', 'uint124', 'uint125', 'uint126', 'uint127', 'uint128',
+    'bit512',
+    'bit256',
 class bit: pass
@@ -183,6 +185,9 @@ class bit126: pass
 class bit127: pass
 class bit128: pass
+class bit256: pass
+class bit512: pass
 class int2: pass
 class int3: pass
 class int4: pass
diff --git a/polyphony/typing.py b/polyphony/typing.py
index 6f3078d..08c0dd9 100644
--- a/polyphony/typing.py
+++ b/polyphony/typing.py
@@ -176,6 +176,8 @@ class bit125(int_base): pass
 class bit126(int_base): pass
 class bit127(int_base): pass
 class bit128(int_base): pass
+class bit256(int_base): pass
+class bit512(int_base): pass
 class int2(int_base): pass
 class int3(int_base): pass

