Python
FPGA
Polyphony
More than 1 year has passed since last update.

暗号の世界はわかりづらい。3DES は DES より強力だが 2DES は大した強度にならないことが知られていたりする。

Wikipedia によると「SHA-2は、Secure Hash Algorithmシリーズの暗号学的ハッシュ関数」となっている。計算方法は単純なので Python での実装はそう難しくない。

終端ブロックに完全に対応しておらず、いささかやっつけ感があるがソースを示そう。

sha256_func.py
from polyphony import testbench, module, is_worker_running
from polyphony import rule

_k = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
      0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
      0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
      0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
      0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
      0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
      0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
      0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
      0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]

_h = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
      0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19]

def rotr(x, y):
    return ((x >> y) | (x << (32 - y))) & 0xFFFFFFFF

def _sha256(msg, _h, w):
    for i in range(16):
        w[i] = msg[i]

    for i in range(16, 64):
        wi_15 = w[i - 15]
        s0 = rotr(wi_15, 7) ^ rotr(wi_15, 18) ^ (wi_15 >> 3)
        wi_2 = w[i - 2]
        s1 = rotr(wi_2, 17) ^ rotr(wi_2, 19) ^ (wi_2 >> 10)
        wi_16 = w[i - 16]
        wi_7 = w[i - 7]
        w[i] = (wi_16 + s0 + wi_7 + s1) & 0xFFFFFFFF

    a = _h[0]
    b = _h[1]
    c = _h[2]
    d = _h[3]
    e = _h[4]
    f = _h[5]
    g = _h[6]
    h = _h[7]

    for i in range(64):
        s0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22)
        maj = (a & b) ^ (a & c) ^ (b & c)
        t2 = s0 + maj
        s1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)
        ch = (e & f) ^ ((~e) & g)
        t1 = h + s1 + ch + _k[i] + w[i]

        h = g
        g = f
        f = e
        e = (d + t1) & 0xFFFFFFFF
        d = c
        c = b
        b = a
        a = (t1 + t2) & 0xFFFFFFFF

    _lst = [a, b, c, d, e, f, g, h]

    for i in range(8):
        _h[i] = (_h[i] + _lst[i]) & 0xFFFFFFFF

def sha256(msg, h):
    for i in range(len(_h)):
        h[i] = _h[i]
    work = [None] * 64
    _sha256(msg, h, work)

    tail_blk = [0] * 16
    tail_blk[0] = 0x80000000
    tail_blk[15] = 0x00000200
    _sha256(tail_blk, h, work)

@testbench
def test():
    msg_lst = [0x61616161] * 16
    h = [None] * 8
    sha256(msg_lst, h)
    for i in h:
        print(i)
#        print('R   {:08x}'.format(i))

test()

sha256 を実際に作ってみてわかるのは並列計算ができない点にある。1つのブロックを全部計算してからでないと次のブロックの計算ができない。暗号に関連するコードだけにそのように設計されているようだ。したがって、処理時間は単純にブロック数に比例するだろう。

ソース上は 32bit の配列(リスト)をつかうようになっている。Polyphony では 32bit より広いビット数の値を扱うことが出来る。デフォルトでは 128bit まで扱えるが、無理矢理、コンパイラの中身をちょっと変えることで 256bit や 512bit も扱うことが出来る。

sha256.py
from polyphony import testbench, module, is_worker_running
from polyphony.typing import bit, bit512, bit256, bit32, uint3, uint4, List
from polyphony.io import Port, Queue
from polyphony.timing import clksleep, clkfence, wait_rising, wait_falling
from polyphony import rule

k = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
     0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
     0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
     0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
     0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
     0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
     0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
     0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
     0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
     0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
     0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
     0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
     0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
     0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
     0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
     0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]

h = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
     0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19]

def bit32x8_bit256(lst:List[bit32])->bit256:
    rv256:bit256
    rv256 = 0
    for i in range(8):
        rv256 <<= 32
        rv256 |= lst[i]
    return rv256

def bit32x16_bit512(lst:List[bit32], start_i = 0)->bit512:
    rv512:bit512
    rv512 = 0
    #print('start_i', start_i)
    for i in range(16):
        rv512 <<= 32
        rv512 |= lst[start_i]
        start_i += 1
    #print('end start_i', start_i)
    return rv512

def rotr(x, y):
    return ((x >> y) | (x << (32 - y))) & 0xFFFFFFFF

@module
class sha256:
    def __init__(self):
        self.data_in = Queue(bit512, 'in')
        self.data_out = Queue(bit256, 'out')
        self.append_worker(self.process_sha256)

    def process_sha256(self):
        work = [0] * 64   # type: List[bit32]
        _h = [0] * 8    # type: List[bit32]
        __h = [0] * 8  # type: List[bit32]

        while is_worker_running():
            update = True

            for i in range(8):
                _h[i] = h[i]

            block_len512:bit512 = self.data_in.rd()
            block_len32 = block_len512
            count = 0
            #print(block_len512)
            #print(block_len32)

            while count < block_len32:
                #print(count, block_len32)
                count += 1
                #print("--=========")
                d512 = self.data_in.rd()
                shift_n = 480

                with rule(unroll='full'):
                    for i in range(16):
                        work[i] = (d512 >> shift_n) & 0xFFFFFFFF
                        shift_n -= 32

                for i in range(16, 64):
                    wi_15 = work[i - 15]
                    s0 = rotr(wi_15, 7) ^ rotr(wi_15, 18) ^ (wi_15 >> 3)
                    wi_2 = work[i - 2]
                    s1 = rotr(wi_2, 17) ^ rotr(wi_2, 19) ^ (wi_2 >> 10)
                    wi_16 = work[i - 16]
                    wi_7 = work[i - 7]
                    work[i] = (wi_16 + s0 + wi_7 + s1) & 0xFFFFFFFF

                for i in range(8):
                    __h[i] = _h[i]

                for i in range(64):
                    s0 = rotr(__h[0], 2) ^ rotr(__h[0], 13) ^ rotr(__h[0], 22)
                    maj = (__h[0] & __h[1]) ^ (__h[0] & __h[2]) ^ (__h[1] & __h[2])
                    t2 = s0 + maj
                    s1 = rotr(__h[4], 6) ^ rotr(__h[4], 11) ^ rotr(__h[4], 25)
                    ch = (__h[4] & __h[5]) ^ ((~__h[4]) & __h[6])
                    t1 = __h[7] + s1 + ch + k[i] + work[i]

                    __h[7] = __h[6]
                    __h[6] = __h[5]
                    __h[5] = __h[4]
                    __h[4] = (__h[3] + t1) & 0xFFFFFFFF
                    __h[3] = __h[2]
                    __h[2] = __h[1]
                    __h[1] = __h[0]
                    __h[0] = (t1 + t2) & 0xFFFFFFFF

                with rule(unroll='full'):
                    for i in range(8):
                        _h[i] = (_h[i] + __h[i]) & 0xFFFFFFFF

            self.data_out.wr(bit32x8_bit256(_h))

@testbench
def test(m):
    lst = [0x61616161] * 16
    blen = len(lst)
    blocks = ((blen * 4 + 5) + 63) // 64
    print("blocks", blocks)

    start_i = 0
    m.data_in.wr(blocks)
    for i in range(blocks - 1):
        v512= bit32x16_bit512(lst, start_i)
        print('v512', v512)
        m.data_in.wr(v512)
        start_i += 16

    #send last block
    last_block = [0] * 16
    for i in range(blen - start_i):
        last_block[i] = lst[start_i]
        start_i += 1

    last_block[blen - start_i] = 0x80000000
    last_block[15] = (blocks << 8)

    v512_last = bit32x16_bit512(last_block)
    #print(v512_last)
    m.data_in.wr(v512_last)

    v256:bit256 = m.data_out.rd()
    print('sha256', v256)
    #print('R   {:032x}'.format(v256))

m=sha256()
test(m)

今度のソースはちゃんとブロックも考慮するようにした。現状では Python の format が使えないのでその部分はコメントアウトされている。Python でのデバッグ時に有効にすれば、実際の SHA-256 と比較出来て便利だろう。

おまけ 256bit と 512bit 追加

256bit と 512bit 用のパッチをおまけとしてつけておく。

diff --git a/polyphony/_internal/_typing.py b/polyphony/_internal/_typing.py
index c43b648..2cb800f 100644
--- a/polyphony/_internal/_typing.py
+++ b/polyphony/_internal/_typing.py
@@ -52,6 +52,8 @@ __all__ = [
     'uint121', 'uint122', 'uint123', 'uint124', 'uint125', 'uint126', 'uint127', 'uint128',
     'List',
     'Tuple',
+    'bit512',
+    'bit256',
 ]

 class bit: pass
@@ -183,6 +185,9 @@ class bit126: pass
 class bit127: pass
 class bit128: pass

+class bit256: pass
+class bit512: pass
+
 class int2: pass
 class int3: pass
 class int4: pass
diff --git a/polyphony/typing.py b/polyphony/typing.py
index 6f3078d..08c0dd9 100644
--- a/polyphony/typing.py
+++ b/polyphony/typing.py
@@ -176,6 +176,8 @@ class bit125(int_base): pass
 class bit126(int_base): pass
 class bit127(int_base): pass
 class bit128(int_base): pass
+class bit256(int_base): pass
+class bit512(int_base): pass

 class int2(int_base): pass
 class int3(int_base): pass