暗号の世界はわかりづらい。3DES は DES より強力だが 2DES は大した強度にならないことが知られていたりする。
Wikipedia によると「SHA-2は、Secure Hash Algorithmシリーズの暗号学的ハッシュ関数」となっている。計算方法は単純なので Python での実装はそう難しくない。
終端ブロックに完全に対応しておらず、いささかやっつけ感があるがソースを示そう。
from polyphony import testbench, module, is_worker_running
from polyphony import rule
# SHA-256 round constants: 64 32-bit words, one per compression round
# (indexed by the round number inside _sha256).
_k = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]
# SHA-256 initial hash value: the eight 32-bit words every digest
# computation starts from (copied into the caller's list by sha256).
_h = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19]
# Rotate the 32-bit value x right by y bit positions.
# The result is masked to 32 bits, so bits shifted out on the right
# re-enter on the left.
def rotr(x, y):
    shifted_down = x >> y
    wrapped_around = x << (32 - y)
    return (shifted_down | wrapped_around) & 0xFFFFFFFF
# Run the SHA-256 compression function over one 512-bit block.
#   msg : list whose first 16 entries are the 32-bit words of the block
#   _h  : list of eight 32-bit chaining words; updated in place
#   w   : caller-supplied 64-entry scratch list for the message schedule
# Nothing is returned; the result is the updated content of _h.
def _sha256(msg, _h, w):
    # Message schedule: words 0..15 are the block itself, words 16..63 are
    # derived from earlier schedule words.
    for t in range(16):
        w[t] = msg[t]
    for t in range(16, 64):
        x = w[t - 15]
        y = w[t - 2]
        sigma0 = rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3)
        sigma1 = rotr(y, 17) ^ rotr(y, 19) ^ (y >> 10)
        w[t] = (w[t - 16] + sigma0 + w[t - 7] + sigma1) & 0xFFFFFFFF

    # Working variables start from the current chaining value.
    a = _h[0]
    b = _h[1]
    c = _h[2]
    d = _h[3]
    e = _h[4]
    f = _h[5]
    g = _h[6]
    h = _h[7]

    # 64 rounds.  temp1/temp2 are left unmasked; only the values stored back
    # into a and e are reduced to 32 bits.
    for t in range(64):
        sum1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)
        choice = (e & f) ^ ((~e) & g)
        temp1 = h + sum1 + choice + _k[t] + w[t]
        sum0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22)
        majority = (a & b) ^ (a & c) ^ (b & c)
        temp2 = sum0 + majority
        h = g
        g = f
        f = e
        e = (d + temp1) & 0xFFFFFFFF
        d = c
        c = b
        b = a
        a = (temp1 + temp2) & 0xFFFFFFFF

    # Fold the working variables back into the chaining value (mod 2**32).
    _h[0] = (_h[0] + a) & 0xFFFFFFFF
    _h[1] = (_h[1] + b) & 0xFFFFFFFF
    _h[2] = (_h[2] + c) & 0xFFFFFFFF
    _h[3] = (_h[3] + d) & 0xFFFFFFFF
    _h[4] = (_h[4] + e) & 0xFFFFFFFF
    _h[5] = (_h[5] + f) & 0xFFFFFFFF
    _h[6] = (_h[6] + g) & 0xFFFFFFFF
    _h[7] = (_h[7] + h) & 0xFFFFFFFF
# Compute the SHA-256 digest of a message given as a list of 32-bit words.
#   msg : list of 32-bit big-endian message words (any length, including 0)
#   h   : list with at least eight entries; overwritten with the eight
#         32-bit digest words
# Nothing is returned; the digest is delivered through h.
#
# Padding is derived from len(msg): a 0x80000000 marker word follows the
# message, then zero words, and the last block ends with the 64-bit message
# length in bits.  Only whole words are supported (no byte-granular tail),
# and the upper length word is left at 0, so the message must be shorter
# than 2**27 words.
def sha256(msg, h):
    for i in range(len(_h)):
        h[i] = _h[i]
    work = [None] * 64
    nwords = len(msg)
    # One marker word plus two length words must fit after the message;
    # round the total up to whole 16-word blocks.
    blocks = (nwords + 3 + 15) // 16
    blk = [0] * 16
    pos = 0
    for b in range(blocks):
        for j in range(16):
            if pos < nwords:
                blk[j] = msg[pos]
            elif pos == nwords:
                # First padding word: a single 1 bit followed by zeros.
                blk[j] = 0x80000000
            else:
                blk[j] = 0
            pos += 1
        if b == blocks - 1:
            # Words 14 and 15 of the final block are always padding by
            # construction; word 14 (upper length half) stays 0 and word 15
            # receives the message length in bits.
            blk[15] = nwords << 5
        _sha256(blk, h, work)
# Testbench: hash a 16-word message whose words are all 0x61616161 and
# print the eight resulting digest words, one per line, as plain integers.
@testbench
def test():
    message = [0x61616161] * 16
    digest = [None] * 8
    sha256(message, digest)
    for word in digest:
        print(word)
        # print('R {:08x}'.format(word))


test()
sha256 を実際に作ってみてわかるのは、ブロックをまたいだ並列計算ができないという点だ。1つのブロックを全部計算してからでないと、次のブロックの計算を始められない。暗号に関連するコードだけに、そのように設計されているようだ。したがって、処理時間は単純にブロック数に比例するだろう。
ソース上は 32bit の配列(リスト)をつかうようになっている。Polyphony では 32bit より広いビット数の値を扱うことが出来る。デフォルトでは 128bit まで扱えるが、無理矢理、コンパイラの中身をちょっと変えることで 256bit や 512bit も扱うことが出来る。
from polyphony import testbench, module, is_worker_running
from polyphony.typing import bit, bit512, bit256, bit32, uint3, uint4, List
from polyphony.io import Port, Queue
from polyphony.timing import clksleep, clkfence, wait_rising, wait_falling
from polyphony import rule
# SHA-256 round constants: 64 32-bit words, indexed by the round number in
# the compression loop of sha256.process_sha256.
k = [0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2]
# SHA-256 initial hash value: eight 32-bit words loaded into the worker's
# chaining state at the start of every message.
h = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19]
# Pack the first eight 32-bit words of lst into a single 256-bit value.
# lst[0] ends up in the most significant 32 bits, lst[7] in the least.
def bit32x8_bit256(lst:List[bit32])->bit256:
    packed:bit256
    packed = 0
    for idx in range(8):
        packed = (packed << 32) | lst[idx]
    return packed
# Pack sixteen consecutive 32-bit words of lst, beginning at index start_i,
# into a single 512-bit value.  lst[start_i] becomes the most significant
# word and lst[start_i + 15] the least significant one.
def bit32x16_bit512(lst:List[bit32], start_i = 0)->bit512:
    packed:bit512
    packed = 0
    for offset in range(16):
        packed = (packed << 32) | lst[start_i + offset]
    return packed
# 32-bit rotate right: the low y bits of x wrap around to the top of the
# word; the result is masked to 32 bits.
def rotr(x, y):
    low_part = x >> y
    high_part = x << (32 - y)
    return (high_part | low_part) & 0xFFFFFFFF
# Polyphony module that computes SHA-256 over a stream of 512-bit blocks.
# Protocol on data_in: first one value holding the number of blocks, then
# that many 512-bit blocks.  The worker performs no padding itself, so the
# sender must supply already padded blocks.  After the last block of a
# message one 256-bit digest is written to data_out.
@module
class sha256:
    def __init__(self):
        # 512-bit input queue (block count, then message blocks).
        self.data_in = Queue(bit512, 'in')
        # 256-bit output queue (one digest per message).
        self.data_out = Queue(bit256, 'out')
        self.append_worker(self.process_sha256)

    def process_sha256(self):
        # work: 64-word message schedule, _h: chaining value across blocks,
        # __h: working variables a..h of the current block (index 0 = a).
        work = [0] * 64 # type: List[bit32]
        _h = [0] * 8 # type: List[bit32]
        __h = [0] * 8 # type: List[bit32]
        while is_worker_running():
            # NOTE(review): `update` is assigned but never read in this worker.
            update = True
            # Every message starts from the initial hash value in the
            # module-level table h.
            for i in range(8):
                _h[i] = h[i]
            # The first item of a message is the number of blocks to follow.
            block_len512:bit512 = self.data_in.rd()
            # NOTE(review): presumably meant to narrow the count to 32 bits;
            # as written it is a plain alias - confirm.
            block_len32 = block_len512
            count = 0
            #print(block_len512)
            #print(block_len32)
            while count < block_len32:
                #print(count, block_len32)
                count += 1
                #print("--=========")
                d512 = self.data_in.rd()
                # Split the block into 16 words, most significant word first
                # (bits 511..480 become work[0]).
                shift_n = 480
                with rule(unroll='full'):
                    for i in range(16):
                        work[i] = (d512 >> shift_n) & 0xFFFFFFFF
                        shift_n -= 32
                # Extend the 16 block words to the 64-word message schedule.
                for i in range(16, 64):
                    wi_15 = work[i - 15]
                    s0 = rotr(wi_15, 7) ^ rotr(wi_15, 18) ^ (wi_15 >> 3)
                    wi_2 = work[i - 2]
                    s1 = rotr(wi_2, 17) ^ rotr(wi_2, 19) ^ (wi_2 >> 10)
                    wi_16 = work[i - 16]
                    wi_7 = work[i - 7]
                    work[i] = (wi_16 + s0 + wi_7 + s1) & 0xFFFFFFFF
                # Load the working variables from the chaining value.
                for i in range(8):
                    __h[i] = _h[i]
                # 64 compression rounds; t1/t2 stay unmasked, only the values
                # stored into __h[4] and __h[0] are reduced to 32 bits.
                for i in range(64):
                    s0 = rotr(__h[0], 2) ^ rotr(__h[0], 13) ^ rotr(__h[0], 22)
                    maj = (__h[0] & __h[1]) ^ (__h[0] & __h[2]) ^ (__h[1] & __h[2])
                    t2 = s0 + maj
                    s1 = rotr(__h[4], 6) ^ rotr(__h[4], 11) ^ rotr(__h[4], 25)
                    ch = (__h[4] & __h[5]) ^ ((~__h[4]) & __h[6])
                    t1 = __h[7] + s1 + ch + k[i] + work[i]
                    __h[7] = __h[6]
                    __h[6] = __h[5]
                    __h[5] = __h[4]
                    __h[4] = (__h[3] + t1) & 0xFFFFFFFF
                    __h[3] = __h[2]
                    __h[2] = __h[1]
                    __h[1] = __h[0]
                    __h[0] = (t1 + t2) & 0xFFFFFFFF
                # Add the working variables back into the chaining value
                # (mod 2**32).
                with rule(unroll='full'):
                    for i in range(8):
                        _h[i] = (_h[i] + __h[i]) & 0xFFFFFFFF
            # All blocks consumed: emit the digest as one 256-bit value with
            # _h[0] in the most significant word.
            self.data_out.wr(bit32x8_bit256(_h))
# Testbench: pad a message of 32-bit words the SHA-256 way, stream it to
# the module (block count first, then one 512-bit value per block) and
# print the 256-bit digest read back from the module.
#   m : the sha256 module instance under test
#
# The padding is computed from the message length, so lst may hold any
# number of whole words: a 0x80000000 marker word follows the message, the
# rest is zero-filled, and the last block ends with the 64-bit length in
# bits (upper word left at 0, i.e. messages shorter than 2**27 words).
@testbench
def test(m):
    lst = [0x61616161] * 16
    blen = len(lst)
    # The marker word plus the two length words must fit after the message;
    # round the total up to whole 16-word blocks.
    blocks = (blen + 3 + 15) // 16
    print("blocks", blocks)
    m.data_in.wr(blocks)
    block = [0] * 16
    pos = 0
    for b in range(blocks):
        for j in range(16):
            if pos < blen:
                block[j] = lst[pos]
            elif pos == blen:
                # First padding word: a single 1 bit followed by zeros.
                block[j] = 0x80000000
            else:
                block[j] = 0
            pos += 1
        if b == blocks - 1:
            # Words 14 and 15 of the final block are always padding by
            # construction; word 15 carries the message length in bits.
            block[15] = blen << 5
            v512_last = bit32x16_bit512(block)
            #print(v512_last)
            m.data_in.wr(v512_last)
        else:
            v512 = bit32x16_bit512(block)
            print('v512', v512)
            m.data_in.wr(v512)
    v256:bit256 = m.data_out.rd()
    print('sha256', v256)
    #print('R {:032x}'.format(v256))


m = sha256()
test(m)
今度のソースはちゃんとブロックも考慮するようにした。現状では Python の format が使えないのでその部分はコメントアウトされている。Python でのデバッグ時に有効にすれば、実際の SHA-256 と比較出来て便利だろう。
# おまけ 256bit と 512bit 追加
256bit と 512bit 用のパッチをおまけとしてつけておく。
diff --git a/polyphony/_internal/_typing.py b/polyphony/_internal/_typing.py
index c43b648..2cb800f 100644
--- a/polyphony/_internal/_typing.py
+++ b/polyphony/_internal/_typing.py
@@ -52,6 +52,8 @@ __all__ = [
'uint121', 'uint122', 'uint123', 'uint124', 'uint125', 'uint126', 'uint127', 'uint128',
'List',
'Tuple',
+ 'bit512',
+ 'bit256',
]
class bit: pass
@@ -183,6 +185,9 @@ class bit126: pass
class bit127: pass
class bit128: pass
+class bit256: pass
+class bit512: pass
+
class int2: pass
class int3: pass
class int4: pass
diff --git a/polyphony/typing.py b/polyphony/typing.py
index 6f3078d..08c0dd9 100644
--- a/polyphony/typing.py
+++ b/polyphony/typing.py
@@ -176,6 +176,8 @@ class bit125(int_base): pass
class bit126(int_base): pass
class bit127(int_base): pass
class bit128(int_base): pass
+class bit256(int_base): pass
+class bit512(int_base): pass
class int2(int_base): pass
class int3(int_base): pass