IEEE 754 - Binary16 形式
負符号を $S$, 指数部を $E$, 小数部を $T$ とすると、ビット パターンは
負符号 | 指数部 $E$ | 小数部 $T$ |
---|---|---|
$S$ | $E_0 E_1 E_2 E_3 E_4$ | $d_1 d_2 d_3 d_4 d_5 d_6 d_7 d_8 d_9 d_{10}$ |
(計 16 ビット)となっていて、各値は
\begin{eqnarray}
S & \in & \{ 0, 1 \} \\
E & \in & \{ 0,1,2, \cdots,31 \} \\
T & \in & \{ 0,1,2, \cdots,1023 \} \\
\end{eqnarray}
の範囲です。$E$ が $0$ の場合は
\begin{eqnarray}
N & = & \left( -1 \right)^{S} \times 2^{-14} \times \left( \frac{T}{ 2^{10} } \right) \\
\end{eqnarray}
$E$ が $1$ から $30$ の場合は
\begin{eqnarray}
N & = & \left( -1 \right)^{S} \times 2^{E-15} \times \left( 1 + \frac{T}{ 2^{10} } \right) \\
\end{eqnarray}
$E$ が $31$ の場合は
\begin{eqnarray}
T & = & 0 \space \cdots \space 無限大 \\
T & \ne & 0 \space \cdots \space 非数 \\
\end{eqnarray}
で、非数 (NaN) には、沈黙非数 (qNaN) と合図非数 (sNaN) があり
\begin{eqnarray}
d_1 & = & 0 \space \cdots \space sNaN \\
d_1 & = & 1 \space \cdots \space qNaN \\
\end{eqnarray}
を表します。
表現可能な数値
指数部 $E$ によって表現できる数値が変化します。対応表は以下の通り。
+----+----------------------+--------------------------------+-----------------------------+
| E | Start | End | Step |
+----+----------------------+--------------------------------+-----------------------------+
| 0 | 0.00000000000000 | 0.000060975551605224609375 | 0.000000059604644775390625 |
+----+----------------------+--------------------------------+-----------------------------+
| 1 | 0.00006103515625 | 0.000122010707855224609375 | 0.000000059604644775390625 |
| 2 | 0.0001220703125 | 0.00024402141571044921875 | 0.00000011920928955078125 |
| 3 | 0.000244140625 | 0.0004880428314208984375 | 0.0000002384185791015625 |
| 4 | 0.00048828125 | 0.000976085662841796875 | 0.000000476837158203125 |
| 5 | 0.0009765625 | 0.00195217132568359375 | 0.00000095367431640625 |
| 6 | 0.001953125 | 0.0039043426513671875 | 0.0000019073486328125 |
| 7 | 0.00390625 | 0.007808685302734375 | 0.000003814697265625 |
+----+----------------------+--------------------------------+-----------------------------+
| 8 | 0.0078125 | 0.01561737060546875 | 0.00000762939453125 |
| 9 | 0.015625 | 0.0312347412109375 | 0.0000152587890625 |
| 10 | 0.03125 | 0.062469482421875 | 0.000030517578125 |
| 11 | 0.0625 | 0.12493896484375 | 0.00006103515625 |
| 12 | 0.125 | 0.2498779296875 | 0.0001220703125 |
| 13 | 0.25 | 0.499755859375 | 0.000244140625 |
| 14 | 0.5 | 0.99951171875 | 0.00048828125 |
+----+----------------------+--------------------------------+-----------------------------+
| 15 | 1.0 | 1.9990234375 | 0.0009765625 |
| 16 | 2.0 | 3.998046875 | 0.001953125 |
| 17 | 4.0 | 7.99609375 | 0.00390625 |
| 18 | 8.0 | 15.9921875 | 0.0078125 |
| 19 | 16.0 | 31.984375 | 0.015625 |
| 20 | 32.0 | 63.96875 | 0.03125 |
| 21 | 64.0 | 127.9375 | 0.0625 |
| 22 | 128.0 | 255.875 | 0.125 |
| 23 | 256.0 | 511.75 | 0.25 |
| 24 | 512.0 | 1023.5 | 0.5 |
+----+----------------------+--------------------------------+-----------------------------+
| 25 | 1024.0 | 2047.0 | 1.0 |
| 26 | 2048.0 | 4094.0 | 2.0 |
| 27 | 4096.0 | 8188.0 | 4.0 |
| 28 | 8192.0 | 16376.0 | 8.0 |
| 29 | 16384.0 | 32752.0 | 16.0 |
| 30 | 32768.0 | 65504.0 | 32.0 |
+----+----------------------+--------------------------------+-----------------------------+
プログラム
内部では float 型で保持し、16bit データとの変換(丸めなしの切り捨て)以外は float 型任せです。
float16.py
#!/usr/bin/env python3
import math
import numbers
def binary16_to_float(binary):
    """Decode a 16-bit IEEE 754 binary16 bit pattern into a Python float.

    ``binary`` is an integer.  Bit 15 is read as the sign, bits 14-10 as the
    biased exponent and bits 9-0 as the fraction field.

    Returns infinity when the exponent field is 31 and the fraction is zero,
    NaN when the exponent field is 31 and the fraction is non-zero, and the
    (sub)normal value otherwise.  The sign bit is applied to every result.
    """
    negative = bool((binary >> 15) & 1)
    biased = (binary >> 10) & 0x1f
    fraction = binary & 0x3ff

    if biased == 31:
        magnitude = math.inf if fraction == 0 else math.nan
    elif biased == 0:
        # Subnormal (or zero): no hidden bit, each fraction unit is 2**-24.
        magnitude = math.ldexp(fraction, -24)
    else:
        # Normal: put the hidden leading 1 back in front of the 10 fraction
        # bits; the 11-bit integer is then scaled by 2**(E - 15 - 10).
        magnitude = math.ldexp(fraction | 0x400, biased - 25)

    return -magnitude if negative else magnitude
def float_to_binary16(value):
    """Encode a number as a 16-bit IEEE 754 binary16 bit pattern (an int).

    ``value`` is converted with ``float()`` unless it already is a float.
    Fraction bits that do not fit are truncated, not rounded.  Magnitudes too
    large for the exponent field become infinity; magnitudes too small for a
    normal number are stored as subnormals (down to zero).  Every NaN maps to
    the single pattern ``0x7fff``.
    """
    number = value if isinstance(value, float) else float(value)

    if math.isnan(number):
        # Quiet NaN.  (0x7dff would be the signalling counterpart.)
        return 0x7fff

    # copysign sees the sign of -0.0 as well, which a plain "< 0" would miss.
    sign_bit = 0x8000 if math.copysign(1.0, number) < 0 else 0

    if math.isinf(number):
        return sign_bit | 0x7c00
    if number == 0:
        return sign_bit

    # frexp gives a mantissa in [0.5, 1); shifting the exponent by 14 turns
    # it into the biased exponent of the equivalent [1, 2) mantissa.
    mantissa, biased = math.frexp(abs(number))
    biased += 14

    if biased >= 31:
        # Exponent field saturated: encode as infinity.
        return sign_bit | 0x7c00

    if biased <= 0:
        # Subnormal: express the magnitude in units of 2**-24.
        scaled = math.ldexp(mantissa, biased + 10)
        biased = 0
    else:
        # Normal: 11 significant bits; the mask below drops the hidden bit.
        scaled = mantissa * 2048.0

    return sign_bit | (biased << 10) | (math.trunc(scaled) & 0x3ff)
class Float16(numbers.Real):
    """Real number whose value is always representable in IEEE 754 binary16.

    The value is held in ``self.value`` as a built-in ``float``.  It is
    obtained by encoding the constructor argument with ``float_to_binary16``
    and decoding the resulting bit pattern through ``TABLE``.

    Comparisons and binary arithmetic are handed to the stored ``float`` and
    return whatever that operation yields (not a ``Float16``).  The
    constructor, the unary operators, ``round(x, ndigits)``, ``fromhex`` and
    the augmented assignments (``+=``, ``-=``, ...) produce ``Float16``.

    ``__sub__``/``__rsub__`` are not defined here; the ``numbers`` ABCs
    supply them in terms of ``__add__`` and ``__neg__``.
    """

    # Decoded value of every 16-bit pattern, indexed by the pattern itself.
    TABLE = tuple(binary16_to_float(n) for n in range(1 << 16))

    # staticmethod keeps the converters callable through an instance as well
    # as through the class; a bare function stored here would receive the
    # instance as its first argument when looked up on an instance.
    frombinary = staticmethod(binary16_to_float)
    tobinary = staticmethod(float_to_binary16)

    def __init__(self, *args):
        """Build from the same arguments ``float()`` accepts."""
        self.value = Float16.TABLE[Float16.tobinary(float(*args))]

    # --- object protocol -------------------------------------------------
    def __hash__(self): return hash(self.value)
    def __repr__(self): return f'Float16({repr(self.value)})'
    def __format__(self, format_spec): return format(self.value, format_spec)

    # --- comparisons (delegated to the stored float) ----------------------
    def __lt__(self, rhs): return self.value < rhs
    def __le__(self, rhs): return self.value <= rhs
    def __eq__(self, rhs): return self.value == rhs
    def __ge__(self, rhs): return self.value >= rhs
    def __gt__(self, rhs): return self.value > rhs

    # --- conversions -------------------------------------------------------
    def __bool__(self): return bool(self.value)
    def __float__(self): return self.value

    # --- unary operators (result is a Float16) -----------------------------
    def __abs__(self): return Float16(abs(self.value))
    def __neg__(self): return Float16(-self.value)
    def __pos__(self): return Float16(self.value)

    # --- rounding to integers ----------------------------------------------
    def __ceil__(self): return math.ceil(self.value)
    def __floor__(self): return math.floor(self.value)
    def __trunc__(self): return math.trunc(self.value)

    def __round__(self, ndigits=None):
        """Return ``round(value)`` as-is, or a ``Float16`` when ``ndigits`` is given."""
        rval = round(self.value, ndigits)
        return rval if ndigits is None else Float16(rval)

    # --- binary arithmetic (result is whatever float arithmetic yields) ----
    def __add__(self, rhs): return self.value + rhs
    def __mul__(self, rhs): return self.value * rhs
    def __pow__(self, rhs): return pow(self.value, rhs)
    def __floordiv__(self, rhs): return self.value // rhs
    def __truediv__(self, rhs): return self.value / rhs
    def __mod__(self, rhs): return self.value % rhs
    def __divmod__(self, rhs): return divmod(self.value, rhs)

    # --- reflected binary arithmetic ---------------------------------------
    def __radd__(self, lhs): return lhs + self.value
    def __rmul__(self, lhs): return lhs * self.value
    def __rpow__(self, lhs): return pow(lhs, self.value)
    def __rfloordiv__(self, lhs): return lhs // self.value
    def __rtruediv__(self, lhs): return lhs / self.value
    def __rmod__(self, lhs): return lhs % self.value

    # --- augmented assignment (result is a Float16) ------------------------
    # Each operator returns a NEW Float16 instead of mutating ``self``.
    # Instances are hashable, so changing ``value`` in place would change the
    # hash of an object that may already be a dict key or set member, and
    # would also change every other name bound to the same object.
    def __iadd__(self, rhs): return Float16(self.value + rhs)
    def __isub__(self, rhs): return Float16(self.value - rhs)
    def __imul__(self, rhs): return Float16(self.value * rhs)
    def __ipow__(self, rhs): return Float16(pow(self.value, rhs))
    def __ifloordiv__(self, rhs): return Float16(self.value // rhs)
    def __itruediv__(self, rhs): return Float16(self.value / rhs)
    def __imod__(self, rhs): return Float16(self.value % rhs)

    # --- float-like helpers ------------------------------------------------
    def hex(self):
        """Return the hexadecimal string of the stored float (``float.hex``)."""
        return self.value.hex()

    def is_integer(self):
        """Return True when the stored float has no fractional part."""
        return self.value.is_integer()

    def binary(self):
        """Return the 16-bit pattern of the stored value as an int."""
        return Float16.tobinary(self.value)

    @staticmethod
    def fromhex(string):
        """Build a ``Float16`` from a ``float.fromhex``-style string."""
        return Float16(float.fromhex(string))
実行結果
>>> from float16 import Float16
>>> x = Float16(1.0)
>>> x
Float16(1.0)
>>> x.binary()
15360
>>> hex(x.binary())
'0x3c00'
>>> bin(x.binary())
'0b11110000000000'
>>> x + 1/(1<<11)
1.00048828125
>>> x += 1/(1<<11)
>>> x
Float16(1.0)
>>> x + 1/(1<<10)
1.0009765625
>>> x += 1/(1<<10)
>>> x
Float16(1.0009765625)
>>> x.binary()
15361
>>> hex(x.binary())
'0x3c01'
>>> bin(x.binary())
'0b11110000000001'
>>> x = Float16(1.0)
>>> x /= 1/(1<<16)
>>> x
Float16(inf)
>>> x.binary()
31744
>>> hex(x.binary())
'0x7c00'
>>> bin(x.binary())
'0b111110000000000'