はじめに
AES(Advanced Encryption Standard)について説明します。
[1]で標準化されています。
暗号化は 4 word 単位のブロックで行います。このword数をNbと表記します(AESでは常に Nb = 4 です)。
※ 1 word = 4 byte = 32 bit です。
鍵長は 4 or 6 or 8 word です。word数をNkと表記します。
鍵長に応じてRound数(Nr)が決まります。次の通りです。
種別 | Key(Nk) | Round(Nr) |
---|---|---|
AES-128 | 4 | 10 |
AES-192 | 6 | 12 |
AES-256 | 8 | 14 |
input / state / output / key
暗号処理において、暗号対象のデータをinputと表します。暗号化されたデータをoutputと表します。
復号処理において、復号対象のデータをinputと表します。復号されたデータをoutputと表します。
暗号・復号それぞれにおいて、処理中のデータをstateと表します。
inputをin0, in1, ... in15 と表します。
outputをout0, out1, ... out15 と表します。
stateをs0,0 s1,0 s2,0 s3,0 s0,1 s1,1 s2,1 s3,1 s0,2 s1,2 s2,2 s3,2 s0,3 s1,3 s2,3 s3,3 と表します。
keyをK0, K1, ... K(4Nk-1) と表します。
上記はbyte単位で低いアドレスから高いアドレスの順に並べます。
input / state / output を 4x4 行列と考えます。
1要素が1byteです。1列が1wordです。
後で説明するShiftRows / MixColumns に現れるrow, columnは上記行列で考えます。
暗号(Cipher)
暗号の疑似コードを示します。
state = in;
AddRoundKey(state, 0);
for (i=1; i<Nr; i++) {
SubBytes(state);
ShiftRows(state);
MixColumns(state);
AddRoundKey(state, i);
}
SubBytes(state);
ShiftRows(state);
AddRoundKey(state, Nr);
out = state;
最初にAddRoundKeyを行います。
次に以下を(Nr-1)回繰り返します。
- SubBytes
- ShiftRows
- MixColumns
- AddRoundKey
最後に以下を実行します。MixColumnsは実行しません。
- SubBytes
- ShiftRows
- AddRoundKey
復号(InvCipher)
復号の疑似コードを示します。暗号の逆処理になっています。
state = in;
AddRoundKey(state, Nr);
for (i=Nr-1; 1<=i; i--) {
InvShiftRows(state);
InvSubBytes(state);
AddRoundKey(state, i);
InvMixColumns(state);
}
InvShiftRows(state);
InvSubBytes(state);
AddRoundKey(state, 0);
out = state;
最初にAddRoundKeyを行います。
次に以下を(Nr-1)回繰り返します。
- InvShiftRows
- InvSubBytes
- AddRoundKey
- InvMixColumns
最後に以下を実行します。InvMixColumnsは実行しません。
- InvShiftRows
- InvSubBytes
- AddRoundKey
AddRoundKey
AddRoundKeyはword毎にround keyとstateのXORを取ります。
SubBytes / InvSubBytes
SubBytesはbyte単位の置換です。置換表をS-Boxといいます。
InvSubBytesはSubBytesの逆の置換を行います。置換表をInverse S-Boxといいます。
具体的な置換表はCode exampleを参照してください。
ShiftRows / InvShiftRows
ShiftRowsは次のようにデータを入れ替えます。
2行目を1byte左側に回転します。3行目を2byte左側に回転します。4行目を3byte左側に回転します。
InvShiftRowsは次のようにデータを入れ替えます。
2行目を1byte右側に回転します。3行目を2byte右側に回転します。4行目を3byte右側に回転します。
MixColumns / InvMixColumns
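stateの各列(column)に対して次の計算を行います。+ は XORです。●は次の既約多項式を法とする多項式の積です。
$x^8 + x^4 + x^3 + x + 1$
MixColumns(係数は16進数、c は列番号):
$$\begin{pmatrix} s'_{0,c} \\ s'_{1,c} \\ s'_{2,c} \\ s'_{3,c} \end{pmatrix} = \begin{pmatrix} 02 & 03 & 01 & 01 \\ 01 & 02 & 03 & 01 \\ 01 & 01 & 02 & 03 \\ 03 & 01 & 01 & 02 \end{pmatrix} \begin{pmatrix} s_{0,c} \\ s_{1,c} \\ s_{2,c} \\ s_{3,c} \end{pmatrix}$$
InvMixColumns:
$$\begin{pmatrix} s'_{0,c} \\ s'_{1,c} \\ s'_{2,c} \\ s'_{3,c} \end{pmatrix} = \begin{pmatrix} 0e & 0b & 0d & 09 \\ 09 & 0e & 0b & 0d \\ 0d & 09 & 0e & 0b \\ 0b & 0d & 09 & 0e \end{pmatrix} \begin{pmatrix} s_{0,c} \\ s_{1,c} \\ s_{2,c} \\ s_{3,c} \end{pmatrix}$$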
Round Key
KeyからRound Keyを作成します。各Round KeyのサイズはNb(4word)です。Nr+1個作成します。
疑似コードを示します。
KeyExpansion(word key[Nk], word w[Nb*(Nr+1)], int Nk) {
for (i=0; i<Nk; i++) {
w[i] = key[i];
}
for (i=Nk; i<Nb*(Nr+1); i++) {
temp = w[i-1];
if (i%Nk == 0) {
temp = SubWord(RotWord(temp)) xor Rcon[i/Nk];
} else if (6 < Nk && i%Nk == 4) {
temp = SubWord(temp);
}
w[i] = w[i-Nk] xor temp;
}
}
RotWordは 1wordをbyte単位で左に回転します。word を a0 a1 a2 a3 とすると a1 a2 a3 a0 に変換します。
SubWordは S-Boxによるbyte単位の置換です。
Rconの具体値はCode exampleを参照してください。
AES-128 / AES-192 / AES-256 について、round keyの作成手順を図に示します。
code example - C
MINGW64 の gcc で make します。
# Build settings for the pure-C example; all warnings are treated as errors.
CFLAGS=-I. -Wall -Werror -O2 -march=native
INCS=
# test.c is listed directly, so the link rule below compiles and links it in one step.
OBJS=test.c
LIBS=
TARGET=test
all: $(TARGET)
# Generic compile rule; it is not triggered while OBJS contains only .c files.
%.o: %.c $(INCS)
$(CC) $(CFLAGS) -c -o $@ $<
$(TARGET): $(OBJS)
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
# Remove the executable and any object files.
clean:
rm -rf $(TARGET) *.o
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
/* Upper bounds across the variants in key_round_table: the largest Nk is 8 and the largest Nr is 14. */
#define NK_MAX (8)
#define NR_MAX (14)
/* AES variant selector; each value is used as an index into key_round_table. */
enum {
AES128 = 0,
AES192,
AES256,
};
/* Variant currently in effect; read by key_expansion(), cipher() and inv_cipher(). */
uint8_t aes_type = AES128;
/* Block size in 32-bit words. */
const uint8_t Nb = 4;
/* Key length in words (Nk) and number of rounds (Nr) for each variant. */
struct key_round {
uint8_t Nk;
uint8_t Nr;
} const key_round_table[] = {
{ 4, 10 }, // AES128(0)
{ 6, 12 }, // AES192(1)
{ 8, 14 }, // AES256(2)
};
/*
 * Figure 7. S-box: substitution values for the byte xy (in hexadecimal format).
 * The table is indexed directly by the input byte: sbox[b] is the substitute for b.
 * Each source row holds 16 entries, so the row is the high nibble (x) and the
 * position within the row is the low nibble (y).
 */
const uint8_t sbox[] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
/*
 * Figure 14. Inverse S-box: substitution values for the byte xy (in hexadecimal format).
 * The table is indexed directly by the input byte: inv_sbox[b] is the substitute for b.
 * Used by inv_sub_bytes() to undo the substitution made with sbox.
 */
const uint8_t inv_sbox[] = {
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
/*
 * Round constants for key_expansion(), which reads rcon[i/Nk].
 * Entry i holds x^(i-1) mod x^8 + x^4 + x^3 + x + 1 in the least-significant
 * byte of the word; index 0 is a placeholder so that indexing can start at 1.
 * NOTE(review): placing the constant in the low byte matches the first byte in
 * memory only on a little-endian host -- confirm before porting.
 */
const uint32_t rcon[] = {
0x00000000, /* invalid */
0x00000001, /* x^0 */
0x00000002, /* x^1 */
0x00000004, /* x^2 */
0x00000008, /* x^3 */
0x00000010, /* x^4 */
0x00000020, /* x^5 */
0x00000040, /* x^6 */
0x00000080, /* x^7 */
0x0000001B, /* x^4 + x^3 + x^1 + x^0 */
0x00000036, /* x^5 + x^4 + x^2 + x^1 */
};
/*
 * Multiply two bytes as polynomials over GF(2), reduced modulo
 * x^8 + x^4 + x^3 + x + 1 (the 0x1b constant is that polynomial without x^8).
 *
 * a, b: the two factors.
 * Returns the reduced product.
 */
static uint8_t gmult(uint8_t a, uint8_t b)
{
    uint8_t product = 0;
    int bit;

    for (bit = 0; bit < 8; ++bit) {
        /* Remember whether doubling `a` will overflow past x^7. */
        const int overflow = (a & 0x80) != 0;

        /* Add (XOR) the current multiple of `a` when this bit of `b` is set. */
        if ((b & 1) != 0) {
            product ^= a;
        }

        /* a := a * x, folding the overflow back in with the reduction polynomial. */
        a = (uint8_t)(a << 1);
        if (overflow) {
            a ^= 0x1b;
        }

        b = (uint8_t)(b >> 1);
    }
    return product;
}
/*
 * Rotate a 32-bit word right by 8 bits.
 * Viewed as hex digits from the most significant byte: a3 a2 a1 a0 -> a0 a3 a2 a1.
 */
static uint32_t rot_word(uint32_t word)
{
    const uint32_t lowest_byte = word & 0xffu;
    const uint32_t upper_bytes = word >> 8;

    return (lowest_byte << 24) | upper_bytes;
}
/*
 * Apply the S-box to each of the four bytes of a word independently.
 * Every byte stays in its own position; only its value is substituted.
 */
static uint32_t sub_word(uint32_t word)
{
    uint32_t substituted = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        const uint8_t byte = (uint8_t)(word >> shift);
        substituted |= (uint32_t)sbox[byte] << shift;
    }
    return substituted;
}
/*
 * XOR one round key into the state.
 *
 * state: 4*Nb bytes, updated in place.
 * w:     pointer to the Nb words of the round key to apply.
 *
 * The key words are read through a byte pointer so that `state` is never
 * reinterpreted as uint32_t; the result is the same word-wise XOR, but the
 * function no longer requires `state` to be 4-byte aligned.
 */
static void add_round_key(uint8_t* state /*4*Nb*/, const uint32_t* w /*Nb*(Nr+1)*/)
{
    const uint8_t* key_bytes = (const uint8_t*)w;
    int i;

    for (i = 0; i < 4*Nb; i++) {
        state[i] ^= key_bytes[i];
    }
}
/* Replace every byte of the state (4*Nb bytes, in place) with its S-box value. */
static void sub_bytes(uint8_t* state /*4*Nb*/)
{
    uint8_t* cursor = state;
    uint8_t* const end = state + 4*Nb;

    while (cursor != end) {
        *cursor = sbox[*cursor];
        ++cursor;
    }
}
/* Replace every byte of the state (4*Nb bytes, in place) with its inverse S-box value. */
static void inv_sub_bytes(uint8_t* state /*4*Nb*/)
{
    uint8_t* cursor = state;
    uint8_t* const end = state + 4*Nb;

    while (cursor != end) {
        *cursor = inv_sbox[*cursor];
        ++cursor;
    }
}
/*
 * ShiftRows on a 16-byte state stored column by column (index = row + 4*column).
 * Row r is rotated left by r positions; row 0 is untouched.
 *
 *   00 04 08 12    =>    00 04 08 12
 *   01 05 09 13    =>    05 09 13 01
 *   02 06 10 14    =>    10 14 02 06
 *   03 07 11 15    =>    15 03 07 11
 */
static void shift_rows(uint8_t* state /*4*Nb*/)
{
    uint8_t before[16];
    int row, col, k;

    for (k = 0; k < 16; k++) {
        before[k] = state[k];
    }
    for (row = 1; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            /* The byte landing in column `col` comes from `row` columns to the right. */
            state[row + 4*col] = before[row + 4*((col + row) % 4)];
        }
    }
}
/*
 * InvShiftRows on a 16-byte state stored column by column (index = row + 4*column).
 * Row r is rotated right by r positions; row 0 is untouched.
 *
 *   00 04 08 12    =>    00 04 08 12
 *   01 05 09 13    =>    13 01 05 09
 *   02 06 10 14    =>    10 14 02 06
 *   03 07 11 15    =>    07 11 15 03
 */
static void inv_shift_rows(uint8_t* state /*4*Nb*/)
{
    uint8_t before[16];
    int row, col, k;

    for (k = 0; k < 16; k++) {
        before[k] = state[k];
    }
    for (row = 1; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            /* The byte landing in column `col` comes from `row` columns to the left. */
            state[row + 4*col] = before[row + 4*((col - row + 4) % 4)];
        }
    }
}
/*
 * MixColumns: transform each 4-byte column of the state in place.
 * Every output byte is the GF(2^8) combination 02*a ^ 03*b ^ c ^ d of the
 * column's input bytes, with the coefficients rotated by one per row.
 */
static void mix_columns(uint8_t* state /*4*Nb*/)
{
    uint8_t* column;
    uint8_t* const end = state + 4*Nb;

    for (column = state; column != end; column += 4) {
        /* Snapshot the inputs so later rows do not read already-updated bytes. */
        const uint8_t a0 = column[0];
        const uint8_t a1 = column[1];
        const uint8_t a2 = column[2];
        const uint8_t a3 = column[3];

        column[0] = gmult(0x02, a0) ^ gmult(0x03, a1) ^ a2 ^ a3;
        column[1] = a0 ^ gmult(0x02, a1) ^ gmult(0x03, a2) ^ a3;
        column[2] = a0 ^ a1 ^ gmult(0x02, a2) ^ gmult(0x03, a3);
        column[3] = gmult(0x03, a0) ^ a1 ^ a2 ^ gmult(0x02, a3);
    }
}
/*
 * InvMixColumns: transform each 4-byte column of the state in place.
 * Row r of the coefficient matrix is {0e, 0b, 0d, 09} rotated right by r.
 */
static void inv_mix_columns(uint8_t* state /*4*Nb*/)
{
    static const uint8_t coef[4] = { 0x0e, 0x0b, 0x0d, 0x09 };
    int col, row, k;

    for (col = 0; col < Nb; col++) {
        uint8_t* column = state + 4*col;
        uint8_t mixed[4];

        for (row = 0; row < 4; row++) {
            uint8_t acc = 0;
            for (k = 0; k < 4; k++) {
                acc ^= gmult(coef[(k - row + 4) % 4], column[k]);
            }
            mixed[row] = acc;
        }
        /* Write back only after all four outputs were computed from the old column. */
        memcpy(column, mixed, 4);
    }
}
/*
 * Print N 32-bit words as hex bytes in memory order, four bytes per word,
 * each byte followed by a single space. No newline is written.
 */
static void print_Nwords(const uint32_t* word, int N)
{
    const uint8_t* bytes = (const uint8_t*)word;
    int remaining;

    for (remaining = N; remaining > 0; remaining--) {
        printf("%02x %02x %02x %02x ", bytes[0], bytes[1], bytes[2], bytes[3]);
        bytes += 4;
    }
}
extern void key_expansion(const uint32_t* key /*Nk*/, uint32_t* w /*Nb*(Nr+1)*/)
{
int i;
uint8_t Nr = key_round_table[aes_type].Nr;
uint8_t Nk = key_round_table[aes_type].Nk;
memcpy(w, key, Nk*4);
for (i=Nk; i<Nb*(Nr+1); i++) {
uint32_t temp = w[i-1];
if (i%Nk == 0) {
temp = sub_word(rot_word(temp)) ^ rcon[i/Nk];
} else if (6<Nk && i%Nk == 4) {
temp = sub_word(temp);
}
w[i] = w[i-Nk] ^ temp;
}
}
/*
 * Encrypt one block with the key schedule w for the variant selected by aes_type.
 *
 * in:  4*Nb input bytes.
 * out: 4*Nb output bytes; also used as the working state.
 * w:   key schedule of Nb*(Nr+1) words.
 */
extern void cipher(const uint8_t* in /*4*Nb*/, uint8_t* out /*4*Nb*/, const uint32_t* w /*Nb*(Nr+1)*/)
{
    const uint8_t Nr = key_round_table[aes_type].Nr;
    uint8_t* const state = out;
    int round;

    memcpy(state, in, 4*Nb);
    add_round_key(state, w);

    for (round = 1; round <= Nr; round++) {
        sub_bytes(state);
        shift_rows(state);
        /* The final round (round == Nr) skips MixColumns. */
        if (round < Nr) {
            mix_columns(state);
        }
        add_round_key(state, w + Nb*round);
    }
}
/*
 * Decrypt one block with the key schedule w for the variant selected by aes_type.
 * The round keys are applied in the reverse order of cipher().
 *
 * in:  4*Nb input bytes.
 * out: 4*Nb output bytes; also used as the working state.
 * w:   key schedule of Nb*(Nr+1) words.
 */
extern void inv_cipher(const uint8_t* in /*4*Nb*/, uint8_t* out /*4*Nb*/, const uint32_t* w /*Nb*(Nr+1)*/)
{
    const uint8_t Nr = key_round_table[aes_type].Nr;
    uint8_t* const state = out;
    int round;

    memcpy(state, in, 4*Nb);
    add_round_key(state, w + Nb*Nr);

    for (round = Nr - 1; round >= 0; round--) {
        inv_shift_rows(state);
        inv_sub_bytes(state);
        add_round_key(state, w + Nb*round);
        /* The last step (round 0) skips InvMixColumns. */
        if (round > 0) {
            inv_mix_columns(state);
        }
    }
}
static void cipher_and_inv_cipher(const uint32_t* key, const uint32_t* in)
{
uint32_t w[Nb*(NR_MAX+1)], out[4], tmp[4];
uint8_t Nk = key_round_table[aes_type].Nk;
printf("Cipher Key = "); print_Nwords(key, Nk); printf("\n");
key_expansion(key, w);
printf("Input = "); print_Nwords(in, 4); printf("\n");
cipher((uint8_t*)in, (uint8_t*)out, w);
printf("Output = "); print_Nwords(out, 4); printf("\n");
inv_cipher((uint8_t*)out, (uint8_t*)tmp, w);
printf("Input(Inv) = "); print_Nwords(tmp, 4); printf("\n");
printf("\n");
}
/*
 * Run the encrypt/decrypt demo for AES-128, AES-192 and AES-256 using the
 * key bytes 00 01 02 ... and the input bytes 00 11 22 ... ff.
 *
 * The buffers are declared as uint32_t arrays and filled through byte
 * pointers, so the word pointers handed to cipher_and_inv_cipher() are
 * correctly aligned (a uint8_t array cast to uint32_t* need not be).
 */
int main(int argc, char* argv[])
{
    uint32_t key[NK_MAX], in[4];
    uint8_t* key_bytes = (uint8_t*)key;
    uint8_t* in_bytes = (uint8_t*)in;
    uint8_t i;

    for (i=0; i<NK_MAX*4; i++) {
        key_bytes[i] = i;
    }
    for (i=0; i<Nb*4; i++) {
        /* Same value in both nibbles: 0x00, 0x11, ..., 0xff. */
        in_bytes[i] = (uint8_t)(i << 4 | i);
    }

    printf("AES-128\n");
    aes_type = AES128;
    cipher_and_inv_cipher(key, in);

    printf("AES-192\n");
    aes_type = AES192;
    cipher_and_inv_cipher(key, in);

    printf("AES-256\n");
    aes_type = AES256;
    cipher_and_inv_cipher(key, in);

    return 0;
}
$ gcc --version
gcc.exe (Rev2, Built by MSYS2 project) 6.2.0
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ make clean && make &&./test.exe
rm -rf test *.o
cc -I. -Wall -Werror -O2 -march=native -o test test.c
AES-128
Cipher Key = 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
Input = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
Output = 69 c4 e0 d8 6a 7b 04 30 d8 cd b7 80 70 b4 c5 5a
Input(Inv) = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
AES-192
Cipher Key = 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17
Input = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
Output = dd a9 7c a4 86 4c df e0 6e af 70 a0 ec 0d 71 91
Input(Inv) = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
AES-256
Cipher Key = 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
Input = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
Output = 8e a2 b7 ca 51 67 45 bf ea fc 49 90 4b 49 60 89
Input(Inv) = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
code example - Assembly Code
Intel AES-NIを使ったAES暗号の例を示します。
[2]を参照します。
# Build settings for the AES-NI example: a C driver (test.c) plus an assembly file (test_s.s).
CFLAGS=-I. -Wall -Werror -O2
INCS=
OBJS=test.o test_s.o
LIBS=
TARGET=test
all: $(TARGET)
# Compile C sources to objects.
%.o: %.c $(INCS)
$(CC) $(CFLAGS) -c -o $@ $<
# Assemble .s sources through the compiler driver.
%.o: %.s $(INCS)
$(CC) $(CFLAGS) -c -o $@ $<
# Link all objects into the executable.
$(TARGET): $(OBJS)
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
# Remove the executable and any object files.
clean:
rm -rf $(TARGET) *.o
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
/* Largest key length in words and largest round count among the variants used below. */
#define NK_MAX (8)
#define NR_MAX (14)
/* AES variant selector. */
enum {
AES128 = 0,
AES192,
AES256,
};
/* Variant currently in effect; selects which group of routines below is called. */
uint8_t aes_type = AES128;
/* Block size in 32-bit words. */
const uint8_t Nb = 4;
/*
 * Routines implemented in the accompanying assembly file.
 * Key expansion: writes the key schedule to w and, per the assembly source,
 * leaves the round keys loaded in xmm registers.
 */
extern void cipher_key_expansion128(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* key /*Nk*/);
extern void cipher_key_expansion192(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* key /*Nk*/);
extern void cipher_key_expansion256(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* key /*Nk*/);
/* Derives the decryption key schedule w from an encryption key schedule cipher_w. */
extern void inv_cipher_key_expansion128(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* cipher_w /*Nb*(Nr+1)*/);
extern void inv_cipher_key_expansion192(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* cipher_w /*Nb*(Nr+1)*/);
extern void inv_cipher_key_expansion256(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* cipher_w /*Nb*(Nr+1)*/);
/*
 * Block encryption / decryption. These take no key-schedule argument: they
 * use the round keys left in xmm registers by the matching expansion routine,
 * which therefore has to be called first.
 */
extern void cipher128(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
extern void cipher192(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
extern void cipher256(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
extern void inv_cipher128(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
extern void inv_cipher192(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
extern void inv_cipher256(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
/*
 * Print N 32-bit words as hex bytes in memory order, four bytes per word,
 * each byte followed by a single space. No newline is written.
 */
static void print_Nwords(const uint32_t* word, int N)
{
    const uint8_t* bytes = (const uint8_t*)word;
    int remaining;

    for (remaining = N; remaining > 0; remaining--) {
        printf("%02x %02x %02x %02x ", bytes[0], bytes[1], bytes[2], bytes[3]);
        bytes += 4;
    }
}
/*
 * Demo driver for the assembly routines: expand `key`, encrypt `in`, derive the
 * decryption key schedule, decrypt again, and print each stage.
 *
 * Ordering matters: each cipherNNN / inv_cipherNNN call directly follows its
 * key expansion call with nothing in between, because the assembly keeps the
 * round keys in xmm registers rather than reading them from w / w2.
 * NOTE(review): this relies on the compiler not touching those xmm registers
 * between the two calls -- confirm if the code around the calls changes.
 */
static void cipher_and_inv_cipher(const uint32_t* key, const uint32_t* in)
{
/* w: encryption key schedule, w2: decryption key schedule (both sized for AES-256). */
uint32_t w[Nb*(NR_MAX+1)], w2[Nb*(NR_MAX+1)], out[4], tmp[4];
uint8_t Nk;
/* Number of key words to print for the selected variant. */
if (aes_type == AES128) {
Nk = 4;
} else if (aes_type == AES192) {
Nk = 6;
} else { // aes_type == AES256
Nk = 8;
}
printf("Cipher Key = "); print_Nwords(key, Nk); printf("\n");
if (aes_type == AES128) {
printf("Input = "); print_Nwords(in, 4); printf("\n");
/* Expansion immediately followed by the cipher call (see function comment). */
cipher_key_expansion128(w, key);
cipher128((uint8_t*)out, (uint8_t*)in);
printf("Output = "); print_Nwords(out, 4); printf("\n");
inv_cipher_key_expansion128(w2, w);
inv_cipher128((uint8_t*)tmp, (uint8_t*)out);
printf("Input(Inv) = "); print_Nwords(tmp, 4); printf("\n");
printf("\n");
} else if (aes_type == AES192) {
printf("Input = "); print_Nwords(in, 4); printf("\n");
cipher_key_expansion192(w, key);
cipher192((uint8_t*)out, (uint8_t*)in);
printf("Output = "); print_Nwords(out, 4); printf("\n");
inv_cipher_key_expansion192(w2, w);
inv_cipher192((uint8_t*)tmp, (uint8_t*)out);
printf("Input(Inv) = "); print_Nwords(tmp, 4); printf("\n");
printf("\n");
} else { // aes_type == AES256
printf("Input = "); print_Nwords(in, 4); printf("\n");
cipher_key_expansion256(w, key);
cipher256((uint8_t*)out, (uint8_t*)in);
printf("Output = "); print_Nwords(out, 4); printf("\n");
inv_cipher_key_expansion256(w2, w);
inv_cipher256((uint8_t*)tmp, (uint8_t*)out);
printf("Input(Inv) = "); print_Nwords(tmp, 4); printf("\n");
printf("\n");
}
}
/*
 * Run the AES-NI encrypt/decrypt demo for AES-128, AES-192 and AES-256.
 * For each variant the key buffer is cleared and then filled with the bytes
 * 00 01 02 ... up to that variant's key length; the input is 00 11 22 ... ff.
 *
 * The buffers are declared as uint32_t arrays and filled through byte
 * pointers, so the word pointers handed to cipher_and_inv_cipher() are
 * correctly aligned (a uint8_t array cast to uint32_t* need not be).
 */
int main(int argc, char* argv[])
{
    uint32_t key[NK_MAX], in[4];
    uint8_t* key_bytes = (uint8_t*)key;
    uint8_t* in_bytes = (uint8_t*)in;
    uint8_t i;

    for (i=0; i<Nb*4; i++) {
        /* Same value in both nibbles: 0x00, 0x11, ..., 0xff. */
        in_bytes[i] = (uint8_t)(i << 4 | i);
    }

    printf("AES-128\n");
    aes_type = AES128;
    memset(key, 0, sizeof(key));
    for (i=0; i<4*4; i++) {
        key_bytes[i] = i;
    }
    cipher_and_inv_cipher(key, in);

    printf("AES-192\n");
    aes_type = AES192;
    memset(key, 0, sizeof(key));
    for (i=0; i<6*4; i++) {
        key_bytes[i] = i;
    }
    cipher_and_inv_cipher(key, in);

    printf("AES-256\n");
    aes_type = AES256;
    memset(key, 0, sizeof(key));
    for (i=0; i<8*4; i++) {
        key_bytes[i] = i;
    }
    cipher_and_inv_cipher(key, in);

    return 0;
}
# Argument registers: 1st(%rcx) 2nd(%rdx) 3rd(%r8) 4th(%r9) Microsoft x64 calling convention
# The routines in this file pass round keys to each other through %xmm0-%xmm14
# and use %xmm15 as the data register, in addition to the arguments above.
# NOTE(review): the Microsoft x64 convention treats %xmm6-%xmm15 as callee-saved,
# and these routines overwrite them without saving -- confirm this is acceptable
# for every caller.
# Symbols exported to the C driver:
.globl cipher_key_expansion128
.globl cipher_key_expansion192
.globl cipher_key_expansion256
.globl inv_cipher_key_expansion128
.globl inv_cipher_key_expansion192
.globl inv_cipher_key_expansion256
.globl cipher128
.globl cipher192
.globl cipher256
.globl inv_cipher128
.globl inv_cipher192
.globl inv_cipher256
# void cipher_key_expansion128(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* key /*Nk*/);
# Expands the 16-byte key at %rdx into 11 round keys (0xb0 bytes) written to %rcx,
# then loads round keys 0..10 into %xmm0..%xmm10.
# Lane diagrams in the comments list the four 32-bit lanes from most significant
# (left) to least significant (right).
cipher_key_expansion128:
push %rcx # save the start of w; %rcx is advanced as the write cursor
movdqu (%rdx), %xmm1
movdqu %xmm1, (%rcx) # round key 0 is the key itself
add $0x10, %rcx
# One aeskeygenassist + round call per remaining round key; the immediate is the round constant.
aeskeygenassist $0x01, %xmm1, %xmm2 # |a|b|c|d| => |x|-|-|-|, x=rotword(subword(a)) xor Rcon
call cipher_key_expansion128_round
aeskeygenassist $0x02, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x04, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x08, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x10, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x20, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x40, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x80, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x1b, %xmm1, %xmm2
call cipher_key_expansion128_round
aeskeygenassist $0x36, %xmm1, %xmm2
call cipher_key_expansion128_round
pop %rcx # back to the start of w
# Leave the whole schedule in registers for cipher128.
movdqu 0(%rcx), %xmm0 # round key 0
movdqu 0x10(%rcx), %xmm1 # round key 1
movdqu 0x20(%rcx), %xmm2 # round key 2
movdqu 0x30(%rcx), %xmm3 # round key 3
movdqu 0x40(%rcx), %xmm4 # round key 4
movdqu 0x50(%rcx), %xmm5 # round key 5
movdqu 0x60(%rcx), %xmm6 # round key 6
movdqu 0x70(%rcx), %xmm7 # round key 7
movdqu 0x80(%rcx), %xmm8 # round key 8
movdqu 0x90(%rcx), %xmm9 # round key 9
movdqu 0xa0(%rcx), %xmm10 # round key 10
ret
# Internal helper: derive the next 4-word round key.
# In:  %xmm1 = previous round key |a|b|c|d|, %xmm2 = aeskeygenassist result (x in the top lane),
#      %rcx = write cursor.
# Out: %xmm1 = new round key, stored at (%rcx); %rcx advanced by 16. Uses %xmm3 as scratch.
cipher_key_expansion128_round:
pshufd $0xff, %xmm2, %xmm2 # |x|-|-|-| => |x|x|x|x|
# Three shift-by-one-lane + XOR steps build the running XOR of the lanes.
vpslldq $0x4, %xmm1, %xmm3 # |a|b|c|d|
pxor %xmm3, %xmm1 # +|b|c|d|0|
vpslldq $0x4, %xmm1, %xmm3 # |a+b|b+c|c+d|d|
pxor %xmm3, %xmm1 # +|b+c|c+d|d |0|
vpslldq $0x4, %xmm1, %xmm3 # |a+c|b+d|c|d|
pxor %xmm3, %xmm1 # +|b+d|c |d|0|
# |a+b+c+d|b+c+d|c+d|d|
pxor %xmm2, %xmm1 # |x+a+b+c+d|x+b+c+d|x+c+d|x+d|
movdqu %xmm1, (%rcx)
add $0x10, %rcx
ret
# void cipher_key_expansion192(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* key /*Nk*/);
# Expands the 24-byte key at %rdx into 13 round keys (0xd0 bytes) written to %rcx,
# then loads round keys 0..12 into %xmm0..%xmm12.
# Lane diagrams in the comments list the four 32-bit lanes from most significant
# (left) to least significant (right).
# Key words 4 and 5 are loaded and stored with 8-byte movq so that nothing is
# read beyond the 24 bytes of the key.
cipher_key_expansion192:
push %rcx # save the start of w; %rcx is advanced as the write cursor
movdqu (%rdx), %xmm1 # |c|d|e|f|  key words 3..0
movdqu %xmm1, (%rcx)
movq 0x10(%rdx), %xmm2 # |0|0|a|b|  key words 5 and 4 only
movq %xmm2, 0x10(%rcx)
add $0x18, %rcx # each step produces 6 words = 0x18 bytes
# One aeskeygenassist + round call per 6-word group; the immediate is the round constant.
aeskeygenassist $0x01, %xmm2, %xmm3 # |-|-|a|b| => |-|-|x|-|, x=rotword(subword(a)) xor Rcon
call cipher_key_expansion192_round
aeskeygenassist $0x02, %xmm2, %xmm3
call cipher_key_expansion192_round
aeskeygenassist $0x04, %xmm2, %xmm3
call cipher_key_expansion192_round
aeskeygenassist $0x08, %xmm2, %xmm3
call cipher_key_expansion192_round
aeskeygenassist $0x10, %xmm2, %xmm3
call cipher_key_expansion192_round
aeskeygenassist $0x20, %xmm2, %xmm3
call cipher_key_expansion192_round
aeskeygenassist $0x40, %xmm2, %xmm3
call cipher_key_expansion192_round
# Final step: only 4 more words are needed, so reuse the 128-bit round helper.
aeskeygenassist $0x80, %xmm2, %xmm3
pshufd $0x55, %xmm3, %xmm2 # |-|-|x|-| => |x|x|x|x|
call cipher_key_expansion128_round
pop %rcx # back to the start of w
# Leave the whole schedule in registers for cipher192.
movdqu 0(%rcx), %xmm0 # round key 0
movdqu 0x10(%rcx), %xmm1 # round key 1
movdqu 0x20(%rcx), %xmm2 # round key 2
movdqu 0x30(%rcx), %xmm3 # round key 3
movdqu 0x40(%rcx), %xmm4 # round key 4
movdqu 0x50(%rcx), %xmm5 # round key 5
movdqu 0x60(%rcx), %xmm6 # round key 6
movdqu 0x70(%rcx), %xmm7 # round key 7
movdqu 0x80(%rcx), %xmm8 # round key 8
movdqu 0x90(%rcx), %xmm9 # round key 9
movdqu 0xa0(%rcx), %xmm10 # round key 10
movdqu 0xb0(%rcx), %xmm11 # round key 11
movdqu 0xc0(%rcx), %xmm12 # round key 12
ret
# Internal helper: derive the next 6 words of the AES-192 key schedule.
# In:  %xmm1 = previous words |c|d|e|f|, %xmm2 = previous words |-|-|a|b| (low two lanes),
#      %xmm3 = aeskeygenassist result (x in lane 1), %rcx = write cursor.
# Out: %xmm1 and the low two lanes of %xmm2 hold the new 6 words; 32 bytes are stored at
#      (%rcx), of which the last 8 are overwritten by the next step; %rcx advanced by 0x18.
# Uses %xmm4 as scratch.
cipher_key_expansion192_round:
pshufd $0x55, %xmm3, %xmm3 # |-|-|x|-| => |x|x|x|x|
vpslldq $0x4, %xmm1, %xmm4 # |c|d|e|f|
pxor %xmm4, %xmm1 # +|d|e|f|0|
vpslldq $0x4, %xmm1, %xmm4 # |c+d|d+e|e+f|f|
pxor %xmm4, %xmm1 # +|d+e|e+f|f |0|
vpslldq $0x4, %xmm1, %xmm4 # |c+e|d+f|e|f|
pxor %xmm4, %xmm1 # +|d+f|e |f|0|
# |c+d+e+f|d+e+f|e+f|f|
pxor %xmm3, %xmm1 # |x+c+d+e+f|x+d+e+f|x+e+f|x+f|
# Continue the chain into the two remaining words, seeded by the top lane of the new %xmm1.
vpslldq $0x4, %xmm2, %xmm4 # |-|-|a|b|
pxor %xmm4, %xmm2 # +|-|a|b|0|
# |-|-|a+b|b|
pshufd $0x0f, %xmm1, %xmm3 # |-|-|x+c+d+e+f|x+c+d+e+f|
pxor %xmm3, %xmm2 # +|-|-|a+b |b |
# |-|-|x+a+b+c+d+e+f|x+b+c+d+e+f|
movdqu %xmm1, (%rcx)
movdqu %xmm2, 0x10(%rcx)
add $0x18, %rcx
ret
# void cipher_key_expansion256(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* key /*Nk*/);
# Expands the 32-byte key at %rdx into 15 round keys (0xf0 bytes) written to %rcx,
# then loads round keys 0..14 into %xmm0..%xmm14.
# Lane diagrams in the comments list the four 32-bit lanes from most significant
# (left) to least significant (right).
cipher_key_expansion256:
push %rcx # save the start of w; %rcx is advanced as the write cursor
movdqu (%rdx), %xmm1 # |e|f|g|h|
movdqu %xmm1, (%rcx)
movdqu 0x10(%rdx), %xmm2 # |a|b|c|d|
movdqu %xmm2, 0x10(%rcx)
add $0x20, %rcx # each step produces 8 words = 0x20 bytes
# One aeskeygenassist + round call per 8-word group; the immediate is the round constant.
aeskeygenassist $0x01, %xmm2, %xmm3 # |a|b|c|d| => |x|-|-|-|, x=rotword(subword(a)) xor Rcon
call cipher_key_expansion256_round
aeskeygenassist $0x02, %xmm2, %xmm3
call cipher_key_expansion256_round
aeskeygenassist $0x04, %xmm2, %xmm3
call cipher_key_expansion256_round
aeskeygenassist $0x08, %xmm2, %xmm3
call cipher_key_expansion256_round
aeskeygenassist $0x10, %xmm2, %xmm3
call cipher_key_expansion256_round
aeskeygenassist $0x20, %xmm2, %xmm3
call cipher_key_expansion256_round
# Final step: only 4 more words are needed, so reuse the 128-bit round helper
# (it expects the aeskeygenassist result in %xmm2).
aeskeygenassist $0x40, %xmm2, %xmm2
call cipher_key_expansion128_round
pop %rcx # back to the start of w
# Leave the whole schedule in registers for cipher256.
movdqu 0(%rcx), %xmm0 # round key 0
movdqu 0x10(%rcx), %xmm1 # round key 1
movdqu 0x20(%rcx), %xmm2 # round key 2
movdqu 0x30(%rcx), %xmm3 # round key 3
movdqu 0x40(%rcx), %xmm4 # round key 4
movdqu 0x50(%rcx), %xmm5 # round key 5
movdqu 0x60(%rcx), %xmm6 # round key 6
movdqu 0x70(%rcx), %xmm7 # round key 7
movdqu 0x80(%rcx), %xmm8 # round key 8
movdqu 0x90(%rcx), %xmm9 # round key 9
movdqu 0xa0(%rcx), %xmm10 # round key 10
movdqu 0xb0(%rcx), %xmm11 # round key 11
movdqu 0xc0(%rcx), %xmm12 # round key 12
movdqu 0xd0(%rcx), %xmm13 # round key 13
movdqu 0xe0(%rcx), %xmm14 # round key 14
ret
# Internal helper: derive the next 8 words of the AES-256 key schedule.
# In:  %xmm1 = |e|f|g|h| and %xmm2 = |a|b|c|d| (the previous 8 words),
#      %xmm3 = aeskeygenassist result (x in the top lane), %rcx = write cursor.
# Out: %xmm1, %xmm2 = the new 8 words, stored at (%rcx); %rcx advanced by 0x20.
# Uses %xmm4 as scratch.
cipher_key_expansion256_round:
pshufd $0xff, %xmm3, %xmm3 # |x|-|-|-| => |x|x|x|x|
vpslldq $0x4, %xmm1, %xmm4 # |e|f|g|h|
pxor %xmm4, %xmm1 # +|f|g|h|0|
vpslldq $0x4, %xmm1, %xmm4 # |e+f|f+g|g+h|h|
pxor %xmm4, %xmm1 # +|f+g|g+h|h |0|
vpslldq $0x4, %xmm1, %xmm4 # |e+g|f+h|g|h|
pxor %xmm4, %xmm1 # +|f+h|g|h|0|
# |e+f+g+h|f+g+h|g+h|h|
pxor %xmm3, %xmm1 # |x+e+f+g+h|x+f+g+h|x+g+h|x+h|
# Second half: y is taken from lane 2 of the result, i.e. the substituted word
# without rotation or round constant, so the immediate value is not used here.
aeskeygenassist $0x01, %xmm1, %xmm3
pshufd $0xaa, %xmm3, %xmm3 # |-|y|-|-| => |y|y|y|y|
vpslldq $0x4, %xmm2, %xmm4 # |a|b|c|d|
pxor %xmm4, %xmm2 # +|b|c|d|0|
vpslldq $0x4, %xmm2, %xmm4 # |a+b|b+c|c+d|d|
pxor %xmm4, %xmm2 # +|b+c|c+d|d |0|
vpslldq $0x4, %xmm2, %xmm4 # |a+c|b+d|c|d|
pxor %xmm4, %xmm2 # +|b+d|c|d|0|
# |a+b+c+d|b+c+d|c+d|d|
pxor %xmm3, %xmm2 # |y+a+b+c+d|y+b+c+d|y+c+d|y+d|
movdqu %xmm1, (%rcx)
movdqu %xmm2, 0x10(%rcx)
add $0x20, %rcx
ret
# void inv_cipher_key_expansion128(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* cipher_w /*Nb*(Nr+1)*/);
# Builds the decryption key schedule at %rcx from the encryption schedule at %rdx:
# round keys 0 and 10 are copied unchanged, round keys 1..9 are passed through aesimc.
# Afterwards round keys 0..10 of the new schedule are loaded into %xmm0..%xmm10.
inv_cipher_key_expansion128:
push %rcx
push %rdx
movdqu (%rdx), %xmm1 # round key 0: plain copy
movdqu %xmm1, (%rcx)
add $0x10, %rcx
add $0x10, %rdx
mov $9, %rax # number of middle round keys (1..9)
inv_cipher_key_expansion128_loop:
movdqu (%rdx), %xmm1
aesimc %xmm1, %xmm1
movdqu %xmm1, (%rcx)
add $0x10, %rcx
add $0x10, %rdx
dec %rax
jne inv_cipher_key_expansion128_loop
movdqu (%rdx), %xmm1 # round key 10: plain copy
movdqu %xmm1, (%rcx)
pop %rdx
pop %rcx # back to the start of w
# Leave the decryption schedule in registers for inv_cipher128.
movdqu 0(%rcx), %xmm0 # round key 0
movdqu 0x10(%rcx), %xmm1 # round key 1
movdqu 0x20(%rcx), %xmm2 # round key 2
movdqu 0x30(%rcx), %xmm3 # round key 3
movdqu 0x40(%rcx), %xmm4 # round key 4
movdqu 0x50(%rcx), %xmm5 # round key 5
movdqu 0x60(%rcx), %xmm6 # round key 6
movdqu 0x70(%rcx), %xmm7 # round key 7
movdqu 0x80(%rcx), %xmm8 # round key 8
movdqu 0x90(%rcx), %xmm9 # round key 9
movdqu 0xa0(%rcx), %xmm10 # round key 10
ret
# void inv_cipher_key_expansion192(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* cipher_w /*Nb*(Nr+1)*/);
# Builds the decryption key schedule at %rcx from the encryption schedule at %rdx:
# round keys 0 and 12 are copied unchanged, round keys 1..11 are passed through aesimc.
# Afterwards round keys 0..12 of the new schedule are loaded into %xmm0..%xmm12.
inv_cipher_key_expansion192:
push %rcx
push %rdx
movdqu (%rdx), %xmm1 # round key 0: plain copy
movdqu %xmm1, (%rcx)
add $0x10, %rcx
add $0x10, %rdx
mov $11, %rax # number of middle round keys (1..11)
inv_cipher_key_expansion192_loop:
movdqu (%rdx), %xmm1
aesimc %xmm1, %xmm1
movdqu %xmm1, (%rcx)
add $0x10, %rcx
add $0x10, %rdx
dec %rax
jne inv_cipher_key_expansion192_loop
movdqu (%rdx), %xmm1 # round key 12: plain copy
movdqu %xmm1, (%rcx)
pop %rdx
pop %rcx # back to the start of w
# Leave the decryption schedule in registers for inv_cipher192.
movdqu 0(%rcx), %xmm0 # round key 0
movdqu 0x10(%rcx), %xmm1 # round key 1
movdqu 0x20(%rcx), %xmm2 # round key 2
movdqu 0x30(%rcx), %xmm3 # round key 3
movdqu 0x40(%rcx), %xmm4 # round key 4
movdqu 0x50(%rcx), %xmm5 # round key 5
movdqu 0x60(%rcx), %xmm6 # round key 6
movdqu 0x70(%rcx), %xmm7 # round key 7
movdqu 0x80(%rcx), %xmm8 # round key 8
movdqu 0x90(%rcx), %xmm9 # round key 9
movdqu 0xa0(%rcx), %xmm10 # round key 10
movdqu 0xb0(%rcx), %xmm11 # round key 11
movdqu 0xc0(%rcx), %xmm12 # round key 12
ret
# void inv_cipher_key_expansion256(uint32_t* w /*Nb*(Nr+1)*/, const uint32_t* cipher_w /*Nb*(Nr+1)*/);
# Builds the decryption key schedule at %rcx from the encryption schedule at %rdx:
# round keys 0 and 14 are copied unchanged, round keys 1..13 are passed through aesimc.
# Afterwards round keys 0..14 of the new schedule are loaded into %xmm0..%xmm14.
inv_cipher_key_expansion256:
push %rcx
push %rdx
movdqu (%rdx), %xmm1 # round key 0: plain copy
movdqu %xmm1, (%rcx)
add $0x10, %rcx
add $0x10, %rdx
mov $13, %rax # number of middle round keys (1..13)
inv_cipher_key_expansion256_loop:
movdqu (%rdx), %xmm1
aesimc %xmm1, %xmm1
movdqu %xmm1, (%rcx)
add $0x10, %rcx
add $0x10, %rdx
dec %rax
jne inv_cipher_key_expansion256_loop
movdqu (%rdx), %xmm1 # round key 14: plain copy
movdqu %xmm1, (%rcx)
pop %rdx
pop %rcx # back to the start of w
# Leave the decryption schedule in registers for inv_cipher256.
movdqu 0(%rcx), %xmm0 # round key 0
movdqu 0x10(%rcx), %xmm1 # round key 1
movdqu 0x20(%rcx), %xmm2 # round key 2
movdqu 0x30(%rcx), %xmm3 # round key 3
movdqu 0x40(%rcx), %xmm4 # round key 4
movdqu 0x50(%rcx), %xmm5 # round key 5
movdqu 0x60(%rcx), %xmm6 # round key 6
movdqu 0x70(%rcx), %xmm7 # round key 7
movdqu 0x80(%rcx), %xmm8 # round key 8
movdqu 0x90(%rcx), %xmm9 # round key 9
movdqu 0xa0(%rcx), %xmm10 # round key 10
movdqu 0xb0(%rcx), %xmm11 # round key 11
movdqu 0xc0(%rcx), %xmm12 # round key 12
movdqu 0xd0(%rcx), %xmm13 # round key 13
movdqu 0xe0(%rcx), %xmm14 # round key 14
ret
# void cipher128(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
# Encrypts the 16-byte block at %rdx and writes the result to %rcx.
# Registers xmm0-xmm10 hold the round keys(from 0 to 10 in this order);
# they are not loaded here, so cipher_key_expansion128 must have run just before.
# %xmm15 is used as the working state.
cipher128:
movdqu (%rdx), %xmm15 # in
pxor %xmm0, %xmm15 # round 0
aesenc %xmm1, %xmm15 # round 1
aesenc %xmm2, %xmm15 # round 2
aesenc %xmm3, %xmm15 # round 3
aesenc %xmm4, %xmm15 # round 4
aesenc %xmm5, %xmm15 # round 5
aesenc %xmm6, %xmm15 # round 6
aesenc %xmm7, %xmm15 # round 7
aesenc %xmm8, %xmm15 # round 8
aesenc %xmm9, %xmm15 # round 9
aesenclast %xmm10, %xmm15 # round 10
movdqu %xmm15, (%rcx) # out
ret
# void cipher192(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
# Encrypts the 16-byte block at %rdx and writes the result to %rcx.
# Registers xmm0-xmm12 hold the round keys(from 0 to 12 in this order);
# they are not loaded here, so cipher_key_expansion192 must have run just before.
# %xmm15 is used as the working state.
cipher192:
movdqu (%rdx), %xmm15 # in
pxor %xmm0, %xmm15 # round 0
aesenc %xmm1, %xmm15 # round 1
aesenc %xmm2, %xmm15 # round 2
aesenc %xmm3, %xmm15 # round 3
aesenc %xmm4, %xmm15 # round 4
aesenc %xmm5, %xmm15 # round 5
aesenc %xmm6, %xmm15 # round 6
aesenc %xmm7, %xmm15 # round 7
aesenc %xmm8, %xmm15 # round 8
aesenc %xmm9, %xmm15 # round 9
aesenc %xmm10, %xmm15 # round 10
aesenc %xmm11, %xmm15 # round 11
aesenclast %xmm12, %xmm15 # round 12
movdqu %xmm15, (%rcx) # out
ret
# void cipher256(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
# Encrypts the 16-byte block at %rdx and writes the result to %rcx.
# Registers xmm0-xmm14 hold the round keys(from 0 to 14 in this order);
# they are not loaded here, so cipher_key_expansion256 must have run just before.
# %xmm15 is used as the working state.
cipher256:
movdqu (%rdx), %xmm15 # in
pxor %xmm0, %xmm15 # round 0
aesenc %xmm1, %xmm15 # round 1
aesenc %xmm2, %xmm15 # round 2
aesenc %xmm3, %xmm15 # round 3
aesenc %xmm4, %xmm15 # round 4
aesenc %xmm5, %xmm15 # round 5
aesenc %xmm6, %xmm15 # round 6
aesenc %xmm7, %xmm15 # round 7
aesenc %xmm8, %xmm15 # round 8
aesenc %xmm9, %xmm15 # round 9
aesenc %xmm10, %xmm15 # round 10
aesenc %xmm11, %xmm15 # round 11
aesenc %xmm12, %xmm15 # round 12
aesenc %xmm13, %xmm15 # round 13
aesenclast %xmm14, %xmm15 # round 14
movdqu %xmm15, (%rcx) # out
ret
# void inv_cipher128(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
# Decrypts the 16-byte block at %rdx and writes the result to %rcx.
# Registers xmm0-xmm10 hold the round keys(from 0 to 10 in this order);
# they are not loaded here, so inv_cipher_key_expansion128 must have run just before.
# The keys are applied from round 10 down to round 0; %xmm15 is the working state.
inv_cipher128:
movdqu (%rdx), %xmm15 # in
pxor %xmm10, %xmm15 # round 10
aesdec %xmm9, %xmm15 # round 9
aesdec %xmm8, %xmm15 # round 8
aesdec %xmm7, %xmm15 # round 7
aesdec %xmm6, %xmm15 # round 6
aesdec %xmm5, %xmm15 # round 5
aesdec %xmm4, %xmm15 # round 4
aesdec %xmm3, %xmm15 # round 3
aesdec %xmm2, %xmm15 # round 2
aesdec %xmm1, %xmm15 # round 1
aesdeclast %xmm0, %xmm15 # round 0
movdqu %xmm15, (%rcx) # out
ret
# void inv_cipher192(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
# Decrypts the 16-byte block at %rdx and writes the result to %rcx.
# Registers xmm0-xmm12 hold the round keys(from 0 to 12 in this order);
# they are not loaded here, so inv_cipher_key_expansion192 must have run just before.
# The keys are applied from round 12 down to round 0; %xmm15 is the working state.
inv_cipher192:
movdqu (%rdx), %xmm15 # in
pxor %xmm12, %xmm15 # round 12
aesdec %xmm11, %xmm15 # round 11
aesdec %xmm10, %xmm15 # round 10
aesdec %xmm9, %xmm15 # round 9
aesdec %xmm8, %xmm15 # round 8
aesdec %xmm7, %xmm15 # round 7
aesdec %xmm6, %xmm15 # round 6
aesdec %xmm5, %xmm15 # round 5
aesdec %xmm4, %xmm15 # round 4
aesdec %xmm3, %xmm15 # round 3
aesdec %xmm2, %xmm15 # round 2
aesdec %xmm1, %xmm15 # round 1
aesdeclast %xmm0, %xmm15 # round 0
movdqu %xmm15, (%rcx) # out
ret
# void inv_cipher256(uint8_t* out /*4*Nb*/, const uint8_t* in /*4*Nb*/);
# Decrypts the 16-byte block at %rdx and writes the result to %rcx.
# Registers xmm0-xmm14 hold the round keys(from 0 to 14 in this order);
# they are not loaded here, so inv_cipher_key_expansion256 must have run just before.
# The keys are applied from round 14 down to round 0; %xmm15 is the working state.
inv_cipher256:
movdqu (%rdx), %xmm15 # in
pxor %xmm14, %xmm15 # round 14
aesdec %xmm13, %xmm15 # round 13
aesdec %xmm12, %xmm15 # round 12
aesdec %xmm11, %xmm15 # round 11
aesdec %xmm10, %xmm15 # round 10
aesdec %xmm9, %xmm15 # round 9
aesdec %xmm8, %xmm15 # round 8
aesdec %xmm7, %xmm15 # round 7
aesdec %xmm6, %xmm15 # round 6
aesdec %xmm5, %xmm15 # round 5
aesdec %xmm4, %xmm15 # round 4
aesdec %xmm3, %xmm15 # round 3
aesdec %xmm2, %xmm15 # round 2
aesdec %xmm1, %xmm15 # round 1
aesdeclast %xmm0, %xmm15 # round 0
movdqu %xmm15, (%rcx) # out
ret
$ make clean && make && ./test.exe
rm -rf test *.o
cc -I. -Wall -Werror -O2 -c -o test.o test.c
cc -I. -Wall -Werror -O2 -c -o test_s.o test_s.s
cc -I. -Wall -Werror -O2 -o test test.o test_s.o
AES-128
Cipher Key = 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
Input = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
Output = 69 c4 e0 d8 6a 7b 04 30 d8 cd b7 80 70 b4 c5 5a
Input(Inv) = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
AES-192
Cipher Key = 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17
Input = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
Output = dd a9 7c a4 86 4c df e0 6e af 70 a0 ec 0d 71 91
Input(Inv) = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
AES-256
Cipher Key = 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
Input = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
Output = 8e a2 b7 ca 51 67 45 bf ea fc 49 90 4b 49 60 89
Input(Inv) = 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff
references
- [1] Federal Information Processing Standards Publication 197
- [2] Intel® Advanced Encryption Standard (AES) New Instructions Set