1.AES的加密流程

明文先与初始密钥进行轮密钥加,然后经过9轮的字节代换、行移位、列混合、轮密钥加,最后一轮(第10轮)只做字节代换、行移位、轮密钥加,不做列混合

aes_struct

字节代换

非线性运算,其实就是查表。AES有一个标准的S盒与逆S盒

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* AES forward S-box stored as a flat 256-entry table: the byte to be
 * substituted is used directly as the index (S[b] is the replacement for b). */
unsigned char S[256] = {
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};

逆字节替换:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* AES inverse S-box as a flat 256-entry table, indexed directly by the
 * byte value; used for the inverse byte substitution. */
unsigned char inv_S[256] = {
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
};

这个表是怎么查的呢: 把字节的高4位作为行值,低4位作为列值

有些实现把S盒定义为16x16的二维数组S[16][16],字节替换时取该字节的高4位作为行下标,低4位作为列下标。这种方式需要先对被替换的字节分别取高、低4位再去查表,无疑把字节替换操作复杂化了。采用S[256]一维数组则可以直接把该字节的值作为S盒数组的下标

1
2
3
4
5
6
7
/*
 * SubBytes: replace every byte of the 4x4 state with its S-box image.
 *
 * state  4x4 state matrix; substituted in place.
 * Returns 0 (there is no failure path).
 */
int subBytes(uint8_t (*state)[4]){
    for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
            /* The byte value itself is the index into the flat 256-entry table. */
            state[i][j] = S[state[i][j]];
        }
    }
    return 0;
}

行位移

简单的左循环移位操作

状态矩阵第0行左移0字节,第1行左移1字节,第2行左移2字节,第3行左移3字节

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// uint8_t y[4] -> uint32_t x (y[0] becomes the most significant byte)
#define LOAD32H(x, y) \
    do { (x) = ((uint32_t)((y)[0] & 0xff) << 24) | ((uint32_t)((y)[1] & 0xff) << 16) | \
               ((uint32_t)((y)[2] & 0xff) << 8)  | ((uint32_t)((y)[3] & 0xff)); } while (0)

// uint32_t x -> uint8_t y[4] (most significant byte goes to y[0])
#define STORE32H(x, y) \
    do { (y)[0] = (uint8_t)(((x) >> 24) & 0xff); (y)[1] = (uint8_t)(((x) >> 16) & 0xff); \
         (y)[2] = (uint8_t)(((x) >> 8) & 0xff);  (y)[3] = (uint8_t)((x) & 0xff); } while (0)

// Rotate the uint32_t x left by n bits (0 <= n <= 32).
// Both shift counts are masked to 0..31 so that n == 0 does not shift by 32,
// which would be undefined behavior for a 32-bit operand.
#define ROF32(x, n) (((x) << ((n) & 31)) | ((x) >> ((32 - (n)) & 31)))

/*
 * ShiftRows: rotate row i of the 4x4 state left by i bytes (row 0 is unchanged).
 *
 * state  4x4 state matrix, state[i] being row i; modified in place.
 * Returns 0 (there is no failure path).
 */
int shiftRows(uint8_t (*state)[4]){
    for (int i = 0; i < 4; ++i) {
        /* Pack the row into one 32-bit word so the byte rotation is a bit rotation. */
        uint32_t row;
        LOAD32H(row, state[i]);
        row = ROF32(row, 8 * i);
        STORE32H(row, state[i]);
    }
    return 0;
}

列混合

正常的AES是通过矩阵相乘来实现的,经过行移位的状态矩阵与固定的矩阵相乘

image-20260305202619057
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// Multiply two bytes in GF(2^8), reducing with 0x1B (x^8 + x^4 + x^3 + x + 1).
// Shift-and-add: for each of the 8 bits of u, conditionally add (xor) the
// current multiple of v, then double v in the field.
uint8_t GMul(uint8_t u, uint8_t v) {
    uint8_t product = 0;
    int remaining = 8;

    while (remaining-- > 0) {
        const uint8_t addend = (u & 0x01) ? v : 0;
        product ^= addend;

        /* Remember whether doubling overflows past x^7 before the shift drops it. */
        const int overflow = (v & 0x80);
        v <<= 1;
        if (overflow) {
            v ^= 0x1B;
        }

        u >>= 1;
    }
    return product;
}

/*
 * MixColumns: left-multiply the state by the fixed 4x4 matrix M over GF(2^8),
 * i.e. new[i][j] = xor over k of GMul(M[i][k], old[k][j]).
 *
 * state  4x4 state matrix; modified in place.
 * Returns 0 (there is no failure path).
 */
int mixColumns(uint8_t (*state)[4]){
    static const uint8_t M[4][4] = {{2, 3, 1, 1},
                                    {1, 2, 3, 1},
                                    {1, 1, 2, 3},
                                    {3, 1, 1, 2}};
    uint8_t tmp[4][4];

    /* Work from a snapshot: every output byte needs the whole old column. */
    for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
            tmp[i][j] = state[i][j];
        }
    }

    for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
            state[i][j] = GMul(M[i][0], tmp[0][j]) ^ GMul(M[i][1], tmp[1][j])
                        ^ GMul(M[i][2], tmp[2][j]) ^ GMul(M[i][3], tmp[3][j]);
        }
    }
    return 0;
}

轮密钥加

128位轮密钥Ki同状态矩阵中的数据进行逐位异或操作

其中,密钥Ki中的每一个字W[4i],W[4i+1],W[4i+2],W[4i+3] 每一个都为32bit

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Extract byte n of the uint32_t x, counting from the least significant byte:
// BYTE(x, 0) is the lowest byte, BYTE(x, 3) the highest.
#define BYTE(x, n) (((x) >> (8 * (n))) & 0xff)

/*
 * AddRoundKey: xor the 128-bit round key into the state.
 *
 * state  4x4 state matrix (state[i][j] = row i, column j); modified in place.
 * key    four 32-bit round-key words; word j supplies column j, with its most
 *        significant byte going to row 0.
 * Returns 0 (there is no failure path).
 */
int addRoundKey(uint8_t (*state)[4], const uint32_t *key) {
    for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
            /* Column j comes from word key[j]; row i is byte (3 - i) of that word.
             * Indexing the word by the row (key[i]) would give every column of a
             * row the same key byte. */
            state[i][j] ^= (uint8_t)BYTE(key[j], 3 - i);
        }
    }
    return 0;
}

密钥扩展

首先,初始密钥是128bit

放到一个4*4的矩阵里面,每一列4个字节组成一个字,依次命名为W[0]、W[1]、W[2]和W[3]

image-20260305203810126

然后,对W数组进行扩充,构成总共44列的扩展密钥数组

如果i不是4的倍数,那么:W[i] = W[i-4] ^ W[i-1]

如果i是4的倍数,那么:W[i] = W[i-4] ^ T(W[i-1])

其中,T是一个函数,由三部分组成:

  1. 字循环:将1个字中的4个字节循环左移1个字节
  2. 字节代换:对字循环的结果使用S盒进行字节代换
  3. 轮常量异或:将前两步的结果同轮常量Rcon[j]进行异或,其中j表示轮数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* Expanded AES-128 key material. */
typedef struct {
    uint32_t eK[44];   /* encryption round-key words (encKey) */
    uint32_t dK[44];   /* decryption round-key words (decKey) */
    int      Nr;       /* number of rounds: 10 */
} AesKey;

// S-box substitution of each byte of x combined with a rotate-left by one byte.
// Each table entry is widened to uint32_t before shifting: S[] holds unsigned
// char, which promotes to signed int, and shifting a value >= 0x80 left by 24
// would overflow into the sign bit.
#define MIX(x) ((((uint32_t)S[BYTE(x, 2)] << 24) & 0xff000000) ^ (((uint32_t)S[BYTE(x, 1)] << 16) & 0xff0000) ^ (((uint32_t)S[BYTE(x, 0)] << 8) & 0xff00) ^ ((uint32_t)S[BYTE(x, 3)] & 0xff))
/* Round constants for the ten key-expansion rounds; the constant byte sits in
 * the most significant byte of each word. */
static const uint32_t rcon[10] = {
    (uint32_t)0x01 << 24, (uint32_t)0x02 << 24,
    (uint32_t)0x04 << 24, (uint32_t)0x08 << 24,
    (uint32_t)0x10 << 24, (uint32_t)0x20 << 24,
    (uint32_t)0x40 << 24, (uint32_t)0x80 << 24,
    (uint32_t)0x1B << 24, (uint32_t)0x36 << 24
};

/*
 * Expand a 128-bit key into the 44-word encryption schedule aesKey->eK.
 *
 * key     16 key bytes; must not be NULL.
 * keyLen  key length in bytes; only 16 is accepted.
 * aesKey  destination; only eK is written here (dK and Nr are left untouched).
 *
 * Returns 0 on success, -1 on a NULL pointer or an unsupported key length.
 */
int keyExpansion(const uint8_t *key, uint32_t keyLen, AesKey *aesKey){
    if (NULL == key || NULL == aesKey) {
        return -1;
    }
    if (keyLen != 16) {
        return -1;
    }
    uint32_t *w = aesKey->eK;   /* encryption key schedule */

    /* Initial key words w[0..3], loaded most-significant byte first. */
    for (int i = 0; i < 4; ++i) {
        LOAD32H(w[i], key + 4 * i);
    }
    /* Derived words w[4..43]: only the first word of each group of four goes
     * through MIX and the round constant. */
    for (int i = 0; i < 10; ++i) {
        const int base = i * 4;
        w[base + 4] = w[base + 0] ^ MIX(w[base + 3]) ^ rcon[i];
        w[base + 5] = w[base + 1] ^ w[base + 4];
        w[base + 6] = w[base + 2] ^ w[base + 5];
        w[base + 7] = w[base + 3] ^ w[base + 6];
    }
    return 0;
}

总结

伪代码如下:

1
2
3
4
5
6
7
8
9
10
11
state ← plaintext
AddRoundKey(state, k0)
for r = 1 ... 9
SubBytes(state)
ShiftRows(state)
MixColumns(state)
AddRoundKey(state, kr)
SubBytes(state)
ShiftRows(state)
AddRoundKey(state, k10)
ciphertext ← state

然后呢,为了加速运算,有了基于表的AES实现

基于表实现的AES实现

基于表实现的AES的思路:将大部分的运算通过查表实现。 通过预先生成的表来进行加密。

调整AES的流程

  1. AddRoundKey(state, k_0)放入循环,AddRoundKey(state,k_9) 移出循环
  2. shiftRow线性变化,SubBytes映射变换,可以调换位置
  3. 如果对轮密钥也做同样的移位,AddRoundKey与ShiftRows也可以调换位置

​ 所以从:字节代换–> 行移位–> 列混合 –> 轮密钥加

​ 变成了:轮密钥加–> 字节代换–> 行移位–> 列混合

​ 再变成:轮密钥加–> 行移位–> 字节代换–> 列混合

​ 最后变成:行移位–> 轮密钥加–> 字节代换–> 列混合

1
2
3
4
5
6
7
8
9
10
11
state ← plaintext
for r = 1 ... 9
ShiftRows(state)
AddRoundKey(state, k_{r-1})
SubBytes(state)
MixColumns(state)
ShiftRows(state)
AddRoundKey(state, k_9)
SubBytes(state)
AddRoundKey(state, k_{10})
ciphertext ← state

第一个表: T Boxs

AddRoundKey(state, k_{r-1})SubBytes(state): 可以合成一个过程: image-20260305214357843

不难看出,Tbox是一个10*16*256的表

注意,这里面实现的时候,要注意将轮密钥进行移位

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Build the T-boxes: tbox[r][pos][x] is the byte at position pos after a state
 * filled with x goes through add_round_shiftkey (round key r) and subByte;
 * for r == 9 the final add_round_key with words 40..43 is folded in as well.
 */
void getTbox(u32 expandedKey[44], u8 tbox[10][16][256]){
    for (int round = 0; round < 10; round++) {
        u32 *roundKey = expandedKey + 4 * round;
        for (int value = 0; value < 256; value++) {
            /* Every position holds the same candidate byte, so one pass
             * yields the table entry for all 16 positions at once. */
            u8 state[16];
            memset(state, value, 16);
            add_round_shiftkey(state, roundKey);
            subByte(state);
            if (round == 9) {
                add_round_key(state, expandedKey + 40);
            }
            for (int pos = 0; pos < 16; pos++) {
                tbox[round][pos][value] = state[pos];
            }
        }
    }
}

第二个表: Tyi Tables

针对MixColumns

image-20260305221002409 Ty 表:你输入一个 8 位的字节 ,它直接吐出一个 32 位(4 字节)的列向量。这四个表的结果一异或,这列的混淆就做完了

Tyi Table

table[x][y][z] 存的是:在第 x 列、第 z 行的那个字节,如果它的值是 y,它会对最终的 MixColumns 结果产生多大的影响(贡献值)。

  • 举个例子:如果想知道第 1 列、第 2 行的字节(假设它的值是 128,即 0x80)在列混淆后变成了什么,你不需要做乘法,只需去内存里读取 table[1][128][2] 的值
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Multiply ap by bp in GF(2^8), reducing with 0x1b whenever doubling
 * overflows bit 7. Stops early once either working operand reaches zero.
 */
u8 gmul(u8 ap, u8 bp) {
    u8 p = 0, a = ap, b = bp;
    while (a != 0 && b != 0) {
        /* Parenthesized on purpose: `b & 1 != 0` parses as `b & (1 != 0)`. */
        if ((b & 1) != 0) {
            p ^= a;
        }
        if ((a & 0x80) != 0) {
            a = (u8)((a << 1) ^ 0x1b);
        } else {
            a <<= 1;
        }
        b >>= 1;
    }
    return p;
}
/*
 * Fill the MixColumns contribution tables:
 * table[x][v][z] = coef[x][z] * v in GF(2^8), for every byte value v.
 */
void getTyiTable(u8 table[4][256][4]) {
    /* Rows of the MixColumns matrix; a coefficient of 1 copies the byte as is. */
    static const int coef[4][4] = {
        {2, 3, 1, 1},
        {1, 2, 3, 1},
        {1, 1, 2, 3},
        {3, 1, 1, 2}
    };

    for (int value = 0; value < 256; value++) {
        for (int x = 0; x < 4; x++) {
            for (int z = 0; z < 4; z++) {
                if (coef[x][z] == 1) {
                    table[x][value][z] = value;
                } else {
                    table[x][value][z] = gmul(value, coef[x][z]);
                }
            }
        }
    }
}

Xor Table

计算 a ^ b 时,白盒代码不再执行运算,而是去内存里读 table[a][b]

把整个算法变成一条纯粹的“内存读取链路”

1
2
3
4
5
/* Fill the 16x16 nibble table so that table[a][b] == a ^ b. */
void getXorTable(u8 table[16][16]) {
    for (int cell = 0; cell < 16 * 16; cell++) {
        const int row = cell / 16;
        const int col = cell % 16;
        table[row][col] = row ^ col;
    }
}

然后利用上面的Tboxs(AddRoundKey与SubBytes的结果),结合Tyi_tables,得到Tyi_box:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Compose the T-boxes of rounds 0..8 with the Tyi tables:
 * Tyibox[r][pos][x] packs the four contribution bytes of Tbox[r][pos][x]
 * into one word, the byte from tyitable[0] in bits 31..24 down to the byte
 * from tyitable[3] in bits 7..0.
 */
void getTyiBox(u8 Tbox[10][16][256], u32 Tyibox[9][16][256]){
    u8 tyitable[4][256][4] = {0};
    getTyiTable(tyitable);

    for (int round = 0; round < 9; round++) {
        for (int value = 0; value < 256; value++) {
            for (int pos = 0; pos < 16; pos++) {
                const int row = pos % 4;            /* index inside the 4-byte column */
                const u8 t = Tbox[round][pos][value];
                u32 word = 0;
                for (int k = 0; k < 4; k++) {
                    word = (word << 8) | tyitable[k][t][row];
                }
                Tyibox[round][pos][value] = word;
            }
        }
    }
}

总实现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Table-driven AES-128 encryption of one block, in place in `input`.
 * All tables are rebuilt from `key` on every call. Rounds 0..8 are
 * ShiftRows followed by Tyibox lookups whose four words are combined nibble
 * by nibble through the xor table; the last step is ShiftRows plus tbox[9].
 */
void aes_encrypt_by_table(u8 input[16], u8 key[16]){
    u8 tbox[10][16][256] = {0}, xortable[16][16] = {0};
    u32 expandedKey[44] = {0}, tyibox[9][16][256] = {0};

    expandKey(key, expandedKey);
    getTbox(expandedKey, tbox);
    getTyiBox(tbox, tyibox);
    getXorTable(xortable);

    for (int round = 0; round < 9; round++) {
        shiftRows(input);
        for (int col = 0; col < 4; col++) {
            u8 *cell = input + 4 * col;
            /* Fetch all four words first: the column bytes are overwritten below. */
            const u32 a = tyibox[round][4 * col + 0][cell[0]];
            const u32 b = tyibox[round][4 * col + 1][cell[1]];
            const u32 c = tyibox[round][4 * col + 2][cell[2]];
            const u32 d = tyibox[round][4 * col + 3][cell[3]];

            /* Output byte k is a ^ b ^ c ^ d restricted to bits (31-8k)..(24-8k),
             * computed as two independent nibble xor trees. */
            for (int k = 0; k < 4; k++) {
                const int lo = 24 - 8 * k;
                const int hi = lo + 4;
                const u32 aa = xortable[(a >> hi) & 0xf][(b >> hi) & 0xf];
                const u32 bb = xortable[(c >> hi) & 0xf][(d >> hi) & 0xf];
                const u32 cc = xortable[(a >> lo) & 0xf][(b >> lo) & 0xf];
                const u32 dd = xortable[(c >> lo) & 0xf][(d >> lo) & 0xf];
                cell[k] = (xortable[aa][bb] << 4) | xortable[cc][dd];
            }
        }
    }

    shiftRows(input);
    for (int pos = 0; pos < 16; pos++) {
        input[pos] = tbox[9][pos][input[pos]];
    }
}

另外,如果要加混淆的话,

  1. mixBijOut 混淆

    相当于多了一个TyiBoxes,操作一样

    第一部分(TyiBoxes + XOR): 读取最初的 input,查表异或后,将结果写回到了 input[4*j + 0~3]

    第二部分(mixBijOut + XOR):读取的 input[4*j + 0~3],得到一个混合双射的input[4*j + 0~3]

    如果mixBijOut有第 10 轮:“额外”应用:如果白盒生成器为 mixBijOut 生成了第 10 轮的数据,它通常被用作输出层混淆。这意味着程序在第 10 轮使用普通的 Tbox 计算出标准的 AES 密文后,并没有直接输出,而是用这多出来的第 10 轮 mixBijOut 对最终的密文再次进行了一次非线性编码。

  2. Xor Table 混淆

在白盒实现中,对这个表加上非线性编码(乱码壳)的。只能看到程序在疯狂地读取内存地址(查表),而根本不知道这些内存地址的跳转,实际上是在做异或运算。从汇编代码层面中抹除了加密算法所需的数学运算。

注意:普通的异或运算具有全局普适性。不管你是第 1 轮还是第 9 轮,不管你是第 0 列还是第 3 列,`a ^ b` 的结果永远只取决于 a 和 b 本身。4bit只有16种可能,所以我们仅仅需要16*16的一张表即可

但是加上非线性编码之后:xortable[i][j][x][y] = Decode_NodeOut( Encode_NodeIn1(x) ^ Encode_NodeIn2(y) );

每一个节点的 Encode 和 Decode 都是完全随机且独立的

总共有 9 轮,每轮 4 列,每列 24 个异或节点,一个节点对应的表的大小16*16,共需要[9][96][16][16]大小

DFA介绍

简单来说,就是把故障注入到最后两次列混淆之间(最后一轮没有列混淆)

(在AES-128中,也就是第8轮的列混淆和第9轮的列混淆之间)

1
2
3
4
5
ShiftRows(state)
AddRoundKey(state, k_{r-1})
这里 <<-----------------
SubBytes(state)
MixColumns(state)

但是,查表实现的怎么办呢?

我一般放在行移位之前,后面好像也无所谓,譬如:

1
2
3
4
5
6
7
8
9
10
u8 DFA = 0; 
............
// DFA Attack
for (int i = 0; i < 9; i++) {
if (DFA && i == 8) {
input[rand() % 16] = rand() % 256;
}
shiftRows(input);
............
}

原理就是:每一次 MixColumns ,都会让一个字节的差异变成四个字节的差异。

然后使用phoenixAES还原第10轮的密钥

1
2
3
4
5
6
# 'tracefile' (tracefile): path to the trace file holding the plaintext/ciphertext pairs. Its first line must be the correct, fault-free final ciphertext.
# [] (lastroundkeys): list of already known last round keys. Pass [] when nothing is known.
# True (encrypt): direction of the target algorithm. Pass True for data produced by encryption, False for data produced by decryption.
# False (outputbeforelastrounds): whether the output was captured before the last round. The last round of standard AES (round 10) has no MixColumns; False means the ciphertext is the normal output of all 10 rounds, so the equations are solved without MixColumns. Pass True only for unusual variants that emit the state right after round 9 (MixColumns included).
# verbose=2 (verbose): log verbosity; level 2 turns on the most detailed debug output.
phoenixAES.crack_file('tracefile', [], True, False, 2)

再使用aes_keyschedule去还原初始密钥即可

理论上来说,最少最少需要5份数据,但必须让错误覆盖到所有的 4 个组

DFA练习

2025 强网杯车联网 car music

解包之后定位关键逻辑:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
// Decompiler output for the table-based AES routine of the challenge binary.
// Structure visible in the listing:
//   * outer do/while: repeats until n9 == 9; each pass first permutes the 16
//     input bytes with vqtbl1q_s8 (annotated as shiftRows), then advances every
//     xortable pointer by 24576 and the tyibox/mixBijOut offset v3 by 16384;
//   * inner do/while: n24576 advances by 6144 until it reaches 24576 and
//     input_1 by 4 bytes. Each pass loads four 32-bit words from tyibox,
//     combines their nibbles through chained xortable lookups into four bytes,
//     then repeats the same lookup chain on words loaded from mixBijOut;
//   * tail: one more vqtbl1q_s8 permutation, 16 byte lookups in Tbox at
//     offsets 0, 256, ..., 3840, and a byte-wise copy of the result to out.
// NOTE(review): Bytes15, the stru_* symbols and input[1]/out[1] accesses are
// decompiler typing artifacts; presumably both buffers are 16 bytes - confirm.
int8x16_t __fastcall Baes_Table_AES(Bytes15 *input, Bytes15 *out)
{
unsigned __int8 *xortable; // x11
__int64 v3; // x9
__int64 n9; // x10
char *v5; // x13
char *v6; // x14
char *v7; // x15
char *v8; // x16
char *v9; // x17
char *v10; // x2
char *v11; // x3
__int64 n24576; // x19
unsigned __int8 *input_1; // x20
__int64 v14; // x21
unsigned __int64 v15; // x25
unsigned __int64 v16; // x24
unsigned __int64 v17; // x23
unsigned __int64 v18; // x22
unsigned __int8 v19; // w26
unsigned __int8 v20; // w27
unsigned __int8 v21; // w8
__int64 v22; // x30
__int64 v23; // x1
char *v24; // x23
unsigned __int8 v25; // w12
unsigned __int64 v26; // x22
char *v27; // x12
unsigned __int64 v28; // x24
unsigned __int64 v29; // x25
unsigned __int64 v30; // x23
char v31; // w8
int8x16_t result; // q0
char v33; // w10
_BYTE *v34; // x11
__int64 v35; // x13
_BYTE *v36; // x12
_BYTE *v37; // x12
__int64 v38; // x13
_BYTE *v39; // x12
__int64 v40; // x13
_BYTE *v41; // x12
__int64 v42; // x13
_BYTE *v43; // x12
__int64 v44; // x13
_BYTE *v45; // x12
__int64 v46; // x13
_BYTE *v47; // x12
__int64 v48; // x13
_BYTE *v49; // x12
__int64 v50; // x13
_BYTE *v51; // x12
__int64 v52; // x13
_BYTE *v53; // x12
__int64 v54; // x13
_BYTE *v55; // x12
__int64 v56; // x13
_BYTE *v57; // x12
__int64 v58; // x13
_BYTE *v59; // x12
__int64 v60; // x13

xortable = (unsigned __int8 *)&::xortable;
v3 = 0;
n9 = 0;
v5 = (char *)&::xortable + (unsigned int)&stru_1700;
v6 = (char *)&::xortable + (unsigned int)&stru_15F8.r_info;
v7 = (char *)&::xortable + (unsigned int)&stru_14F0.r_addend;
v8 = (char *)&::xortable + (unsigned int)&stru_1400;
v9 = (char *)&::xortable + (unsigned int)&stru_12F8.r_info;
v10 = (char *)&::xortable + (unsigned int)&stru_11F0.r_addend;
v11 = (char *)&::xortable + (unsigned int)&stru_1100;
_ReadStatusReg(TPIDR_EL0);
do // loops 9 times (until n9 == 9)
{
n24576 = 0;
input_1 = (unsigned __int8 *)&input->field_0[3];
v14 = v3;
*(int8x16_t *)input->field_0 = vqtbl1q_s8(*(int8x16_t *)input->field_0, (int8x16_t)xmmword_7360);// shiftRows
do // loops 4 times (n24576 goes 0, 6144, 12288, 18432)
{
v15 = *(unsigned int *)((char *)&tyibox[*(input_1 - 3)] + v14);
v16 = *(unsigned int *)((char *)&tyibox[*(input_1 - 2) + 256] + v14);
v17 = *(unsigned int *)((char *)&tyibox[*(input_1 - 1) + 512] + v14);
v18 = *(unsigned int *)((char *)&tyibox[*input_1 + 768] + v14);
v19 = xortable[16 * xortable[((v16 >> 24) & 0xF | (16 * ((v15 >> 24) & 0xF))) + 512 + n24576]
+ 1280
+ xortable[((v18 >> 24) & 0xF | (16 * ((v17 >> 24) & 0xF))) + 768 + n24576]
+ n24576]
| (16
* xortable[16 * xortable[((v15 >> 24) & 0xF0 | (v16 >> 28)) + n24576]
+ 1024
+ xortable[((v17 >> 24) & 0xF0 | (v18 >> 28)) + 256 + n24576]
+ n24576]);
*(input_1 - 3) = v19;
v20 = xortable[16 * xortable[((v16 >> 16) & 0xF | (16 * ((v15 >> 16) & 0xF))) + 2048 + n24576]
+ 2816
+ xortable[((v18 >> 16) & 0xF | (16 * ((v17 >> 16) & 0xF))) + 2304 + n24576]
+ n24576]
| (16
* xortable[16 * xortable[((v16 >> 20) & 0xF | (16 * ((v15 >> 20) & 0xF))) + 1536 + n24576]
+ 2560
+ xortable[((v18 >> 20) & 0xF | (16 * ((v17 >> 20) & 0xF))) + 1792 + n24576]
+ n24576]);
*(input_1 - 2) = v20;
v21 = v11[16 * xortable[((v16 >> 8) & 0xF | (16 * ((v15 >> 8) & 0xF))) + 3584 + n24576]
+ xortable[((v18 >> 8) & 0xF | (16 * ((v17 >> 8) & 0xF))) + 3840 + n24576]
+ n24576]
| (16
* xortable[16
* xortable[(((unsigned __int16)v16 >> 12) & 0xF | (16LL * (((unsigned __int16)v15 >> 12) & 0xF)))
+ 3072
+ n24576]
+ 4096
+ xortable[(((unsigned __int16)v18 >> 12) & 0xF | (16LL * (((unsigned __int16)v17 >> 12) & 0xF)))
+ 3328
+ n24576]
+ n24576]);
v22 = ((unsigned __int8)v18 >> 4) | (16LL * ((unsigned __int8)v17 >> 4));
*(input_1 - 1) = v21;
v23 = (unsigned __int8)v7[(v18 & 0xF | (16 * (v17 & 0xF))) + n24576]
+ 16LL * (unsigned __int8)v8[(v16 & 0xF | (16 * (v15 & 0xF))) + n24576];
v24 = (char *)&mixBijOut[v20] + v14;
v25 = v5[v23 + n24576]
| (16
* v6[16 * (unsigned __int8)v10[(((unsigned __int8)v16 >> 4) | (16LL * ((unsigned __int8)v15 >> 4))) + n24576]
+ (unsigned __int8)v9[v22 + n24576]
+ n24576]);
*input_1 = v25;
v26 = *(unsigned int *)((char *)&mixBijOut[v19] + v14);
v27 = (char *)&mixBijOut[v25] + v14;
v28 = *(unsigned int *)((char *)&mixBijOut[v21 + 512] + v14);
v14 += 4096;
v29 = *((unsigned int *)v24 + 256);
v30 = *((unsigned int *)v27 + 768);
*(input_1 - 3) = xortable[16 * xortable[((v29 >> 24) & 0xF | (16 * ((v26 >> 24) & 0xF))) + 512 + n24576]
+ 1280
+ xortable[((v30 >> 24) & 0xF | (16 * ((v28 >> 24) & 0xF))) + 768 + n24576]
+ n24576]
| (16
* xortable[16 * xortable[((v26 >> 24) & 0xF0 | (v29 >> 28)) + n24576]
+ 1024
+ xortable[((v28 >> 24) & 0xF0 | (v30 >> 28)) + 256 + n24576]
+ n24576]);
*(input_1 - 2) = xortable[16 * xortable[((v29 >> 16) & 0xF | (16 * ((v26 >> 16) & 0xF))) + 2048 + n24576]
+ 2816
+ xortable[((v30 >> 16) & 0xF | (16 * ((v28 >> 16) & 0xF))) + 2304 + n24576]
+ n24576]
| (16
* xortable[16 * xortable[((v29 >> 20) & 0xF | (16 * ((v26 >> 20) & 0xF))) + 1536 + n24576]
+ 2560
+ xortable[((v30 >> 20) & 0xF | (16 * ((v28 >> 20) & 0xF))) + 1792 + n24576]
+ n24576]);
*(input_1 - 1) = v11[16 * xortable[((v29 >> 8) & 0xF | (16 * ((v26 >> 8) & 0xF))) + 3584 + n24576]
+ xortable[((v30 >> 8) & 0xF | (16 * ((v28 >> 8) & 0xF))) + 3840 + n24576]
+ n24576]
| (16
* xortable[16
* xortable[(((unsigned __int16)v29 >> 12) & 0xF
| (16LL * (((unsigned __int16)v26 >> 12) & 0xF)))
+ 3072
+ n24576]
+ 4096
+ xortable[(((unsigned __int16)v30 >> 12) & 0xF
| (16LL * (((unsigned __int16)v28 >> 12) & 0xF)))
+ 3328
+ n24576]
+ n24576]);
LOBYTE(v27) = v6[16
* (unsigned __int8)v10[(((unsigned __int8)v29 >> 4) | (16LL * ((unsigned __int8)v26 >> 4)))
+ n24576]
+ (unsigned __int8)v9[(((unsigned __int8)v30 >> 4) | (16LL * ((unsigned __int8)v28 >> 4))) + n24576]
+ n24576];
v31 = v5[16 * (unsigned __int8)v8[(v29 & 0xF | (16 * (v26 & 0xF))) + n24576]
+ (unsigned __int8)v7[(v30 & 0xF | (16 * (v28 & 0xF))) + n24576]
+ n24576];
n24576 += 6144;
*input_1 = v31 | (16 * (_BYTE)v27);
input_1 += 4;
}
while ( n24576 != 24576 );
++n9;
xortable += 24576;
v5 += 24576;
v6 += 24576;
v7 += 24576;
v8 += 24576;
v9 += 24576;
v10 += 24576;
v11 += 24576;
v3 += 16384;
}
while ( n9 != 9 );
result = vqtbl1q_s8(*(int8x16_t *)input->field_0, (int8x16_t)xmmword_7360);
*(int8x16_t *)input->field_0 = result;
v33 = Tbox[(unsigned __int8)input->field_0[0]];
v34 = &Tbox[(unsigned __int8)input->field_0[1]];
v35 = (unsigned __int8)input->field_0[3];
v36 = &Tbox[(unsigned __int8)input->field_0[2]];
input->field_0[0] = v33;
input->field_0[1] = v34[256];
LOBYTE(v34) = v36[512];
v37 = &Tbox[v35];
v38 = (unsigned __int8)input->field_0[4];
input->field_0[2] = (char)v34;
LOBYTE(v34) = v37[768];
v39 = &Tbox[v38];
v40 = (unsigned __int8)input->field_0[5];
input->field_0[3] = (char)v34;
LOBYTE(v34) = v39[1024];
v41 = &Tbox[v40];
v42 = (unsigned __int8)input->field_0[6];
input->field_0[4] = (char)v34;
LOBYTE(v34) = v41[1280];
v43 = &Tbox[v42];
v44 = (unsigned __int8)input->field_0[7];
input->field_0[5] = (char)v34;
LOBYTE(v34) = v43[1536];
v45 = &Tbox[v44];
v46 = (unsigned __int8)input->field_0[8];
input->field_0[6] = (char)v34;
LOBYTE(v34) = v45[1792];
v47 = &Tbox[v46];
v48 = (unsigned __int8)input->field_0[9];
input->field_0[7] = (char)v34;
LOBYTE(v34) = v47[2048];
v49 = &Tbox[v48];
v50 = (unsigned __int8)input->field_0[10];
input->field_0[8] = (char)v34;
LOBYTE(v34) = v49[2304];
v51 = &Tbox[v50];
v52 = (unsigned __int8)input->field_0[11];
input->field_0[9] = (char)v34;
LOBYTE(v34) = v51[2560];
v53 = &Tbox[v52];
v54 = (unsigned __int8)input->field_0[12];
input->field_0[10] = (char)v34;
LOBYTE(v34) = v53[2816];
v55 = &Tbox[v54];
v56 = (unsigned __int8)input->field_0[13];
input->field_0[11] = (char)v34;
LOBYTE(v34) = v55[3072];
v57 = &Tbox[v56];
v58 = (unsigned __int8)input->field_0[14];
input->field_0[12] = (char)v34;
LOBYTE(v34) = v57[3328];
v59 = &Tbox[v58];
v60 = (unsigned __int8)input[1].field_0[0];
input->field_0[13] = (char)v34;
input->field_0[14] = v59[3584];
input[1].field_0[0] = Tbox[v60 + 3840];
out->field_0[0] = v33;
out->field_0[1] = input->field_0[1];
out->field_0[2] = input->field_0[2];
out->field_0[3] = input->field_0[3];
out->field_0[4] = input->field_0[4];
out->field_0[5] = input->field_0[5];
out->field_0[6] = input->field_0[6];
out->field_0[7] = input->field_0[7];
out->field_0[8] = input->field_0[8];
out->field_0[9] = input->field_0[9];
out->field_0[10] = input->field_0[10];
out->field_0[11] = input->field_0[11];
out->field_0[12] = input->field_0[12];
out->field_0[13] = input->field_0[13];
out->field_0[14] = input->field_0[14];
out[1].field_0[0] = input[1].field_0[0];
return result;
}

很明显的基于表的AES,但是多了一个mixBijOut

解题脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
// open('Xortable.bin', 'wb').write(ida_bytes.get_bytes(0x84610, 9*96*16*16))
// open('Tyibox.bin', 'wb').write(ida_bytes.get_bytes(0x38610, 4*9*16*256))
// open('Tbox.bin', 'wb').write(ida_bytes.get_bytes(0x37610, 16*256))
// open('MixBijOut.bin', 'wb').write(ida_bytes.get_bytes(0x5C610, 4*10*16*256))
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

typedef unsigned char u8;   /* one byte */
typedef unsigned int u32;   /* one table word; assumed to be 32 bits wide */

/* Lookup tables used by aes_128_table_encrypt. The first three are filled by
 * loadTables() from the .bin dumps, Xortable by GetxorTable(). */
u8 Tbox_round9[16][256];       /* final-round T-box: [byte position][byte value] */
u32 Tyibox[9][16][256];        /* [round][byte position][byte value] */
u32 MixBijOut[10][16][256];    /* [round][byte position][byte value]; only rounds 0..8 are read */
u8 Xortable[9][96][16][16];    /* [round][xor node][left nibble][right nibble] */

/* ShiftRows on a 16-byte state: byte i of the result is taken from position
 * source_index[i] of the input. The state is updated in place. */
void shiftRows(u8 state[16]){
    static const int source_index[16] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
    u8 shifted[16];

    for (int dst = 0; dst < 16; dst++) {
        shifted[dst] = state[source_index[dst]];
    }
    memcpy(state, shifted, sizeof(shifted));
}

void GetxorTable() {
for (int i = 0; i < 9; i++) {
for (int j = 0; j < 96; j++) {
for (int x = 0; x < 16; x++) { //2的4次方=16
for (int y = 0; y < 16; y++) {
Xortable[i][j][x][y] = x ^ y;
}
}
}
}
}

/*
 * Read exactly `count` items of `size` bytes from the file at `path` into
 * `dst`. On any failure a message naming `name` is printed and the program
 * exits with status 1.
 */
static void loadTable(const char *path, const char *name, void *dst, size_t size, size_t count){
    FILE *fp = fopen(path, "rb");
    if(!fp) { printf("Error: Cannot find %s\n", name); exit(1); }
    /* A short read would leave part of the table zeroed and silently produce
     * wrong ciphertexts, so it is treated as fatal. */
    if(fread(dst, size, count, fp) != count) {
        printf("Error: %s is truncated or unreadable\n", name);
        fclose(fp);
        exit(1);
    }
    fclose(fp);
}

/*
 * Load the dumped Tyibox, final-round Tbox and MixBijOut tables into the
 * global arrays. Xortable is not read from a file here.
 * NOTE(review): the u32 tables are read as raw bytes, so the host byte order
 * is assumed to match the dump - confirm.
 */
void loadTables(){
    loadTable("D:\\vs\\vscode03_c\\RC4_DES_AES\\DFA\\Tyibox.bin", "Tyibox.bin",
              Tyibox, sizeof(u32), 9*16*256);
    loadTable("D:\\vs\\vscode03_c\\RC4_DES_AES\\DFA\\Tbox.bin", "Tbox.bin",
              Tbox_round9, sizeof(u8), 16*256);
    loadTable("D:\\vs\\vscode03_c\\RC4_DES_AES\\DFA\\MixBijOut.bin", "MixBijOut.bin",
              MixBijOut, sizeof(u32), 10*16*256);
}

void aes_128_table_encrypt(u8 plaintext[16], u8 ciphertext[16], int rand_idx, u8 rand_bytes) {
u32 a,b,c,d,aa,bb,cc,dd;

for(int i = 0; i < 9; i++){
// 最后一轮进行DFA
if(i == 8 && rand_idx != -1){
plaintext[rand_idx] = rand_bytes;
}

shiftRows(plaintext);
for (int j = 0; j < 4; j++){
a = Tyibox[i][j*4+0][plaintext[j*4+0]];
b = Tyibox[i][j*4+1][plaintext[j*4+1]];
c = Tyibox[i][j*4+2][plaintext[j*4+2]];
d = Tyibox[i][j*4+3][plaintext[j*4+3]];

aa = Xortable[i][j*24+0][(a>>28)&0xf][(b>>28)&0xf];
bb = Xortable[i][j*24+1][(c>>28)&0xf][(d>>28)&0xf];
cc = Xortable[i][j*24+2][(a>>24)&0xf][(b>>24)&0xf];
dd = Xortable[i][j*24+3][(c>>24)&0xf][(d>>24)&0xf];

plaintext[j*4+0] = (Xortable[i][j*24+4][aa][bb] << 4) | Xortable[i][j*24+5][cc][dd];

aa = Xortable[i][j*24+6][(a>>20)&0xf][(b>>20)&0xf];
bb = Xortable[i][j*24+7][(c>>20)&0xf][(d>>20)&0xf];
cc = Xortable[i][j*24+8][(a>>16)&0xf][(b>>16)&0xf];
dd = Xortable[i][j*24+9][(c>>16)&0xf][(d>>16)&0xf];

plaintext[j*4+1] = (Xortable[i][j*24+10][aa][bb] << 4) | Xortable[i][j*24+11][cc][dd];

aa = Xortable[i][j*24+12][(a>>12)&0xf][(b>>12)&0xf];
bb = Xortable[i][j*24+13][(c>>12)&0xf][(d>>12)&0xf];
cc = Xortable[i][j*24+14][(a>>8)&0xf][(b>>8)&0xf];
dd = Xortable[i][j*24+15][(c>>8)&0xf][(d>>8)&0xf];

plaintext[j*4+2] = (Xortable[i][j*24+16][aa][bb] << 4) | Xortable[i][j*24+17][cc][dd];

aa = Xortable[i][j*24+18][(a>>4)&0xf][(b>>4)&0xf];
bb = Xortable[i][j*24+19][(c>>4)&0xf][(d>>4)&0xf];
cc = Xortable[i][j*24+20][(a>>0)&0xf][(b>>0)&0xf];
dd = Xortable[i][j*24+21][(c>>0)&0xf][(d>>0)&0xf];

plaintext[j*4+3] = (Xortable[i][j*24+22][aa][bb] << 4) | Xortable[i][j*24+23][cc][dd];


a = MixBijOut[i][j*4+0][plaintext[j*4+0]];
b = MixBijOut[i][j*4+1][plaintext[j*4+1]];
c = MixBijOut[i][j*4+2][plaintext[j*4+2]];
d = MixBijOut[i][j*4+3][plaintext[j*4+3]];

aa = Xortable[i][j*24+0][(a>>28)&0xf][(b>>28)&0xf];
bb = Xortable[i][j*24+1][(c>>28)&0xf][(d>>28)&0xf];
cc = Xortable[i][j*24+2][(a>>24)&0xf][(b>>24)&0xf];
dd = Xortable[i][j*24+3][(c>>24)&0xf][(d>>24)&0xf];

plaintext[j*4+0] = (Xortable[i][j*24+4][aa][bb] << 4) | Xortable[i][j*24+5][cc][dd];

aa = Xortable[i][j*24+6][(a>>20)&0xf][(b>>20)&0xf];
bb = Xortable[i][j*24+7][(c>>20)&0xf][(d>>20)&0xf];
cc = Xortable[i][j*24+8][(a>>16)&0xf][(b>>16)&0xf];
dd = Xortable[i][j*24+9][(c>>16)&0xf][(d>>16)&0xf];

plaintext[j*4+1] = (Xortable[i][j*24+10][aa][bb] << 4) | Xortable[i][j*24+11][cc][dd];

aa = Xortable[i][j*24+12][(a>>12)&0xf][(b>>12)&0xf];
bb = Xortable[i][j*24+13][(c>>12)&0xf][(d>>12)&0xf];
cc = Xortable[i][j*24+14][(a>>8)&0xf][(b>>8)&0xf];
dd = Xortable[i][j*24+15][(c>>8)&0xf][(d>>8)&0xf];

plaintext[j*4+2] = (Xortable[i][j*24+16][aa][bb] << 4) | Xortable[i][j*24+17][cc][dd];

aa = Xortable[i][j*24+18][(a>>4)&0xf][(b>>4)&0xf];
bb = Xortable[i][j*24+19][(c>>4)&0xf][(d>>4)&0xf];
cc = Xortable[i][j*24+20][(a>>0)&0xf][(b>>0)&0xf];
dd = Xortable[i][j*24+21][(c>>0)&0xf][(d>>0)&0xf];

plaintext[j*4+3] = (Xortable[i][j*24+22][aa][bb] << 4) | Xortable[i][j*24+23][cc][dd];
}
}

shiftRows(plaintext);

for(int k = 0; k < 16; k++){
plaintext[k] = Tbox_round9[k][plaintext[k]];
}


for(int z = 0; z < 16; z++){
ciphertext[z] = plaintext[z];
}
}


/*
 * Write 96 ciphertexts of the all-zero plaintext to output.txt, one hex line
 * each. The first line is fault-free; every later run overwrites one randomly
 * chosen state byte with a random value before the last table round.
 * Returns 0 on success, 1 if the output file cannot be written completely.
 */
int main(void) {
    loadTables();
    GetxorTable();
    srand((unsigned)time(NULL));
    u8 plaintext[16] = {0};
    u8 ciphertext[16] = {0};
    FILE *out = fopen("D:\\vs\\vscode03_c\\RC4_DES_AES\\DFA\\output.txt", "w");
    if(!out) { printf("Error: Cannot open output.txt\n"); exit(1); }

    for(int i = 0; i < 96; i++){
        /* The encrypt routine works in place, so reset both buffers each run. */
        memset(plaintext, 0, sizeof(plaintext));
        memset(ciphertext, 0, sizeof(ciphertext));
        /* Run 0 stays fault-free and provides the reference ciphertext. */
        int rand_idx = i == 0 ? -1 : rand() % 16;
        u8 rand_byte = rand() % 256;
        aes_128_table_encrypt(plaintext, ciphertext, rand_idx, rand_byte);

        for(int j = 0; j < 16; j++){
            fprintf(out, "%02x", ciphertext[j]);
        }
        fprintf(out, "\n");
    }

    /* fclose flushes the buffered lines; a failure means the file is incomplete. */
    if(fclose(out) != 0) { printf("Error: Cannot write output.txt\n"); return 1; }
    return 0;
}

然后:

1
2
# Run the phoenixAES DFA solver on the ciphertext list in output.txt, passing
# no known round keys, encrypt=True and outputbeforelastrounds=False.
import phoenixAES
phoenixAES.crack_file('D://vs//vscode03_c//RC4_DES_AES//DFA//output.txt', [], True, False, verbose=2)

这样即可得到结果:4D31056BCB6C74BF923ACC9E96CD3EF6

image-20260306232730323

2024 强网杯 ez_vm

2023 强网杯 dotdot

HKCERT CTF 2025 findkey

这个题就是一个AES,去过混淆之后大概可以写出下面的样子:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// Reconstruction with more sensible types.
//
// AES-128 encryption of one block with a self-check inserted in the middle of
// round 9: a fixed 16-byte test vector is pushed through the rest of the
// cipher 16 times, each time with one byte forced to 0, and every result is
// compared with a precomputed 16-byte entry of src_. A mismatch clears the
// external flag `match`.
//
// plaintext   16 input bytes.
// ciphertext  receives the 16 output bytes; also returned.
// key         16-byte AES key.
uint8_t *Enc(uint8_t plaintext[16], uint8_t ciphertext[16], uint8_t key[16])
{
    // Fixed test vector, byte for byte. These are the bytes that storing the
    // 64-bit constants 0x0FC2C4EB7D23BA45 and 0xB464F693616239DA produces on a
    // little-endian machine; spelling them out avoids writing uint64_t values
    // through a uint8_t array (aliasing/alignment hazard, endian dependent).
    static const uint8_t checkSeed[16] = {
        0x45, 0xBA, 0x23, 0x7D, 0xEB, 0xC4, 0xC2, 0x0F,
        0xDA, 0x39, 0x62, 0x61, 0x93, 0xF6, 0x64, 0xB4
    };
    uint8_t state[16];
    uint8_t tmp[16];
    uint8_t roundKeys[11][16];   // 176 bytes total
    __m128i expected[16];        // 16 expected values of 16 bytes each
    int round;
    int idx;

    white_Sbox();
    keyExpansion(key, roundKeys);

    memcpy(state, plaintext, 16);
    addRoundKey(state, roundKeys[0]);

    // rounds 1..8
    for (round = 1; round <= 8; ++round) {
        subBytes(state);
        shiftRows(state);
        mixColumns(state);
        addRoundKey(state, roundKeys[round]);   // expandKey + 16 * round
    }

    // first half of round 9
    subBytes(state);
    shiftRows(state);

    // inserted white-box self-check
    memcpy(expected, src_, sizeof(expected));

    for (idx = 0; idx < 16; ++idx) {
        // start from the fixed test vector
        memcpy(tmp, checkSeed, sizeof(tmp));

        // force byte idx to 0
        tmp[idx] = 0;

        // run from "after the round-9 shiftRows" to the final output
        mixColumns(tmp);
        addRoundKey(tmp, roundKeys[9]);

        subBytes(tmp);
        shiftRows(tmp);
        addRoundKey(tmp, roundKeys[10]);

        // compare with the precomputed answer table
        if (memcmp(tmp, &expected[idx], 16) != 0) {
            match = 0;   // global/external flag
        }
    }

    // second half of round 9
    mixColumns(state);
    addRoundKey(state, roundKeys[9]);

    // round 10
    subBytes(state);
    shiftRows(state);
    addRoundKey(state, roundKeys[10]);

    memcpy(ciphertext, state, 16);
    return ciphertext;
}

这个写法,就是对tmp的 16 个字节分别做一次 byte -> 0 的单字节故障,共16个字节

也就是告诉了我们第9轮 ShiftRows 之后、MixColumns 之前的中间状态是多少:tmp

我们的目标是获取key_10,然后恢复key

令:

  • x_i:CHECK_SEED 的第 i 个字节置 0 后的 16 字节状态
  • a_i = MixColumns(x_i)
  • y_i = CHECK_TABLE[i] = src_[i]
  • K9 = rk[9]
  • K10 = rk[10]

那么 y_i = shiftRow(subBytes(a_i xor K9)) xor K10

从字节的角度来看,设p是字节的位置

u[p] = a_i[p] xor K9[p] ———> v[p] = SBOX(u[p]) = SBOX(a_i[p] xor K9[p])

————> ShiftRows 之前的位置 p,经过 ShiftRows 之后跑到了位置 j

————> y_i[j] = v[p] xor K10[j] = SBOX(a_i[p] xor K9[p]) xor K10[j]

那么,对于固定的p,K9[p],K10[j],j都是固定的

不同的是不同的故障明文与密文

令:c_i = y_i[j] xor SBOX(a_i[p] xor guess_k9)

如果我们的guess_k9是对的,那么c_i = K10[j]

所以我们固定字节位置p,对16个不同的样本i进行遍历;如果guess_k9猜对了,算出来的16个c_i都应该是一样的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# AES forward S-box, indexed by byte value.
SBOX = [0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x1, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x4, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x5, 0x9a, 0x7, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x9, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x0, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x2, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0xc, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0xb, 0xdb, 0xe0, 0x32, 0x3a, 0xa, 0x49, 0x6, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x8, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x3, 0xf6, 0xe, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0xd, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0xf, 0xb0, 0x54, 0xbb, 0x16]   # 256 bytes
# Expected self-check outputs taken from the binary (src_): entry i is the
# 16-byte result for the test vector with byte i forced to 0.
SRC = [0x1f, 0x52, 0xdc, 0x4d, 0x2c, 0x84, 0x80, 0x91, 0xd6, 0x18, 0x5b, 0x4e, 0xba, 0xc3, 0xd, 0x71, 0x56, 0x52, 0xdc, 0x4d, 0x2c, 0x84, 0x80, 0xd8, 0xd6, 0x18, 0x72, 0x4e, 0xba, 0x61, 0xd, 0x71, 0x34, 0x52, 0xdc, 0x4d, 0x2c, 0x84, 0x80, 0x5c, 0xd6, 0x18, 0x43, 0x4e, 0xba, 0x65, 0xd, 0x71, 0xc9, 0x52, 0xdc, 0x4d, 0x2c, 0x84, 0x80, 0x28, 0xd6, 0x18, 0x61, 0x4e, 0xba, 0x50, 0xd, 0x71, 0xd4, 0xb, 0xdc, 0x4d, 0x9b, 0x84, 0x80, 0x77, 0xd6, 0x18, 0x7d, 0x34, 0xba, 0xca, 0x47, 0x71, 0xd4, 0x41, 0xdc, 0x4d, 0x94, 0x84, 0x80, 0x77, 0xd6, 0x18, 0x7d, 0xda, 0xba, 0xca, 0x62, 0x71, 0xd4, 0x36, 0xdc, 0x4d, 0x6, 0x84, 0x80, 0x77, 0xd6, 0x18, 0x7d, 0xe1, 0xba, 0xca, 0xac, 0x71, 0xd4, 0xc2, 0xdc, 0x4d, 0xe7, 0x84, 0x80, 0x77, 0xd6, 0x18, 0x7d, 0xea, 0xba, 0xca, 0x60, 0x71, 0xd4, 0x52, 0x6f, 0x4d, 0x2c, 0x37, 0x80, 0x77, 0x0, 0x18, 0x7d, 0x4e, 0xba, 0xca, 0xd, 0x80, 0xd4, 0x52, 0x9, 0x4d, 0x2c, 0x3a, 0x80, 0x77, 0x40, 0x18, 0x7d, 0x4e, 0xba, 0xca, 0xd, 0x7b, 0xd4, 0x52, 0x62, 0x4d, 0x2c, 0x93, 0x80, 0x77, 0x54, 0x18, 0x7d, 0x4e, 0xba, 0xca, 0xd, 0x36, 0xd4, 0x52, 0xf0, 0x4d, 0x2c, 0x54, 0x80, 0x77, 0x10, 0x18, 0x7d, 0x4e, 0xba, 0xca, 0xd, 0x17, 0xd4, 0x52, 0xdc, 0x1f, 0x2c, 0x84, 0xc8, 0x77, 0xd6, 0x4b, 0x7d, 0x4e, 0x17, 0xca, 0xd, 0x71, 0xd4, 0x52, 0xdc, 0x75, 0x2c, 0x84, 0x76, 0x77, 0xd6, 0x25, 0x7d, 0x4e, 0xcf, 0xca, 0xd, 0x71, 0xd4, 0x52, 0xdc, 0x2b, 0x2c, 0x84, 0x22, 0x77, 0xd6, 0x8, 0x7d, 0x4e, 0x16, 0xca, 0xd, 0x71, 0xd4, 0x52, 0xdc, 0x3e, 0x2c, 0x84, 0x33, 0x77, 0xd6, 0x81, 0x7d, 0x4e, 0x2d, 0xca, 0xd, 0x71] # 256 bytes = 16 * 16

# Fixed test vector of the self-check, before any byte is zeroed.
CHECK_SEED = [0x45,0xBA,0x23,0x7D,0xEB,0xC4,0xC2,0x0F,0xDA,0x39,0x62,0x61,0x93,0xF6,0x64,0xB4]
# ShiftRows as a position map: the byte at index p before ShiftRows ends up at index SR_MAP[p].
SR_MAP = [0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3]
# Key-schedule round constants for rounds 1..10.
RCON = [0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1B,0x36]

def xtime(a):
    """Multiply the byte ``a`` by 2 in GF(2^8), reducing with 0x11B on overflow."""
    a <<= 1
    if a & 0x100:
        a ^= 0x11B
    return a & 0xFF

def mul(a, b):
    """Multiply ``a`` by ``b`` in GF(2^8) by shift-and-add over the bits of ``b``."""
    r = 0
    while b:
        if b & 1:
            r ^= a
        a = xtime(a)
        b >>= 1
    return r

def mix_columns(s):
    """Return MixColumns of the 16-byte state ``s``.

    Each consecutive group of four bytes is treated as one column; the input
    list is not modified.
    """
    out = s[:]
    for c in range(4):
        a0, a1, a2, a3 = s[c*4:(c+1)*4]
        out[c*4:(c+1)*4] = [
            mul(a0, 2) ^ mul(a1, 3) ^ a2 ^ a3,
            a0 ^ mul(a1, 2) ^ mul(a2, 3) ^ a3,
            a0 ^ a1 ^ mul(a2, 2) ^ mul(a3, 3),
            mul(a0, 3) ^ a1 ^ a2 ^ mul(a3, 2),
        ]
    return out

def recover():
    """Recover round keys 9 and 10 from the 16 self-check samples.

    For every byte position p, each candidate k9 byte implies a k10 byte
    through sample 0; the candidate is kept only if all 16 samples imply the
    same k10 byte. Exactly one candidate must survive per position.

    Returns ``(rk9, rk10)`` as two lists of 16 ints.
    """
    # T[i]: expected final output for the seed with byte i zeroed.
    T = [SRC[i*16:(i+1)*16] for i in range(16)]
    # A[i]: the same faulted seed after MixColumns (state just before AddRoundKey 9).
    A = []
    for i in range(16):
        x = CHECK_SEED[:]
        x[i] = 0
        A.append(mix_columns(x))

    rk9 = [0]*16
    rk10 = [0]*16
    for p in range(16):
        j = SR_MAP[p]   # where position p lands after ShiftRows
        hit = []
        for k9 in range(256):
            k10 = T[0][j] ^ SBOX[A[0][p] ^ k9]
            if all((T[i][j] ^ SBOX[A[i][p] ^ k9]) == k10 for i in range(16)):
                hit.append((k9, k10))
        assert len(hit) == 1, (p, hit)
        rk9[p], rk10[j] = hit[0]
    return rk9, rk10

def inv_key_schedule_128(rk10):
    """Run the AES-128 key schedule backwards from round key 10.

    ``rk10`` is a list of 16 bytes; the return value is the 16-byte original
    key (words W[0..3] concatenated).
    """
    W = [None] * 44
    for i in range(4):
        W[40+i] = rk10[i*4:(i+1)*4]
    for i in range(43, 3, -1):
        if i % 4:
            # W[i] = W[i-4] ^ W[i-1]  =>  W[i-4] = W[i] ^ W[i-1]
            W[i-4] = [W[i][k] ^ W[i-1][k] for k in range(4)]
        else:
            # W[i] = W[i-4] ^ T(W[i-1]) with T = rotate, S-box, round constant
            t = W[i-1][1:] + W[i-1][:1]
            t = [SBOX[b] for b in t]
            t[0] ^= RCON[i//4 - 1]
            W[i-4] = [W[i][k] ^ t[k] for k in range(4)]
    return sum(W[:4], [])

# Recover both round keys, walk the key schedule back to the original key,
# and print each value as lowercase hex.
rk9, rk10 = recover()
key = inv_key_schedule_128(rk10)

for label, block in (("rk9 :", rk9), ("rk10:", rk10), ("key :", key)):
    print(label, bytes(block).hex())
# Print the recovered key as text; decoding raises if it is not pure ASCII.
print("ascii:", bytes(key).decode('ascii'))