aes/ni/
aes256.rs

1use super::{arch::*, utils::*};
2use crate::{Block, Block8};
3use cipher::inout::InOut;
4use core::mem;
5
/// AES-256 round keys: 15 128-bit values, indexed `0..=14`.
pub(super) type RoundKeys = [__m128i; 15];
8
9#[inline]
10#[target_feature(enable = "aes")]
11pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) {
12    let (in_ptr, out_ptr) = block.into_raw();
13    let mut b = _mm_loadu_si128(in_ptr as *const __m128i);
14    b = _mm_xor_si128(b, keys[0]);
15    b = _mm_aesenc_si128(b, keys[1]);
16    b = _mm_aesenc_si128(b, keys[2]);
17    b = _mm_aesenc_si128(b, keys[3]);
18    b = _mm_aesenc_si128(b, keys[4]);
19    b = _mm_aesenc_si128(b, keys[5]);
20    b = _mm_aesenc_si128(b, keys[6]);
21    b = _mm_aesenc_si128(b, keys[7]);
22    b = _mm_aesenc_si128(b, keys[8]);
23    b = _mm_aesenc_si128(b, keys[9]);
24    b = _mm_aesenc_si128(b, keys[10]);
25    b = _mm_aesenc_si128(b, keys[11]);
26    b = _mm_aesenc_si128(b, keys[12]);
27    b = _mm_aesenc_si128(b, keys[13]);
28    b = _mm_aesenclast_si128(b, keys[14]);
29    _mm_storeu_si128(out_ptr as *mut __m128i, b);
30}
31
32#[inline]
33#[target_feature(enable = "aes")]
34pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) {
35    let (in_ptr, out_ptr) = blocks.into_raw();
36    let mut b = load8(in_ptr);
37    xor8(&mut b, keys[0]);
38    aesenc8(&mut b, keys[1]);
39    aesenc8(&mut b, keys[2]);
40    aesenc8(&mut b, keys[3]);
41    aesenc8(&mut b, keys[4]);
42    aesenc8(&mut b, keys[5]);
43    aesenc8(&mut b, keys[6]);
44    aesenc8(&mut b, keys[7]);
45    aesenc8(&mut b, keys[8]);
46    aesenc8(&mut b, keys[9]);
47    aesenc8(&mut b, keys[10]);
48    aesenc8(&mut b, keys[11]);
49    aesenc8(&mut b, keys[12]);
50    aesenc8(&mut b, keys[13]);
51    aesenclast8(&mut b, keys[14]);
52    store8(out_ptr, b);
53}
54
55#[inline]
56#[target_feature(enable = "aes")]
57pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) {
58    let (in_ptr, out_ptr) = block.into_raw();
59    let mut b = _mm_loadu_si128(in_ptr as *const __m128i);
60    b = _mm_xor_si128(b, keys[14]);
61    b = _mm_aesdec_si128(b, keys[13]);
62    b = _mm_aesdec_si128(b, keys[12]);
63    b = _mm_aesdec_si128(b, keys[11]);
64    b = _mm_aesdec_si128(b, keys[10]);
65    b = _mm_aesdec_si128(b, keys[9]);
66    b = _mm_aesdec_si128(b, keys[8]);
67    b = _mm_aesdec_si128(b, keys[7]);
68    b = _mm_aesdec_si128(b, keys[6]);
69    b = _mm_aesdec_si128(b, keys[5]);
70    b = _mm_aesdec_si128(b, keys[4]);
71    b = _mm_aesdec_si128(b, keys[3]);
72    b = _mm_aesdec_si128(b, keys[2]);
73    b = _mm_aesdec_si128(b, keys[1]);
74    b = _mm_aesdeclast_si128(b, keys[0]);
75    _mm_storeu_si128(out_ptr as *mut __m128i, b);
76}
77
78#[inline]
79#[target_feature(enable = "aes")]
80pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) {
81    let (in_ptr, out_ptr) = blocks.into_raw();
82    let mut b = load8(in_ptr);
83    xor8(&mut b, keys[14]);
84    aesdec8(&mut b, keys[13]);
85    aesdec8(&mut b, keys[12]);
86    aesdec8(&mut b, keys[11]);
87    aesdec8(&mut b, keys[10]);
88    aesdec8(&mut b, keys[9]);
89    aesdec8(&mut b, keys[8]);
90    aesdec8(&mut b, keys[7]);
91    aesdec8(&mut b, keys[6]);
92    aesdec8(&mut b, keys[5]);
93    aesdec8(&mut b, keys[4]);
94    aesdec8(&mut b, keys[3]);
95    aesdec8(&mut b, keys[2]);
96    aesdec8(&mut b, keys[1]);
97    aesdeclast8(&mut b, keys[0]);
98    store8(out_ptr, b);
99}
100
// Derives round keys `$pos` and `$pos + 1` from round keys `$pos - 2` and
// `$pos - 1`. `$round` is the round constant handed to
// `_mm_aeskeygenassist_si128` as its immediate operand.
macro_rules! expand_round {
    ($keys:expr, $pos:expr, $round:expr) => {
        let even_prev = $keys[$pos - 2];
        let odd_prev = $keys[$pos - 1];

        // Even key: broadcast dword 3 of the key-gen assist result
        // (shuffle 0xff), then fold in the running XOR of `even_prev`
        // shifted left by 4, 8 and 12 bytes.
        let even_assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(odd_prev, $round), 0xff);
        let even_sh1 = _mm_slli_si128(even_prev, 0x4);
        let even_sh2 = _mm_slli_si128(even_sh1, 0x4);
        let even_sh3 = _mm_slli_si128(even_sh2, 0x4);
        let even_fold = _mm_xor_si128(
            _mm_xor_si128(_mm_xor_si128(even_prev, even_sh1), even_sh2),
            even_sh3,
        );
        let even_next = _mm_xor_si128(even_fold, even_assist);

        $keys[$pos] = even_next;

        // Odd key: same folding on `odd_prev`, but the assist uses a zero
        // round constant and broadcasts dword 2 (shuffle 0xaa).
        let odd_assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(even_next, 0x00), 0xaa);
        let odd_sh1 = _mm_slli_si128(odd_prev, 0x4);
        let odd_sh2 = _mm_slli_si128(odd_sh1, 0x4);
        let odd_sh3 = _mm_slli_si128(odd_sh2, 0x4);
        let odd_fold = _mm_xor_si128(
            _mm_xor_si128(_mm_xor_si128(odd_prev, odd_sh1), odd_sh2),
            odd_sh3,
        );
        let odd_next = _mm_xor_si128(odd_fold, odd_assist);

        $keys[$pos + 1] = odd_next;
    };
}
133
// Derives only round key `$pos` from round keys `$pos - 2` and `$pos - 1`.
// `$round` is the round constant handed to `_mm_aeskeygenassist_si128` as
// its immediate operand. No `$pos + 1` key is produced.
macro_rules! expand_round_last {
    ($keys:expr, $pos:expr, $round:expr) => {
        let even_prev = $keys[$pos - 2];
        let odd_prev = $keys[$pos - 1];

        // Broadcast dword 3 of the key-gen assist result (shuffle 0xff),
        // then fold in the running XOR of `even_prev` shifted left by
        // 4, 8 and 12 bytes.
        let assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(odd_prev, $round), 0xff);
        let sh1 = _mm_slli_si128(even_prev, 0x4);
        let sh2 = _mm_slli_si128(sh1, 0x4);
        let sh3 = _mm_slli_si128(sh2, 0x4);
        let fold = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(even_prev, sh1), sh2), sh3);

        $keys[$pos] = _mm_xor_si128(fold, assist);
    };
}
154
155#[inline(always)]
156pub(super) unsafe fn expand_key(key: &[u8; 32]) -> RoundKeys {
157    // SAFETY: `RoundKeys` is a `[__m128i; 15]` which can be initialized
158    // with all zeroes.
159    let mut keys: RoundKeys = mem::zeroed();
160
161    let kp = key.as_ptr() as *const __m128i;
162    keys[0] = _mm_loadu_si128(kp);
163    keys[1] = _mm_loadu_si128(kp.add(1));
164
165    expand_round!(keys, 2, 0x01);
166    expand_round!(keys, 4, 0x02);
167    expand_round!(keys, 6, 0x04);
168    expand_round!(keys, 8, 0x08);
169    expand_round!(keys, 10, 0x10);
170    expand_round!(keys, 12, 0x20);
171    expand_round_last!(keys, 14, 0x40);
172
173    keys
174}
175
176#[inline]
177#[target_feature(enable = "aes")]
178pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys {
179    [
180        keys[0],
181        _mm_aesimc_si128(keys[1]),
182        _mm_aesimc_si128(keys[2]),
183        _mm_aesimc_si128(keys[3]),
184        _mm_aesimc_si128(keys[4]),
185        _mm_aesimc_si128(keys[5]),
186        _mm_aesimc_si128(keys[6]),
187        _mm_aesimc_si128(keys[7]),
188        _mm_aesimc_si128(keys[8]),
189        _mm_aesimc_si128(keys[9]),
190        _mm_aesimc_si128(keys[10]),
191        _mm_aesimc_si128(keys[11]),
192        _mm_aesimc_si128(keys[12]),
193        _mm_aesimc_si128(keys[13]),
194        keys[14],
195    ]
196}