Security Scol plugin
aria.cpp
1// aria.cpp - written and placed in the public domain by Jeffrey Walton
2
3#include "pch.h"
4#include "config.h"
5
6#include "aria.h"
7#include "misc.h"
8#include "cpu.h"
9
// Turn on the ARIA SSE2 switch when the library configuration reports SSE2 intrinsics.
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# define CRYPTOPP_ENABLE_ARIA_SSE2_INTRINSICS 1
#endif

// Turn on the ARIA SSSE3 switch when the library configuration reports SSSE3.
// ProcessAndXorBlock tests this switch before dispatching to the SSSE3 routine.
#if (CRYPTOPP_SSSE3_AVAILABLE)
# define CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS 1
#endif
17
// Views a pointer as word32* by casting through void*, which keeps GCC from
// warning about the cast. Note: it is applied to the round key table, which
// holds word32 values and is naturally aligned.
#define UINT32_CAST(ptr) ((word32 *)(void *)(ptr))
21
22NAMESPACE_BEGIN(CryptoPP)
23NAMESPACE_BEGIN(ARIATab)
24
25extern const word32 S1[256];
26extern const word32 S2[256];
27extern const word32 X1[256];
28extern const word32 X2[256];
29extern const word32 KRK[3][4];
30
31NAMESPACE_END
32NAMESPACE_END
33
34NAMESPACE_BEGIN(CryptoPP)
35
36using CryptoPP::ARIATab::S1;
37using CryptoPP::ARIATab::S2;
38using CryptoPP::ARIATab::X1;
39using CryptoPP::ARIATab::X2;
40using CryptoPP::ARIATab::KRK;
41
42inline byte ARIA_BRF(const word32 x, const int y) {
43 return static_cast<byte>(GETBYTE(x, y));
44}
45
// Key XOR Layer.
// Passes the four state words t[0..3] through a native-byte-order Put object
// built from (rk, t) and stores the results back into t; presumably this XORs
// the 16 bytes at rk into the state (rk is given as Put's first argument).
// Relies on `rk` and `t` being declared in the enclosing scope.
#define ARIA_KXL { \
	typedef BlockGetAndPut<word32, NativeByteOrder, true, true> KeyXorBlock; \
	KeyXorBlock::Put(rk, t)(t[0])(t[1])(t[2])(t[3]); \
	}
51
// S-Box Layer 1 + M.
// Each argument is replaced by the XOR of four table lookups taken from its
// own bytes: S1 on byte 3, S2 on byte 2, X1 on byte 1 and X2 on byte 0.
#define SBL1_M(W0,W1,W2,W3) { \
	W0 = S1[ARIA_BRF(W0,3)] ^ S2[ARIA_BRF(W0,2)] ^ X1[ARIA_BRF(W0,1)] ^ X2[ARIA_BRF(W0,0)]; \
	W1 = S1[ARIA_BRF(W1,3)] ^ S2[ARIA_BRF(W1,2)] ^ X1[ARIA_BRF(W1,1)] ^ X2[ARIA_BRF(W1,0)]; \
	W2 = S1[ARIA_BRF(W2,3)] ^ S2[ARIA_BRF(W2,2)] ^ X1[ARIA_BRF(W2,1)] ^ X2[ARIA_BRF(W2,0)]; \
	W3 = S1[ARIA_BRF(W3,3)] ^ S2[ARIA_BRF(W3,2)] ^ X1[ARIA_BRF(W3,1)] ^ X2[ARIA_BRF(W3,0)]; \
	}
59
// S-Box Layer 2 + M.
// Same shape as layer 1 with the table roles exchanged: X1 on byte 3,
// X2 on byte 2, S1 on byte 1 and S2 on byte 0 of each argument.
#define SBL2_M(W0,W1,W2,W3) { \
	W0 = X1[ARIA_BRF(W0,3)] ^ X2[ARIA_BRF(W0,2)] ^ S1[ARIA_BRF(W0,1)] ^ S2[ARIA_BRF(W0,0)]; \
	W1 = X1[ARIA_BRF(W1,3)] ^ X2[ARIA_BRF(W1,2)] ^ S1[ARIA_BRF(W1,1)] ^ S2[ARIA_BRF(W1,0)]; \
	W2 = X1[ARIA_BRF(W2,3)] ^ X2[ARIA_BRF(W2,2)] ^ S1[ARIA_BRF(W2,1)] ^ S2[ARIA_BRF(W2,0)]; \
	W3 = X1[ARIA_BRF(W3,3)] ^ X2[ARIA_BRF(W3,2)] ^ S1[ARIA_BRF(W3,1)] ^ S2[ARIA_BRF(W3,0)]; \
	}
67
// Byte permutation on three of the four words. The first argument is not
// modified. The second gets the two bytes of each 16-bit half swapped, the
// third is rotated by 16 bits, and the fourth has its byte order reversed.
#define ARIA_P(W0,W1,W2,W3) { \
	(W1) = (((W1)>> 8)&0x00ff00ff) ^ (((W1)<< 8)&0xff00ff00); \
	(W2) = rotrConstant<16>(W2); \
	(W3) = ByteReverse((W3)); \
	}
73
// Stores into DST the XOR of SRC shifted left and right by 8, 16 and 24 bits.
// SRC itself is not modified (unless the caller passes the same object twice).
#define ARIA_M(SRC,DST) { \
	DST = ((SRC)<<8) ^ ((SRC)>>8) ^ ((SRC)<<16) ^ ((SRC)>>16) ^ ((SRC)<<24) ^ ((SRC)>>24); \
	}
77
// In-place word mixing by a fixed sequence of six XOR assignments.
// The order of the steps matters: later steps read words updated by earlier ones.
#define ARIA_MM(W0,W1,W2,W3) { \
	(W1)^=(W2); \
	(W2)^=(W3); \
	(W0)^=(W1); \
	(W3)^=(W1); \
	(W2)^=(W0); \
	(W1)^=(W2); \
	}
82
// Round function built on S-box layer 1, applied to the state t[0..3] of the
// enclosing scope: substitution, word mixing, byte permutation, word mixing.
#define ARIA_FO { \
	SBL1_M(t[0],t[1],t[2],t[3]) \
	ARIA_MM(t[0],t[1],t[2],t[3]) \
	ARIA_P(t[0],t[1],t[2],t[3]) \
	ARIA_MM(t[0],t[1],t[2],t[3]) \
	}
// Round function built on S-box layer 2. Same sequence as ARIA_FO, except
// that the byte permutation receives the words in the order t[2],t[3],t[0],t[1].
#define ARIA_FE { \
	SBL2_M(t[0],t[1],t[2],t[3]) \
	ARIA_MM(t[0],t[1],t[2],t[3]) \
	ARIA_P(t[2],t[3],t[0],t[1]) \
	ARIA_MM(t[0],t[1],t[2],t[3]) \
	}
85
// NEON routines, declared only when NEON is available. They are defined
// outside this file and called after a HasNEON() check.
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
extern void ARIA_UncheckedSetKey_Schedule_NEON(byte* rk, word32* ws, unsigned int keylen);
extern void ARIA_ProcessAndXorBlock_NEON(const byte* xorBlock, byte* outblock, const byte *rk, word32 *t);
#endif

// SSSE3 routine, declared only when SSSE3 is available. It is defined
// outside this file and called after a HasSSSE3() check.
#if (CRYPTOPP_SSSE3_AVAILABLE)
extern void ARIA_ProcessAndXorBlock_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t);
#endif
94
95// n-bit right shift of Y XORed to X
96template <unsigned int N>
97inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
98{
99 // MSVC is not generating a "rotate immediate". Constify to help it along.
100 static const unsigned int Q = 4-(N/32);
101 static const unsigned int R = N % 32;
102 UINT32_CAST(RK)[0] = (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R));
103 UINT32_CAST(RK)[1] = (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R));
104 UINT32_CAST(RK)[2] = (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R));
105 UINT32_CAST(RK)[3] = (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R));
106}
107
108void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params)
109{
110 CRYPTOPP_UNUSED(params);
111
112 m_rk.New(16*17); // round keys
113 m_w.New(4*7); // w0, w1, w2, w3, t and u
114
115 byte *rk = m_rk.data();
116 int Q, q, R, r;
117
118 switch (keylen)
119 {
120 case 16:
121 R = r = m_rounds = 12;
122 Q = q = 0;
123 break;
124 case 32:
125 R = r = m_rounds = 16;
126 Q = q = 2;
127 break;
128 case 24:
129 R = r = m_rounds = 14;
130 Q = q = 1;
131 break;
132 default:
133 Q = q = R = r = m_rounds = 0;
134 CRYPTOPP_ASSERT(0);
135 }
136
137 // w0 has room for 32 bytes. w1-w3 each has room for 16 bytes. t and u are 16 byte temp areas.
138 word32 *w0 = m_w.data(), *w1 = m_w.data()+8, *w2 = m_w.data()+12, *w3 = m_w.data()+16, *t = m_w.data()+20;
139
141 block(w0[0])(w0[1])(w0[2])(w0[3]);
142
143 t[0]=w0[0]^KRK[q][0]; t[1]=w0[1]^KRK[q][1];
144 t[2]=w0[2]^KRK[q][2]; t[3]=w0[3]^KRK[q][3];
145
146 ARIA_FO;
147
148 if (keylen == 32)
149 {
150 block(w1[0])(w1[1])(w1[2])(w1[3]);
151 }
152 else if (keylen == 24)
153 {
154 block(w1[0])(w1[1]); w1[2] = w1[3] = 0;
155 }
156 else
157 {
158 w1[0]=w1[1]=w1[2]=w1[3]=0;
159 }
160
161 w1[0]^=t[0]; w1[1]^=t[1]; w1[2]^=t[2]; w1[3]^=t[3];
162 ::memcpy(t, w1, 16);
163
164 q = (q==2) ? 0 : (q+1);
165 t[0]^=KRK[q][0]; t[1]^=KRK[q][1]; t[2]^=KRK[q][2]; t[3]^=KRK[q][3];
166
167 ARIA_FE;
168
169 t[0]^=w0[0]; t[1]^=w0[1]; t[2]^=w0[2]; t[3]^=w0[3];
170 ::memcpy(w2, t, 16);
171
172 q = (q==2) ? 0 : (q+1);
173 t[0]^=KRK[q][0]; t[1]^=KRK[q][1]; t[2]^=KRK[q][2]; t[3]^=KRK[q][3];
174
175 ARIA_FO;
176
177 w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3];
178
179#if CRYPTOPP_ARM_NEON_AVAILABLE
180 if (HasNEON())
181 {
182 ARIA_UncheckedSetKey_Schedule_NEON(rk, m_w, keylen);
183 }
184 else
185#endif // CRYPTOPP_ARM_NEON_AVAILABLE
186 {
187 ARIA_GSRK<19>(w0, w1, rk + 0);
188 ARIA_GSRK<19>(w1, w2, rk + 16);
189 ARIA_GSRK<19>(w2, w3, rk + 32);
190 ARIA_GSRK<19>(w3, w0, rk + 48);
191 ARIA_GSRK<31>(w0, w1, rk + 64);
192 ARIA_GSRK<31>(w1, w2, rk + 80);
193 ARIA_GSRK<31>(w2, w3, rk + 96);
194 ARIA_GSRK<31>(w3, w0, rk + 112);
195 ARIA_GSRK<67>(w0, w1, rk + 128);
196 ARIA_GSRK<67>(w1, w2, rk + 144);
197 ARIA_GSRK<67>(w2, w3, rk + 160);
198 ARIA_GSRK<67>(w3, w0, rk + 176);
199 ARIA_GSRK<97>(w0, w1, rk + 192);
200
201 if (keylen > 16)
202 {
203 ARIA_GSRK<97>(w1, w2, rk + 208);
204 ARIA_GSRK<97>(w2, w3, rk + 224);
205
206 if (keylen > 24)
207 {
208 ARIA_GSRK< 97>(w3, w0, rk + 240);
209 ARIA_GSRK<109>(w0, w1, rk + 256);
210 }
211 }
212 }
213
214 // Decryption operation
215 if (!IsForwardTransformation())
216 {
217 word32 *a, *z, *s;
218 rk = m_rk.data();
219 r = R; q = Q;
220
221 a=UINT32_CAST(rk); s=m_w.data()+24; z=a+r*4;
222 ::memcpy(t, a, 16); ::memcpy(a, z, 16); ::memcpy(z, t, 16);
223
224 a+=4; z-=4;
225 for (; a<z; a+=4, z-=4)
226 {
227 ARIA_M(a[0],t[0]); ARIA_M(a[1],t[1]); ARIA_M(a[2],t[2]); ARIA_M(a[3],t[3]);
228 ARIA_MM(t[0],t[1],t[2],t[3]); ARIA_P(t[0],t[1],t[2],t[3]); ARIA_MM(t[0],t[1],t[2],t[3]);
229 ::memcpy(s, t, 16);
230
231 ARIA_M(z[0],t[0]); ARIA_M(z[1],t[1]); ARIA_M(z[2],t[2]); ARIA_M(z[3],t[3]);
232 ARIA_MM(t[0],t[1],t[2],t[3]); ARIA_P(t[0],t[1],t[2],t[3]); ARIA_MM(t[0],t[1],t[2],t[3]);
233 ::memcpy(a, t, 16); ::memcpy(z, s, 16);
234 }
235
236 ARIA_M(a[0],t[0]); ARIA_M(a[1],t[1]); ARIA_M(a[2],t[2]); ARIA_M(a[3],t[3]);
237 ARIA_MM(t[0],t[1],t[2],t[3]); ARIA_P(t[0],t[1],t[2],t[3]); ARIA_MM(t[0],t[1],t[2],t[3]);
238 ::memcpy(z, t, 16);
239 }
240
241 // Silence warnings
242 CRYPTOPP_UNUSED(Q); CRYPTOPP_UNUSED(R);
243 CRYPTOPP_UNUSED(q); CRYPTOPP_UNUSED(r);
244}
245
246void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
247{
248 const byte *rk = reinterpret_cast<const byte*>(m_rk.data());
249 word32 *t = const_cast<word32*>(m_w.data()+20);
250
251 // Timing attack countermeasure. See comments in Rijndael for more details.
252 // We used Yun's 32-bit implementation, so we use words rather than bytes.
253 const int cacheLineSize = GetCacheLineSize();
254 unsigned int i;
255 volatile word32 _u = 0;
256 word32 u = _u;
257
258 for (i=0; i<COUNTOF(S1); i+=cacheLineSize/(sizeof(S1[0])))
259 u |= *(S1+i);
260 t[0] |= u;
261
262 GetBlock<word32, BigEndian>block(inBlock);
263 block(t[0])(t[1])(t[2])(t[3]);
264
265 if (m_rounds > 12) {
266 ARIA_KXL; rk+= 16; ARIA_FO;
267 ARIA_KXL; rk+= 16; ARIA_FE;
268 }
269
270 if (m_rounds > 14) {
271 ARIA_KXL; rk+= 16; ARIA_FO;
272 ARIA_KXL; rk+= 16; ARIA_FE;
273 }
274
275 ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
276 ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
277 ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
278 ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
279 ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16; ARIA_FE;
280 ARIA_KXL; rk+= 16; ARIA_FO; ARIA_KXL; rk+= 16;
281
282#if CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
283 if (HasSSSE3())
284 {
285 ARIA_ProcessAndXorBlock_SSSE3(xorBlock, outBlock, rk, t);
286 return;
287 }
288 else
289#endif // CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
290#if (CRYPTOPP_ARM_NEON_AVAILABLE)
291 if (HasNEON())
292 {
293 ARIA_ProcessAndXorBlock_NEON(xorBlock, outBlock, rk, t);
294 return;
295 }
296 else
297#endif // CRYPTOPP_ARM_NEON_AVAILABLE
298#if (CRYPTOPP_LITTLE_ENDIAN)
299 {
300 outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ) ^ rk[ 3];
301 outBlock[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8) ^ rk[ 2];
302 outBlock[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] ) ^ rk[ 1];
303 outBlock[ 3] = (byte)(S2[ARIA_BRF(t[0],0)] ) ^ rk[ 0];
304 outBlock[ 4] = (byte)(X1[ARIA_BRF(t[1],3)] ) ^ rk[ 7];
305 outBlock[ 5] = (byte)(X2[ARIA_BRF(t[1],2)]>>8) ^ rk[ 6];
306 outBlock[ 6] = (byte)(S1[ARIA_BRF(t[1],1)] ) ^ rk[ 5];
307 outBlock[ 7] = (byte)(S2[ARIA_BRF(t[1],0)] ) ^ rk[ 4];
308 outBlock[ 8] = (byte)(X1[ARIA_BRF(t[2],3)] ) ^ rk[11];
309 outBlock[ 9] = (byte)(X2[ARIA_BRF(t[2],2)]>>8) ^ rk[10];
310 outBlock[10] = (byte)(S1[ARIA_BRF(t[2],1)] ) ^ rk[ 9];
311 outBlock[11] = (byte)(S2[ARIA_BRF(t[2],0)] ) ^ rk[ 8];
312 outBlock[12] = (byte)(X1[ARIA_BRF(t[3],3)] ) ^ rk[15];
313 outBlock[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8) ^ rk[14];
314 outBlock[14] = (byte)(S1[ARIA_BRF(t[3],1)] ) ^ rk[13];
315 outBlock[15] = (byte)(S2[ARIA_BRF(t[3],0)] ) ^ rk[12];
316 }
317#else
318 {
319 outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ) ^ rk[ 0];
320 outBlock[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8) ^ rk[ 1];
321 outBlock[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] ) ^ rk[ 2];
322 outBlock[ 3] = (byte)(S2[ARIA_BRF(t[0],0)] ) ^ rk[ 3];
323 outBlock[ 4] = (byte)(X1[ARIA_BRF(t[1],3)] ) ^ rk[ 4];
324 outBlock[ 5] = (byte)(X2[ARIA_BRF(t[1],2)]>>8) ^ rk[ 5];
325 outBlock[ 6] = (byte)(S1[ARIA_BRF(t[1],1)] ) ^ rk[ 6];
326 outBlock[ 7] = (byte)(S2[ARIA_BRF(t[1],0)] ) ^ rk[ 7];
327 outBlock[ 8] = (byte)(X1[ARIA_BRF(t[2],3)] ) ^ rk[ 8];
328 outBlock[ 9] = (byte)(X2[ARIA_BRF(t[2],2)]>>8) ^ rk[ 9];
329 outBlock[10] = (byte)(S1[ARIA_BRF(t[2],1)] ) ^ rk[10];
330 outBlock[11] = (byte)(S2[ARIA_BRF(t[2],0)] ) ^ rk[11];
331 outBlock[12] = (byte)(X1[ARIA_BRF(t[3],3)] ) ^ rk[12];
332 outBlock[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8) ^ rk[13];
333 outBlock[14] = (byte)(S1[ARIA_BRF(t[3],1)] ) ^ rk[14];
334 outBlock[15] = (byte)(S2[ARIA_BRF(t[3],0)] ) ^ rk[15];
335 }
336#endif // CRYPTOPP_LITTLE_ENDIAN
337
338 if (xorBlock != NULLPTR)
339 for (unsigned int n=0; n<ARIA::BLOCKSIZE; ++n)
340 outBlock[n] ^= xorBlock[n];
341}
342
343NAMESPACE_END
Classes for the ARIA block cipher.
Access a block of memory.
Definition misc.h:2766
Interface for retrieving values given their names.
Definition cryptlib.h:322
A::pointer data()
Provides a pointer to the first element in the memory block.
Definition secblock.h:857
void New(size_type newSize)
Change size without preserving contents.
Definition secblock.h:1126
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:62
Functions for CPU features and intrinsics.
int GetCacheLineSize()
Provides the cache line size.
Definition cpu.h:889
Utility functions for the Crypto++ library.
Precompiled header file.