Security Scol plugin
gcm.cpp
1// gcm.cpp - originally written and placed in the public domain by Wei Dai.
2// ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3// multiply routines are less efficient because they shadow x86.
4// The precomputed key table integration makes it tricky to use the
5// more efficient ARMv8 implementation of the multiply and reduce.
6
7// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8
9#include "pch.h"
10#include "config.h"
11
12#ifndef CRYPTOPP_IMPORTS
13#ifndef CRYPTOPP_GENERATE_X64_MASM
14
15// Visual Studio .Net 2003 compiler crash
16#if defined(_MSC_VER) && (_MSC_VER < 1400)
17# pragma optimize("", off)
18#endif
19
20#include "gcm.h"
21#include "cpu.h"
22
23#if defined(CRYPTOPP_DISABLE_GCM_ASM)
24# undef CRYPTOPP_X86_ASM_AVAILABLE
25# undef CRYPTOPP_X32_ASM_AVAILABLE
26# undef CRYPTOPP_X64_ASM_AVAILABLE
27# undef CRYPTOPP_SSE2_ASM_AVAILABLE
28#endif
29
30NAMESPACE_BEGIN(CryptoPP)
31
32#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33// Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34// 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35#if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36// 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37# define USE_MOVD_REG32 1
38#elif defined(__GNUC__) || defined(_MSC_VER)
39// 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40# define USE_MOVD_REG32_OR_REG64 1
41#else
42// 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43# define USE_MOV_REG32_OR_REG64 1
44#endif
45#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46
// 256-entry table of 16-bit reduction constants. It is filled on first use by
// the 2K-table branch of SetKeyWithoutResync() and read by the 2K-table paths
// of AuthenticateBlocks().
47word16 GCM_Base::s_reductionTable[256];
// Set to true once s_reductionTable has been populated.
// NOTE(review): 'volatile' is not a synchronization primitive; confirm whether
// callers serialize the first key setup across threads.
48volatile bool GCM_Base::s_reductionTableInitialized = false;
49
50void GCM_Base::GCTR::IncrementCounterBy256()
51{
52 IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
53}
54
55static inline void Xor16(byte *a, const byte *b, const byte *c)
56{
57 CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
58 CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
59 CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
60 ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
61 ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
62}
63
64#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
65// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
66// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
67extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
68#endif // SSE2
69
70#if CRYPTOPP_ARM_NEON_AVAILABLE
71extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
72#endif
73
74#if CRYPTOPP_POWER8_AVAILABLE
75extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
76#endif
77
78#if CRYPTOPP_CLMUL_AVAILABLE
79extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
80extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
81const unsigned int s_cltableSizeInBlocks = 8;
82extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
83#endif // CRYPTOPP_CLMUL_AVAILABLE
84
85#if CRYPTOPP_ARM_PMULL_AVAILABLE
86extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
87extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
88const unsigned int s_cltableSizeInBlocks = 8;
89extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
90#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
91
92#if CRYPTOPP_POWER8_VMULL_AVAILABLE
93extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
94extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
95const unsigned int s_cltableSizeInBlocks = 8;
96extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
97#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
98
99void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
100{
101 BlockCipher &blockCipher = AccessBlockCipher();
102 blockCipher.SetKey(userKey, keylength, params);
103
104 // GCM is only defined for 16-byte block ciphers at the moment.
105 // However, variable blocksize support means we have to defer
106 // blocksize checks to runtime after the key is set. Also see
107 // https://github.com/weidai11/cryptopp/issues/408.
108 const unsigned int blockSize = blockCipher.BlockSize();
109 CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
110 if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
111 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
112
113 int tableSize, i, j, k;
114
115#if CRYPTOPP_CLMUL_AVAILABLE
116 if (HasCLMUL())
117 {
118 // Avoid "parameter not used" error and suppress Coverity finding
119 (void)params.GetIntValue(Name::TableSize(), tableSize);
120 tableSize = s_cltableSizeInBlocks * blockSize;
121 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
122 }
123 else
124#elif CRYPTOPP_ARM_PMULL_AVAILABLE
125 if (HasPMULL())
126 {
127 // Avoid "parameter not used" error and suppress Coverity finding
128 (void)params.GetIntValue(Name::TableSize(), tableSize);
129 tableSize = s_cltableSizeInBlocks * blockSize;
130 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
131 }
132 else
133#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
134 if (HasPMULL())
135 {
136 // Avoid "parameter not used" error and suppress Coverity finding
137 (void)params.GetIntValue(Name::TableSize(), tableSize);
138 tableSize = s_cltableSizeInBlocks * blockSize;
139 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
140 }
141 else
142#endif
143 {
144 if (params.GetIntValue(Name::TableSize(), tableSize))
145 tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
146 else
147 tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;
148
149 //#if defined(_MSC_VER) && (_MSC_VER < 1400)
150 // VC 2003 workaround: compiler generates bad code for 64K tables
151 //tableSize = 2*1024;
152 //#endif
153 }
154
155 m_buffer.resize(3*blockSize + tableSize);
156 byte *mulTable = MulTable();
157 byte *hashKey = HashKey();
158 memset(hashKey, 0, REQUIRED_BLOCKSIZE);
159 blockCipher.ProcessBlock(hashKey);
160
161#if CRYPTOPP_CLMUL_AVAILABLE
162 if (HasCLMUL())
163 {
164 GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
165 return;
166 }
167#elif CRYPTOPP_ARM_PMULL_AVAILABLE
168 if (HasPMULL())
169 {
170 GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
171 return;
172 }
173#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
174 if (HasPMULL())
175 {
176 GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
177 return;
178 }
179#endif
180
181 word64 V0, V1;
183 Block::Get(hashKey)(V0)(V1);
184
185 if (tableSize == 64*1024)
186 {
187 for (i=0; i<128; i++)
188 {
189 k = i%8;
190 Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
191
192 int x = (int)V1 & 1;
193 V1 = (V1>>1) | (V0<<63);
194 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
195 }
196
197 for (i=0; i<16; i++)
198 {
199 memset(mulTable+i*256*16, 0, 16);
200#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
201 if (HasSSE2())
202 for (j=2; j<=0x80; j*=2)
203 for (k=1; k<j; k++)
204 GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
205 else
206#elif CRYPTOPP_ARM_NEON_AVAILABLE
207 if (HasNEON())
208 for (j=2; j<=0x80; j*=2)
209 for (k=1; k<j; k++)
210 GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
211 else
212#elif CRYPTOPP_POWER8_AVAILABLE
213 if (HasPower8())
214 for (j=2; j<=0x80; j*=2)
215 for (k=1; k<j; k++)
216 GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
217 else
218#endif
219 for (j=2; j<=0x80; j*=2)
220 for (k=1; k<j; k++)
221 Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
222 }
223 }
224 else
225 {
226 if (!s_reductionTableInitialized)
227 {
228 s_reductionTable[0] = 0;
229 word16 x = 0x01c2;
230 s_reductionTable[1] = ByteReverse(x);
231 for (unsigned int ii=2; ii<=0x80; ii*=2)
232 {
233 x <<= 1;
234 s_reductionTable[ii] = ByteReverse(x);
235 for (unsigned int jj=1; jj<ii; jj++)
236 s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
237 }
238 s_reductionTableInitialized = true;
239 }
240
241 for (i=0; i<128-24; i++)
242 {
243 k = i%32;
244 if (k < 4)
245 Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
246 else if (k < 8)
247 Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
248
249 int x = (int)V1 & 1;
250 V1 = (V1>>1) | (V0<<63);
251 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
252 }
253
254 for (i=0; i<4; i++)
255 {
256 memset(mulTable+i*256, 0, 16);
257 memset(mulTable+1024+i*256, 0, 16);
258#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
259 if (HasSSE2())
260 for (j=2; j<=8; j*=2)
261 for (k=1; k<j; k++)
262 {
263 GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
264 GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
265 }
266 else
267#elif CRYPTOPP_ARM_NEON_AVAILABLE
268 if (HasNEON())
269 for (j=2; j<=8; j*=2)
270 for (k=1; k<j; k++)
271 {
272 GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
273 GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
274 }
275 else
276#elif CRYPTOPP_POWER8_AVAILABLE
277 if (HasPower8())
278 for (j=2; j<=8; j*=2)
279 for (k=1; k<j; k++)
280 {
281 GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
282 GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
283 }
284 else
285#endif
286 for (j=2; j<=8; j*=2)
287 for (k=1; k<j; k++)
288 {
289 Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
290 Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
291 }
292 }
293 }
294}
295
296inline void GCM_Base::ReverseHashBufferIfNeeded()
297{
298#if CRYPTOPP_CLMUL_AVAILABLE
299 if (HasCLMUL())
300 {
301 GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
302 }
303#elif CRYPTOPP_ARM_PMULL_AVAILABLE
304 if (HasPMULL())
305 {
306 GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
307 }
308#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
309 if (HasPMULL())
310 {
311 GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
312 }
313#endif
314}
315
316void GCM_Base::Resync(const byte *iv, size_t len)
317{
318 BlockCipher &cipher = AccessBlockCipher();
319 byte *hashBuffer = HashBuffer();
320
321 if (len == 12)
322 {
323 memcpy(hashBuffer, iv, len);
324 memset(hashBuffer+len, 0, 3);
325 hashBuffer[len+3] = 1;
326 }
327 else
328 {
329 size_t origLen = len;
330 memset(hashBuffer, 0, HASH_BLOCKSIZE);
331
332 if (len >= HASH_BLOCKSIZE)
333 {
334 len = GCM_Base::AuthenticateBlocks(iv, len);
335 iv += (origLen - len);
336 }
337
338 if (len > 0)
339 {
340 memcpy(m_buffer, iv, len);
341 memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
342 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
343 }
344
345 PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
346 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
347
348 ReverseHashBufferIfNeeded();
349 }
350
351 if (m_state >= State_IVSet)
352 m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
353 else
354 m_ctr.SetCipherWithIV(cipher, hashBuffer);
355
356 m_ctr.Seek(HASH_BLOCKSIZE);
357
358 memset(hashBuffer, 0, HASH_BLOCKSIZE);
359}
360
362{
363 return
364#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
365 HasSSE2() ? 16 :
366#elif CRYPTOPP_ARM_NEON_AVAILABLE
367 HasNEON() ? 4 :
368#elif CRYPTOPP_POWER8_AVAILABLE
369 HasPower8() ? 16 :
370#endif
371 GetBlockCipher().OptimalDataAlignment();
372}
373
374#if CRYPTOPP_MSC_VERSION
375# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
376#endif
377
378#endif // Not CRYPTOPP_GENERATE_X64_MASM
379
380#ifdef CRYPTOPP_X64_MASM_AVAILABLE
381extern "C" {
382void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
383void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
384}
385#endif
386
387#ifndef CRYPTOPP_GENERATE_X64_MASM
388
389size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
390{
391#if CRYPTOPP_CLMUL_AVAILABLE
392 if (HasCLMUL())
393 {
394 return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
395 }
396#elif CRYPTOPP_ARM_PMULL_AVAILABLE
397 if (HasPMULL())
398 {
399 return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
400 }
401#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
402 if (HasPMULL())
403 {
404 return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
405 }
406#endif
407
409 word64 *hashBuffer = (word64 *)(void *)HashBuffer();
410 CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));
411
412 switch (2*(m_buffer.size()>=64*1024)
413#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
414 + HasSSE2()
415//#elif CRYPTOPP_ARM_NEON_AVAILABLE
416// + HasNEON()
417#endif
418 )
419 {
420 case 0: // non-SSE2 and 2K tables
421 {
422 byte *mulTable = MulTable();
423 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
424
425 do
426 {
427 word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
428 Block::Get(data)(y0)(y1);
429 x0 ^= y0;
430 x1 ^= y1;
431
432 data += HASH_BLOCKSIZE;
433 len -= HASH_BLOCKSIZE;
434
435 #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
436
437 #if (CRYPTOPP_LITTLE_ENDIAN)
438 #if CRYPTOPP_BOOL_SLOW_WORD64
439 word32 z0 = (word32)x0;
440 word32 z1 = (word32)(x0>>32);
441 word32 z2 = (word32)x1;
442 word32 z3 = (word32)(x1>>32);
443 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
444 #else
445 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
446 #endif
447 #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
448 #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
449 #else
450 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
451 #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
452 #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
453 #endif
454
455 #define GF_MUL_32BY128(op, a, b, c) \
456 a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
457 a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
458 b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
459 b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
460 c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
461 c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
462 d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
463 d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
464
465 GF_MUL_32BY128(=, 0, 0, 0)
466 GF_MUL_32BY128(^=, 0, 1, 1)
467 GF_MUL_32BY128(^=, 1, 0, 2)
468 GF_MUL_32BY128(^=, 1, 1, 3)
469
470 word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
471 GF_SHIFT_8(d)
472 c0 ^= d0; c1 ^= d1;
473 r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
474 GF_SHIFT_8(c)
475 b0 ^= c0; b1 ^= c1;
476 r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
477 GF_SHIFT_8(b)
478 a0 ^= b0; a1 ^= b1;
479 a0 ^= ConditionalByteReverse<word64>(LITTLE_ENDIAN_ORDER, r);
480 x0 = a0; x1 = a1;
481 }
482 while (len >= HASH_BLOCKSIZE);
483
484 hashBuffer[0] = x0; hashBuffer[1] = x1;
485 return len;
486 }
487
488 case 2: // non-SSE2 and 64K tables
489 {
490 byte *mulTable = MulTable();
491 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
492
493 do
494 {
495 word64 y0, y1, a0, a1;
496 Block::Get(data)(y0)(y1);
497 x0 ^= y0;
498 x1 ^= y1;
499
500 data += HASH_BLOCKSIZE;
501 len -= HASH_BLOCKSIZE;
502
503 #undef READ_TABLE_WORD64_COMMON
504 #undef READ_TABLE_WORD64
505
506 #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
507
508 #if (CRYPTOPP_LITTLE_ENDIAN)
509 #if CRYPTOPP_BOOL_SLOW_WORD64
510 word32 z0 = (word32)x0;
511 word32 z1 = (word32)(x0>>32);
512 word32 z2 = (word32)x1;
513 word32 z3 = (word32)(x1>>32);
514 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
515 #else
516 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
517 #endif
518 #else
519 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
520 #endif
521
522 #define GF_MUL_8BY128(op, b, c, d) \
523 a0 op READ_TABLE_WORD64(b, c, d, 0);\
524 a1 op READ_TABLE_WORD64(b, c, d, 1);\
525
526 GF_MUL_8BY128(=, 0, 0, 0)
527 GF_MUL_8BY128(^=, 0, 0, 1)
528 GF_MUL_8BY128(^=, 0, 0, 2)
529 GF_MUL_8BY128(^=, 0, 0, 3)
530 GF_MUL_8BY128(^=, 0, 1, 0)
531 GF_MUL_8BY128(^=, 0, 1, 1)
532 GF_MUL_8BY128(^=, 0, 1, 2)
533 GF_MUL_8BY128(^=, 0, 1, 3)
534 GF_MUL_8BY128(^=, 1, 2, 0)
535 GF_MUL_8BY128(^=, 1, 2, 1)
536 GF_MUL_8BY128(^=, 1, 2, 2)
537 GF_MUL_8BY128(^=, 1, 2, 3)
538 GF_MUL_8BY128(^=, 1, 3, 0)
539 GF_MUL_8BY128(^=, 1, 3, 1)
540 GF_MUL_8BY128(^=, 1, 3, 2)
541 GF_MUL_8BY128(^=, 1, 3, 3)
542
543 x0 = a0; x1 = a1;
544 }
545 while (len >= HASH_BLOCKSIZE);
546
547 hashBuffer[0] = x0; hashBuffer[1] = x1;
548 return len;
549 }
550#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
551
552#ifdef CRYPTOPP_X64_MASM_AVAILABLE
553 case 1: // SSE2 and 2K tables
554 GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
555 return len % 16;
556 case 3: // SSE2 and 64K tables
557 GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
558 return len % 16;
559#endif
560
561#if CRYPTOPP_SSE2_ASM_AVAILABLE
562
563 case 1: // SSE2 and 2K tables
564 {
565 #ifdef __GNUC__
566 __asm__ __volatile__
567 (
568 INTEL_NOPREFIX
569 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
570 ALIGN 8
571 GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
572 rex_push_reg rsi
573 push_reg rdi
574 push_reg rbx
575 .endprolog
576 mov rsi, r8
577 mov r11, r9
578 #else
579 AS2( mov WORD_REG(cx), data )
580 AS2( mov WORD_REG(dx), len )
581 AS2( mov WORD_REG(si), hashBuffer )
582 AS2( shr WORD_REG(dx), 4 )
583 #endif
584
585 #if CRYPTOPP_BOOL_X32
586 AS1(push rbx)
587 AS1(push rbp)
588 #else
589 AS_PUSH_IF86( bx)
590 AS_PUSH_IF86( bp)
591 #endif
592
593 #ifdef __GNUC__
594 AS2( mov AS_REG_7, WORD_REG(di))
595 #elif CRYPTOPP_BOOL_X86
596 AS2( lea AS_REG_7, s_reductionTable)
597 #endif
598
599 AS2( movdqa xmm0, [WORD_REG(si)] )
600
601 #define MUL_TABLE_0 WORD_REG(si) + 32
602 #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
603 #define RED_TABLE AS_REG_7
604
605 ASL(0)
606 AS2( movdqu xmm4, [WORD_REG(cx)] )
607 AS2( pxor xmm0, xmm4 )
608
609 AS2( movd ebx, xmm0 )
610 AS2( mov eax, AS_HEX(f0f0f0f0) )
611 AS2( and eax, ebx )
612 AS2( shl ebx, 4 )
613 AS2( and ebx, AS_HEX(f0f0f0f0) )
614 AS2( movzx edi, ah )
615 AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
616 AS2( movzx edi, al )
617 AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
618 AS2( shr eax, 16 )
619 AS2( movzx edi, ah )
620 AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
621 AS2( movzx edi, al )
622 AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
623
624 #define SSE2_MUL_32BITS(i) \
625 AS2( psrldq xmm0, 4 )\
626 AS2( movd eax, xmm0 )\
627 AS2( and eax, AS_HEX(f0f0f0f0) )\
628 AS2( movzx edi, bh )\
629 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
630 AS2( movzx edi, bl )\
631 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
632 AS2( shr ebx, 16 )\
633 AS2( movzx edi, bh )\
634 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
635 AS2( movzx edi, bl )\
636 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
637 AS2( movd ebx, xmm0 )\
638 AS2( shl ebx, 4 )\
639 AS2( and ebx, AS_HEX(f0f0f0f0) )\
640 AS2( movzx edi, ah )\
641 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
642 AS2( movzx edi, al )\
643 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
644 AS2( shr eax, 16 )\
645 AS2( movzx edi, ah )\
646 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
647 AS2( movzx edi, al )\
648 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
649
650 SSE2_MUL_32BITS(1)
651 SSE2_MUL_32BITS(2)
652 SSE2_MUL_32BITS(3)
653
654 AS2( movzx edi, bh )
655 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
656 AS2( movzx edi, bl )
657 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
658 AS2( shr ebx, 16 )
659 AS2( movzx edi, bh )
660 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
661 AS2( movzx edi, bl )
662 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
663
664 AS2( movdqa xmm0, xmm3 )
665 AS2( pslldq xmm3, 1 )
666 AS2( pxor xmm2, xmm3 )
667 AS2( movdqa xmm1, xmm2 )
668 AS2( pslldq xmm2, 1 )
669 AS2( pxor xmm5, xmm2 )
670
671 AS2( psrldq xmm0, 15 )
672#if USE_MOVD_REG32
673 AS2( movd edi, xmm0 )
674#elif USE_MOV_REG32_OR_REG64
675 AS2( mov WORD_REG(di), xmm0 )
676#else // GNU Assembler
677 AS2( movd WORD_REG(di), xmm0 )
678#endif
679 AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
680 AS2( shl eax, 8 )
681
682 AS2( movdqa xmm0, xmm5 )
683 AS2( pslldq xmm5, 1 )
684 AS2( pxor xmm4, xmm5 )
685
686 AS2( psrldq xmm1, 15 )
687#if USE_MOVD_REG32
688 AS2( movd edi, xmm1 )
689#elif USE_MOV_REG32_OR_REG64
690 AS2( mov WORD_REG(di), xmm1 )
691#else
692 AS2( movd WORD_REG(di), xmm1 )
693#endif
694 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
695 AS2( shl eax, 8 )
696
697 AS2( psrldq xmm0, 15 )
698#if USE_MOVD_REG32
699 AS2( movd edi, xmm0 )
700#elif USE_MOV_REG32_OR_REG64
701 AS2( mov WORD_REG(di), xmm0 )
702#else
703 AS2( movd WORD_REG(di), xmm0 )
704#endif
705 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
706
707 AS2( movd xmm0, eax )
708 AS2( pxor xmm0, xmm4 )
709
710 AS2( add WORD_REG(cx), 16 )
711 AS2( sub WORD_REG(dx), 1 )
712 // ATT_NOPREFIX
713 ASJ( jnz, 0, b )
714 INTEL_NOPREFIX
715 AS2( movdqa [WORD_REG(si)], xmm0 )
716
717 #if CRYPTOPP_BOOL_X32
718 AS1(pop rbp)
719 AS1(pop rbx)
720 #else
721 AS_POP_IF86( bp)
722 AS_POP_IF86( bx)
723 #endif
724
725 #ifdef __GNUC__
726 ATT_PREFIX
727 :
728 : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
729 : "memory", "cc", "%eax", "%ebx", PERCENT_REG(AS_REG_7), "%xmm0",
730 "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5"
731 );
732 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
733 pop rbx
734 pop rdi
735 pop rsi
736 ret
737 GCM_AuthenticateBlocks_2K_SSE2 ENDP
738 #endif
739
740 return len%16;
741 }
742 case 3: // SSE2 and 64K tables
743 {
744 #ifdef __GNUC__
745 __asm__ __volatile__
746 (
747 INTEL_NOPREFIX
748 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
749 ALIGN 8
750 GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
751 rex_push_reg rsi
752 push_reg rdi
753 .endprolog
754 mov rsi, r8
755 #else
756 AS2( mov WORD_REG(cx), data )
757 AS2( mov WORD_REG(dx), len )
758 AS2( mov WORD_REG(si), hashBuffer )
759 AS2( shr WORD_REG(dx), 4 )
760 #endif
761
762 AS2( movdqa xmm0, [WORD_REG(si)] )
763
764 #undef MUL_TABLE
765 #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
766
767 ASL(1)
768 AS2( movdqu xmm1, [WORD_REG(cx)] )
769 AS2( pxor xmm1, xmm0 )
770 AS2( pxor xmm0, xmm0 )
771
772 #undef SSE2_MUL_32BITS
773 #define SSE2_MUL_32BITS(i) \
774 AS2( movd eax, xmm1 )\
775 AS2( psrldq xmm1, 4 )\
776 AS2( movzx edi, al )\
777 AS2( add WORD_REG(di), WORD_REG(di) )\
778 AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
779 AS2( movzx edi, ah )\
780 AS2( add WORD_REG(di), WORD_REG(di) )\
781 AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
782 AS2( shr eax, 16 )\
783 AS2( movzx edi, al )\
784 AS2( add WORD_REG(di), WORD_REG(di) )\
785 AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
786 AS2( movzx edi, ah )\
787 AS2( add WORD_REG(di), WORD_REG(di) )\
788 AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
789
790 SSE2_MUL_32BITS(0)
791 SSE2_MUL_32BITS(1)
792 SSE2_MUL_32BITS(2)
793 SSE2_MUL_32BITS(3)
794
795 AS2( add WORD_REG(cx), 16 )
796 AS2( sub WORD_REG(dx), 1 )
797 // ATT_NOPREFIX
798 ASJ( jnz, 1, b )
799 INTEL_NOPREFIX
800 AS2( movdqa [WORD_REG(si)], xmm0 )
801
802 #ifdef __GNUC__
803 ATT_PREFIX
804 :
805 : "c" (data), "d" (len/16), "S" (hashBuffer)
806 : "memory", "cc", "%edi", "%eax", "%xmm0", "%xmm1"
807 );
808 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
809 pop rdi
810 pop rsi
811 ret
812 GCM_AuthenticateBlocks_64K_SSE2 ENDP
813 #endif
814
815 return len%16;
816 }
817#endif
818#ifndef CRYPTOPP_GENERATE_X64_MASM
819 }
820
821 return len%16;
822}
823
824void GCM_Base::AuthenticateLastHeaderBlock()
825{
826 if (m_bufferedDataLength > 0)
827 {
828 memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
829 m_bufferedDataLength = 0;
830 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
831 }
832}
833
834void GCM_Base::AuthenticateLastConfidentialBlock()
835{
836 GCM_Base::AuthenticateLastHeaderBlock();
837 PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
838 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
839}
840
841void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
842{
843 m_ctr.Seek(0);
844 ReverseHashBufferIfNeeded();
845 m_ctr.ProcessData(mac, HashBuffer(), macSize);
846}
847
848NAMESPACE_END
849
850#endif // Not CRYPTOPP_GENERATE_X64_MASM
851#endif
void ProcessData(byte *outString, const byte *inString, size_t length)
Apply keystream to data.
Definition strciphr.cpp:91
void Seek(lword position)
Seeks to a random position in the stream.
Definition strciphr.cpp:168
void Resynchronize(const byte *iv, int length=-1)
Resynchronize the cipher.
Definition strciphr.cpp:159
Interface for one direction (encryption or decryption) of a block cipher.
Definition cryptlib.h:1283
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition cryptlib.h:879
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition cryptlib.cpp:190
virtual unsigned int BlockSize() const =0
void SetCipherWithIV(BlockCipher &cipher, const byte *iv, int feedbackSize=0)
Set external block cipher and IV.
Definition modes.h:117
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
Definition gcm.cpp:361
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition gcm.h:36
virtual unsigned int BlockSize() const
Provides the block size of the compression function.
Definition cryptlib.h:1165
An invalid argument was detected.
Definition cryptlib.h:203
Interface for retrieving values given their names.
Definition cryptlib.h:322
CRYPTOPP_DLL bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition cryptlib.h:415
Access a block of memory.
Definition misc.h:2807
size_type size() const
Provides the count of elements in the SecBlock.
Definition secblock.h:867
void resize(size_type newSize)
Change size and preserve contents.
Definition secblock.h:1198
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition cryptlib.cpp:58
Library configuration file.
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:62
unsigned short word16
16-bit unsigned datatype
Definition config_int.h:59
Functions for CPU features and intrinsics.
GCM block cipher mode of operation.
@ GCM_64K_Tables
Use a table with 64K entries.
Definition gcm.h:27
byte ByteReverse(byte value)
Reverses bytes in a 8-bit value.
Definition misc.h:2022
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition misc.h:1299
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition misc.h:1227
Precompiled header file.
Access a block of memory.
Definition misc.h:2844