vmac.cpp
// vmac.cpp - originally written and placed in the public domain by Wei Dai
// based on Ted Krovetz's public domain vmac.c and draft-krovetz-vmac-01.txt

#include "pch.h"
#include "config.h"

#include "vmac.h"
#include "cpu.h"
#include "argnames.h"
#include "secblock.h"

#if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
#include <intrin.h>
#endif

#if defined(CRYPTOPP_DISABLE_VMAC_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
#endif

#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731)
#endif

ANONYMOUS_NAMESPACE_BEGIN

#if defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE)
using CryptoPP::word128;
using CryptoPP::word64;
# define VMAC_BOOL_WORD128 1
#else
using CryptoPP::word64;
# define VMAC_BOOL_WORD128 0
#endif

#ifdef __BORLANDC__
#define const // Turbo C++ 2006 workaround
#endif
const word64 p64   = W64LIT(0xfffffffffffffeff); /* 2^64 - 257 prime  */
const word64 m62   = W64LIT(0x3fffffffffffffff); /* 62-bit mask       */
const word64 m63   = W64LIT(0x7fffffffffffffff); /* 63-bit mask       */
const word64 m64   = W64LIT(0xffffffffffffffff); /* 64-bit mask       */
const word64 mpoly = W64LIT(0x1fffffff1fffffff); /* Poly key mask     */
#ifdef __BORLANDC__
#undef const
#endif

#if VMAC_BOOL_WORD128
// workaround GCC Bug 31690: ICE with const __uint128_t and C++ front-end
# if defined(__powerpc__) && defined (CRYPTOPP_GCC_VERSION) && (CRYPTOPP_GCC_VERSION < 50300)
#  define m126 ((word128(m62)<<64)|m64)
# else
const word128 m126 = (word128(m62)<<64)|m64; /* 126-bit mask */
# endif
#endif

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)

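// UncheckedSetKey derives all internal keys by running the block cipher
// over tagged counter blocks: 0x80... generates the NH key stream,
// 0xC0... the polynomial keys (masked with mpoly), and 0xE0... the L3
// keys, which are regenerated until both words fall below p64.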
void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs &params)
{
    int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
    if (digestLength != 8 && digestLength != 16)
        throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
    m_is128 = digestLength == 16;

    m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
    if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
        throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");

    AllocateBlocks();

    BlockCipher &cipher = AccessCipher();
    cipher.SetKey(userKey, keylength, params);
    const unsigned int blockSize = cipher.BlockSize();
    const unsigned int blockSizeInWords = blockSize / sizeof(word64);
    SecBlock<word64> out(blockSizeInWords);
    AlignedSecByteBlock in;
    in.CleanNew(blockSize);
    size_t i;

    /* Fill nh key */
    in[0] = 0x80;
    cipher.AdvancedProcessBlocks(in, NULLPTR, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
    ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));

    /* Fill poly key */
    in[0] = 0xC0;
    in[15] = 0;
    for (i = 0; i <= (size_t)m_is128; i++)
    {
        cipher.ProcessBlock(in, out.BytePtr());
        m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
        m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
        in[15]++;
    }

    /* Fill ip key */
    in[0] = 0xE0;
    in[15] = 0;
    word64 *l3Key = m_l3Key();
    CRYPTOPP_ASSERT(IsAlignedOn(l3Key,GetAlignmentOf<word64>()));

    for (i = 0; i <= (size_t)m_is128; i++)
        do
        {
            cipher.ProcessBlock(in, out.BytePtr());
            l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
            l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
            in[15]++;
        } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));

    m_padCached = false;
    size_t nonceLength;
    const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
    Resynchronize(nonce, (int)nonceLength);
}
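
// GetNextIV clears the high bit of the first nonce byte so generated IVs
// stay below 2^127, which the padded-nonce construction below expects.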
void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
{
    SimpleKeyingInterface::GetNextIV(rng, IV);
    IV[0] &= 0x7f;
}

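// Resynchronize stores the new nonce and precomputes the pad, i.e. the
// encryption of the zero-padded nonce. For 64-bit tags the low bit of the
// last nonce byte merely selects a pad half, so the pad is computed with
// that bit cleared and reused across nonces that differ only in that bit.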
void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
    size_t length = ThrowIfInvalidIVLength(len);
    size_t s = IVSize();
    byte *storedNonce = m_nonce();

    if (m_is128)
    {
        memset(storedNonce, 0, s-length);
        memcpy(storedNonce+s-length, nonce, length);
        AccessCipher().ProcessBlock(storedNonce, m_pad());
    }
    else
    {
        if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
        {
            m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
            for (size_t i=0; m_padCached && i<s-length; i++)
                m_padCached = (storedNonce[i] == 0);
        }
        if (!m_padCached)
        {
            memset(storedNonce, 0, s-length);
            memcpy(storedNonce+s-length, nonce, length-1);
            storedNonce[s-1] = nonce[length-1] & 0xfe;
            AccessCipher().ProcessBlock(storedNonce, m_pad());
            m_padCached = true;
        }
        storedNonce[s-1] = nonce[length-1];
    }
    m_isFirstBlock = true;
    Restart();
}

void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
{
    CRYPTOPP_UNUSED(data);
    CRYPTOPP_ASSERT(false);
    throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
}

unsigned int VMAC_Base::OptimalDataAlignment() const
{
    return
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
        HasSSE2() ? 16 :
#endif
        GetCipher().OptimalDataAlignment();
}

#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif

CRYPTOPP_NOINLINE
void VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
    const word64 *nhK = m_nhKey();
    word64 *polyS = (word64*)(void*)m_polyState();
    word32 L1KeyLength = m_L1KeyLength;
    // These are used in the ASM, but some analysis services miss them.
    CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart);
    CRYPTOPP_UNUSED(L1KeyLength);
    CRYPTOPP_UNUSED(blocksRemainingInWord64);

    // This inline ASM is tricky, and downright difficult on 32-bit when
    // PIC is in effect. The ASM uses all the general purpose registers
    // and all the XMM registers on 32-bit machines. When PIC is in effect
    // on a 32-bit machine, GCC uses EBX as a base register for PLT. Saving
    // EBX with 'mov %%ebx, %0' and restoring EBX with 'mov %0, %%ebx'
    // causes GCC to generate 'mov -0x40(%ebx), %ebx' for the restore. That
    // obviously won't work because EBX is no longer valid. We can push and
    // pop EBX, but that breaks the stack-based references. Attempting to
    // sidestep with clobber lists results in "error: ‘asm’ operand has
    // impossible constraints". Eventually, we found we could save EBX to
    // ESP-20, which is one word below our stack in the frame.
#ifdef __GNUC__
    __asm__ __volatile__
    (
# if CRYPTOPP_BOOL_X86
    // Hack. Save EBX for PIC. Do NOT 'push EBX' here.
    // GCC issues 'mov ESP+8, EBX' to load L1KeyLength.
    // A push breaks the reference to L1KeyLength.
    AS2( mov %%ebx, -20(%%esp))
# endif
    // L1KeyLength into EBX.
    // GCC generates 'mov ESP+8, EBX'.
    AS2( mov %0, %%ebx)
    INTEL_NOPREFIX
#else
    #if defined(__INTEL_COMPILER)
    char isFirstBlock = m_isFirstBlock;
    AS2( mov ebx, [L1KeyLength])
    AS2( mov dl, [isFirstBlock])
    #else
    AS2( mov ecx, this)
    AS2( mov ebx, [ecx+m_L1KeyLength])
    AS2( mov dl, [ecx+m_isFirstBlock])
    #endif
    AS2( mov eax, tagPart)
    AS2( shl eax, 4)
    AS2( mov edi, nhK)
    AS2( add edi, eax)
    AS2( add eax, eax)
    AS2( add eax, polyS)

    AS2( mov esi, data)
    AS2( mov ecx, blocksRemainingInWord64)
#endif

    AS2( shr ebx, 3)
    AS_PUSH_IF86( bp)
    AS2( sub esp, 12)
    ASL(4)
    AS2( mov ebp, ebx)
    AS2( cmp ecx, ebx)
    AS2( cmovl ebp, ecx)
    AS2( sub ecx, ebp)
    AS2( lea ebp, [edi+8*ebp]) // end of nhK
    AS2( movq mm6, [esi])
    AS2( paddq mm6, [edi])
    AS2( movq mm5, [esi+8])
    AS2( paddq mm5, [edi+8])
    AS2( add esi, 16)
    AS2( add edi, 16)
    AS2( movq mm4, mm6)
    ASS( pshufw mm2, mm6, 1, 0, 3, 2)
    AS2( pmuludq mm6, mm5)
    ASS( pshufw mm3, mm5, 1, 0, 3, 2)
    AS2( pmuludq mm5, mm2)
    AS2( pmuludq mm2, mm3)
    AS2( pmuludq mm3, mm4)
    AS2( pxor mm7, mm7)
    AS2( movd [esp], mm6)
    AS2( psrlq mm6, 32)
    AS2( movd [esp+4], mm5)
    AS2( psrlq mm5, 32)
    AS2( cmp edi, ebp)
    ASJ( je, 1, f)
    ASL(0)
    AS2( movq mm0, [esi])
    AS2( paddq mm0, [edi])
    AS2( movq mm1, [esi+8])
    AS2( paddq mm1, [edi+8])
    AS2( add esi, 16)
    AS2( add edi, 16)
    AS2( movq mm4, mm0)
    AS2( paddq mm5, mm2)
    ASS( pshufw mm2, mm0, 1, 0, 3, 2)
    AS2( pmuludq mm0, mm1)
    AS2( movd [esp+8], mm3)
    AS2( psrlq mm3, 32)
    AS2( paddq mm5, mm3)
    ASS( pshufw mm3, mm1, 1, 0, 3, 2)
    AS2( pmuludq mm1, mm2)
    AS2( pmuludq mm2, mm3)
    AS2( pmuludq mm3, mm4)
    AS2( movd mm4, [esp])
    AS2( paddq mm7, mm4)
    AS2( movd mm4, [esp+4])
    AS2( paddq mm6, mm4)
    AS2( movd mm4, [esp+8])
    AS2( paddq mm6, mm4)
    AS2( movd [esp], mm0)
    AS2( psrlq mm0, 32)
    AS2( paddq mm6, mm0)
    AS2( movd [esp+4], mm1)
    AS2( psrlq mm1, 32)
    AS2( paddq mm5, mm1)
    AS2( cmp edi, ebp)
    ASJ( jne, 0, b)
    ASL(1)
    AS2( paddq mm5, mm2)
    AS2( movd [esp+8], mm3)
    AS2( psrlq mm3, 32)
    AS2( paddq mm5, mm3)
    AS2( movd mm4, [esp])
    AS2( paddq mm7, mm4)
    AS2( movd mm4, [esp+4])
    AS2( paddq mm6, mm4)
    AS2( movd mm4, [esp+8])
    AS2( paddq mm6, mm4)
    AS2( lea ebp, [8*ebx])
    AS2( sub edi, ebp) // reset edi to start of nhK

    AS2( movd [esp], mm7)
    AS2( psrlq mm7, 32)
    AS2( paddq mm6, mm7)
    AS2( movd [esp+4], mm6)
    AS2( psrlq mm6, 32)
    AS2( paddq mm5, mm6)
    AS2( psllq mm5, 2)
    AS2( psrlq mm5, 2)

#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]

    AS2( test dl, dl)
    ASJ( jz, 2, f)
    AS2( movd mm1, k0)
    AS2( movd mm0, [esp])
    AS2( paddq mm0, mm1)
    AS2( movd a0, mm0)
    AS2( psrlq mm0, 32)
    AS2( movd mm1, k1)
    AS2( movd mm2, [esp+4])
    AS2( paddq mm1, mm2)
    AS2( paddq mm0, mm1)
    AS2( movd a1, mm0)
    AS2( psrlq mm0, 32)
    AS2( paddq mm5, k2)
    AS2( paddq mm0, mm5)
    AS2( movq a2, mm0)
    AS2( xor edx, edx)
    ASJ( jmp, 3, f)
    ASL(2)
    AS2( movd mm0, a3)
    AS2( movq mm4, mm0)
    AS2( pmuludq mm0, k3) // a3*k3
    AS2( movd mm1, a0)
    AS2( pmuludq mm1, k2) // a0*k2
    AS2( movd mm2, a1)
    AS2( movd mm6, k1)
    AS2( pmuludq mm2, mm6) // a1*k1
    AS2( movd mm3, a2)
    AS2( psllq mm0, 1)
    AS2( paddq mm0, mm5)
    AS2( movq mm5, mm3)
    AS2( movd mm7, k0)
    AS2( pmuludq mm3, mm7) // a2*k0
    AS2( pmuludq mm4, mm7) // a3*k0
    AS2( pmuludq mm5, mm6) // a2*k1
    AS2( paddq mm0, mm1)
    AS2( movd mm1, a1)
    AS2( paddq mm4, mm5)
    AS2( movq mm5, mm1)
    AS2( pmuludq mm1, k2) // a1*k2
    AS2( paddq mm0, mm2)
    AS2( movd mm2, a0)
    AS2( paddq mm0, mm3)
    AS2( movq mm3, mm2)
    AS2( pmuludq mm2, k3) // a0*k3
    AS2( pmuludq mm3, mm7) // a0*k0
    AS2( movd [esp+8], mm0)
    AS2( psrlq mm0, 32)
    AS2( pmuludq mm7, mm5) // a1*k0
    AS2( pmuludq mm5, k3) // a1*k3
    AS2( paddq mm0, mm1)
    AS2( movd mm1, a2)
    AS2( pmuludq mm1, k2) // a2*k2
    AS2( paddq mm0, mm2)
    AS2( paddq mm0, mm4)
    AS2( movq mm4, mm0)
    AS2( movd mm2, a3)
    AS2( pmuludq mm2, mm6) // a3*k1
    AS2( pmuludq mm6, a0) // a0*k1
    AS2( psrlq mm0, 31)
    AS2( paddq mm0, mm3)
    AS2( movd mm3, [esp])
    AS2( paddq mm0, mm3)
    AS2( movd mm3, a2)
    AS2( pmuludq mm3, k3) // a2*k3
    AS2( paddq mm5, mm1)
    AS2( movd mm1, a3)
    AS2( pmuludq mm1, k2) // a3*k2
    AS2( paddq mm5, mm2)
    AS2( movd mm2, [esp+4])
    AS2( psllq mm5, 1)
    AS2( paddq mm0, mm5)
    AS2( psllq mm4, 33)
    AS2( movd a0, mm0)
    AS2( psrlq mm0, 32)
    AS2( paddq mm6, mm7)
    AS2( movd mm7, [esp+8])
    AS2( paddq mm0, mm6)
    AS2( paddq mm0, mm2)
    AS2( paddq mm3, mm1)
    AS2( psllq mm3, 1)
    AS2( paddq mm0, mm3)
    AS2( psrlq mm4, 1)
    AS2( movd a1, mm0)
    AS2( psrlq mm0, 32)
    AS2( por mm4, mm7)
    AS2( paddq mm0, mm4)
    AS2( movq a2, mm0)

#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

    ASL(3)
    AS2( test ecx, ecx)
    ASJ( jnz, 4, b)
    AS2( add esp, 12)
    AS_POP_IF86( bp)
    AS1( emms)
#ifdef __GNUC__
    ATT_PREFIX
# if CRYPTOPP_BOOL_X86
    // Restore EBX for PIC
    AS2( mov -20(%%esp), %%ebx)
# endif
    :
    : "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data),
      "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
    : "memory", "cc"
    );
#endif
}
#endif

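// Portable fallbacks for the SSE2 path above. Three variants of the NH
// multiply-accumulate and 128-bit helpers follow: native word128 where the
// compiler supports it, x64 inline assembly or MSVC's _umul128 intrinsic,
// and a 32x32->64 bit decomposition (VMAC_BOOL_32BIT) for everything else.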
#if VMAC_BOOL_WORD128
    #define DeclareNH(a) word128 a=0
    #define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
    #define AccumulateNH(a, b, c) a += word128(b)*(c)
    #define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
    #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64))
        #define MUL32(a, b) __emulu(word32(a), word32(b))
    #else
        #define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
    #endif
    #if defined(CRYPTOPP_X64_ASM_AVAILABLE)
        #define DeclareNH(a) word64 a##0=0, a##1=0
        #define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
        #define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
        #define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
    #elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
        #define DeclareNH(a) word64 a##0=0, a##1=0
        #define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
        #define AccumulateNH(a, b, c) {\
            word64 ph, pl;\
            pl = _umul128(b,c,&ph);\
            a##0 += pl;\
            a##1 += ph + (a##0 < pl);}
    #else
        #define VMAC_BOOL_32BIT 1
        #define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
        #define MUL64(rh,rl,i1,i2) \
            {   word64 _i1 = (i1), _i2 = (i2); \
                word64 m1 = MUL32(_i1,_i2>>32); \
                word64 m2 = MUL32(_i1>>32,_i2); \
                rh = MUL32(_i1>>32,_i2>>32); \
                rl = MUL32(_i1,_i2); \
                ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
                ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
            }
        #define AccumulateNH(a, b, c) {\
            word64 p = MUL32(b, c);\
            a##1 += word32((p)>>32);\
            a##0 += word32(p);\
            p = MUL32((b)>>32, c);\
            a##2 += word32((p)>>32);\
            a##1 += word32(p);\
            p = MUL32((b)>>32, (c)>>32);\
            a##2 += p;\
            p = MUL32(b, (c)>>32);\
            a##1 += word32(p);\
            a##2 += word32(p>>32);}
    #endif
#endif
#ifndef VMAC_BOOL_32BIT
    #define VMAC_BOOL_32BIT 0
#endif
#ifndef ADD128
    #define ADD128(rh,rl,ih,il) \
        {   word64 _il = (il); \
            (rl) += (_il); \
            (rh) += (ih) + ((rl) < (_il)); \
        }
#endif
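
// The portable ADD128 recovers the carry out of the low limb from unsigned
// wraparound: after rl += il, the test (rl < il) is 1 exactly when the
// addition overflowed. A small sketch of the behavior:
//
//   word64 rh = 0, rl = W64LIT(0xffffffffffffffff);
//   ADD128(rh, rl, 0, 1);  // rl wraps to 0; the compare feeds the carry
//   // now rh == 1 and rl == 0, i.e. the 128-bit value 2^64

// VHASH_Update_Template is the portable VHASH core: each pass of the outer
// loop hashes up to one L1-key-length chunk of the message with NH, then
// folds the NH output into a polynomial accumulator evaluated modulo
// 2^127-1 (poly_step below). TruncatedFinal later reduces the accumulator
// to 64 bits with L3Hash.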
template <bool T_128BitTag>
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
    CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
    CRYPTOPP_ASSERT(IsAlignedOn(m_nhKey(),GetAlignmentOf<word64>()));

    #define INNER_LOOP_ITERATION(j) {\
        word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
        word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
        AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
        if (T_128BitTag)\
            AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
        }
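    // Each INNER_LOOP_ITERATION adds one NH term to the accumulator:
    // (d0 + k0) * (d1 + k1) as a 128-bit product, with d0,d1 the message
    // words read little-endian and k0,k1 the matching NH key words. For
    // 128-bit tags a second accumulator (nhB) reuses the key shifted by
    // two words, which is how VMAC derives its second NH hash.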

    size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
    size_t innerLoopEnd = L1KeyLengthInWord64;
    const word64 *nhK = m_nhKey();
    word64 *polyS = (word64*)(void*)m_polyState();
    bool isFirstBlock = true;
    size_t i;

    #if !VMAC_BOOL_32BIT
        #if VMAC_BOOL_WORD128
            word128 a1=0, a2=0;
        #else
            word64 ah1=0, al1=0, ah2=0, al2=0;
        #endif
        word64 kh1, kl1, kh2, kl2;
        kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
        if (T_128BitTag)
        {
            kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
        }
    #endif

    do
    {
        DeclareNH(nhA);
        DeclareNH(nhB);

        i = 0;
        if (blocksRemainingInWord64 < L1KeyLengthInWord64)
        {
            if (blocksRemainingInWord64 % 8)
            {
                innerLoopEnd = blocksRemainingInWord64 % 8;
                for (; i<innerLoopEnd; i+=2)
                    INNER_LOOP_ITERATION(0);
            }
            innerLoopEnd = blocksRemainingInWord64;
        }
        for (; i<innerLoopEnd; i+=8)
        {
            INNER_LOOP_ITERATION(0);
            INNER_LOOP_ITERATION(1);
            INNER_LOOP_ITERATION(2);
            INNER_LOOP_ITERATION(3);
        }
        blocksRemainingInWord64 -= innerLoopEnd;
        data += innerLoopEnd;

        #if VMAC_BOOL_32BIT
            word32 nh0[2], nh1[2];
            word64 nh2[2];

            nh0[0] = word32(nhA0);
            nhA1 += (nhA0 >> 32);
            nh1[0] = word32(nhA1);
            nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

            if (T_128BitTag)
            {
                nh0[1] = word32(nhB0);
                nhB1 += (nhB0 >> 32);
                nh1[1] = word32(nhB1);
                nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
            }

            #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
            #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
            #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
            #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
            #define aHi ((polyS+i*4)[0])
            #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
            #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
            #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
            #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
            #define kHi ((polyS+i*4+2)[0])

            if (isFirstBlock)
            {
                isFirstBlock = false;
                if (m_isFirstBlock)
                {
                    m_isFirstBlock = false;
                    for (i=0; i<=(size_t)T_128BitTag; i++)
                    {
                        word64 t = (word64)nh0[i] + k0;
                        a0 = (word32)t;
                        t = (t >> 32) + nh1[i] + k1;
                        a1 = (word32)t;
                        aHi = (t >> 32) + nh2[i] + kHi;
                    }
                    continue;
                }
            }
            for (i=0; i<=(size_t)T_128BitTag; i++)
            {
                word64 p, t;
                word32 t2;

                p = MUL32(a3, 2*k3);
                p += nh2[i];
                p += MUL32(a0, k2);
                p += MUL32(a1, k1);
                p += MUL32(a2, k0);
                t2 = (word32)p;
                p >>= 32;
                p += MUL32(a0, k3);
                p += MUL32(a1, k2);
                p += MUL32(a2, k1);
                p += MUL32(a3, k0);
                t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
                p >>= 31;
                p += nh0[i];
                p += MUL32(a0, k0);
                p += MUL32(a1, 2*k3);
                p += MUL32(a2, 2*k2);
                p += MUL32(a3, 2*k1);
                t2 = (word32)p;
                p >>= 32;
                p += nh1[i];
                p += MUL32(a0, k1);
                p += MUL32(a1, k0);
                p += MUL32(a2, 2*k3);
                p += MUL32(a3, 2*k2);
                a0 = t2;
                a1 = (word32)p;
                aHi = (p >> 32) + t;
            }

            #undef a0
            #undef a1
            #undef a2
            #undef a3
            #undef aHi
            #undef k0
            #undef k1
            #undef k2
            #undef k3
            #undef kHi
        #else // #if VMAC_BOOL_32BIT
            if (isFirstBlock)
            {
                isFirstBlock = false;
                if (m_isFirstBlock)
                {
                    m_isFirstBlock = false;
                    #if VMAC_BOOL_WORD128
                        #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)

                        first_poly_step(a1, kh1, kl1, nhA);
                        if (T_128BitTag)
                            first_poly_step(a2, kh2, kl2, nhB);
                    #else
                        #define first_poly_step(ah, al, kh, kl, mh, ml) {\
                            mh &= m62;\
                            ADD128(mh, ml, kh, kl); \
                            ah = mh; al = ml;}

                        first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
                        if (T_128BitTag)
                            first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
                    #endif
                    continue;
                }
                else
                {
                    #if VMAC_BOOL_WORD128
                        a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
                    #else
                        ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
                    #endif
                    if (T_128BitTag)
                    {
                        #if VMAC_BOOL_WORD128
                            a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
                        #else
                            ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
                        #endif
                    }
                }
            }

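        // Both poly_step variants compute a = a*k + m (mod 2^127-1), with
        // the state held either in a native word128 or in (high,low)
        // word64 pairs. The mpoly mask applied at key setup clears enough
        // key bits that the schoolbook partial products stay in range.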
        #if VMAC_BOOL_WORD128
            #define poly_step(a, kh, kl, m) \
            {   word128 t1, t2, t3, t4;\
                Multiply128(t2, a>>64, kl);\
                Multiply128(t3, a, kh);\
                Multiply128(t1, a, kl);\
                Multiply128(t4, a>>64, 2*kh);\
                t2 += t3;\
                t4 += t1;\
                t2 += t4>>64;\
                a = (word128(word64(t2)&m63) << 64) | word64(t4);\
                t2 *= 2;\
                a += m & m126;\
                a += t2>>64;}

            poly_step(a1, kh1, kl1, nhA);
            if (T_128BitTag)
                poly_step(a2, kh2, kl2, nhB);
        #else
            #define poly_step(ah, al, kh, kl, mh, ml) \
            {   word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
                /* compute ab*cd, put bd into result registers */ \
                MUL64(t2h,t2l,ah,kl); \
                MUL64(t3h,t3l,al,kh); \
                MUL64(t1h,t1l,ah,2*kh); \
                MUL64(ah,al,al,kl); \
                /* add together ad + bc */ \
                ADD128(t2h,t2l,t3h,t3l); \
                /* add 2 * ac to result */ \
                ADD128(ah,al,t1h,t1l); \
                /* now (ah,al), (t2l,2*t2h) need summing */ \
                /* first add the high registers, carrying into t2h */ \
                ADD128(t2h,ah,z,t2l); \
                /* double t2h and add top bit of ah */ \
                t2h += t2h + (ah >> 63); \
                ah &= m63; \
                /* now add the low registers */ \
                mh &= m62; \
                ADD128(ah,al,mh,ml); \
                ADD128(ah,al,z,t2h); \
            }

            poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
            if (T_128BitTag)
                poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
        #endif
        #endif // #if VMAC_BOOL_32BIT
    } while (blocksRemainingInWord64);

    #if VMAC_BOOL_WORD128
        (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
        if (T_128BitTag)
        {
            (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
        }
    #elif !VMAC_BOOL_32BIT
        (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
        if (T_128BitTag)
        {
            (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
        }
    #endif
}

inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
    if (HasSSE2())
    {
        VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
        if (m_is128)
            VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
        m_isFirstBlock = false;
    }
    else
#endif
    {
        if (m_is128)
            VHASH_Update_Template<true>(data, blocksRemainingInWord64);
        else
            VHASH_Update_Template<false>(data, blocksRemainingInWord64);
    }
}

size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
{
    size_t remaining = ModPowerOf2(length, m_L1KeyLength);
    VHASH_Update(data, (length-remaining)/8);
    return remaining;
}

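// L3Hash computes VMAC's third-layer hash: it folds the message bit length
// into the accumulator, fully reduces modulo 2^127-1, splits the result
// modulo 2^64-2^32, offsets both halves with the l3Key pair modulo
// p64 = 2^64-257, and returns their 64-bit product reduced mod p64, as
// the inline comments below trace step by step.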
word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
    word64 rh, rl, t, z=0;
    word64 p1 = input[0], p2 = input[1];
    word64 k1 = l3Key[0], k2 = l3Key[1];

    /* fully reduce (p1,p2)+(len,0) mod p127 */
    t = p1 >> 63;
    p1 &= m63;
    ADD128(p1, p2, len, t);
    /* At this point, (p1,p2) is at most 2^127+(len<<64) */
    t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
    ADD128(p1, p2, z, t);
    p1 &= m63;

    /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
    t = p1 + (p2 >> 32);
    t += (t >> 32);
    t += (word32)t > 0xfffffffeU;
    p1 += (t >> 32);
    p2 += (p1 << 32);

    /* compute (p1+k1)%p64 and (p2+k2)%p64 */
    p1 += k1;
    p1 += (0 - (p1 < k1)) & 257;
    p2 += k2;
    p2 += (0 - (p2 < k2)) & 257;

    /* compute (p1+k1)*(p2+k2)%p64 */
    MUL64(rh, rl, p1, p2);
    t = rh >> 56;
    ADD128(t, rl, z, rh);
    rh <<= 8;
    ADD128(t, rl, z, rh);
    t += t << 8;
    rl += t;
    rl += (0 - (rl < t)) & 257;
    rl += (0 - (rl > p64-1)) & 257;
    return rl;
}

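// TruncatedFinal pads and hashes any buffered message bytes, runs L3Hash
// over each polynomial accumulator, and adds the pad (the encrypted nonce)
// to form the tag. For 64-bit tags the low bit of the nonce picks which
// half of the 128-bit pad is added.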
void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
{
    CRYPTOPP_ASSERT(IsAlignedOn(DataBuf(),GetAlignmentOf<word64>()));
    CRYPTOPP_ASSERT(IsAlignedOn(m_polyState(),GetAlignmentOf<word64>()));
    size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);

    if (len)
    {
        memset(m_data()+len, 0, (0-len)%16);
        VHASH_Update(DataBuf(), ((len+15)/16)*2);
        len *= 8; // convert to bits
    }
    else if (m_isFirstBlock)
    {
        // special case for empty string
        m_polyState()[0] = m_polyState()[2];
        m_polyState()[1] = m_polyState()[3];
        if (m_is128)
        {
            m_polyState()[4] = m_polyState()[6];
            m_polyState()[5] = m_polyState()[7];
        }
    }

    if (m_is128)
    {
        word64 t[2];
        t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
        t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
        if (size == 16)
        {
            PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
            PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
        }
        else
        {
            t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
            t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
            memcpy(mac, t, size);
        }
    }
    else
    {
        word64 t = L3Hash(m_polyState(), m_l3Key(), len);
        t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
        if (size == 8)
            PutWord(false, BIG_ENDIAN_ORDER, mac, t);
        else
        {
            t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
            memcpy(mac, &t, size);
        }
    }
}

NAMESPACE_END