Security Scol plugin
xts.cpp
1// xts.cpp - written and placed in the public domain by Jeffrey Walton
2
3// Aarch32, Aarch64, Altivec and X86_64 include SIMD as part of the
4// base architecture. We can use the SIMD code below without an
5// architecture option. No runtime tests are required. Unfortunately,
6// we can't use it on Altivec because an architecture switch is required.
7// The updated XorBuffer gains 0.3 to 1.5 cpb on the architectures for
8// 16-byte block sizes.
9
10#include "pch.h"
11
12#include "xts.h"
13#include "misc.h"
14#include "modes.h"
15#include "cpu.h"
16
17#if defined(CRYPTOPP_DEBUG)
18# include "aes.h"
19# include "threefish.h"
20#endif
21
22// 0.3 to 0.4 cpb profit
23#if defined(__SSE2__) || defined(_M_X64)
24# include <emmintrin.h>
25#endif
26
27#if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
28# if (CRYPTOPP_ARM_NEON_HEADER) || (CRYPTOPP_ARM_ASIMD_AVAILABLE)
29# include <arm_neon.h>
30# endif
31#endif
32
33#if defined(__ALTIVEC__)
34# include "ppc_simd.h"
35#endif
36
37ANONYMOUS_NAMESPACE_BEGIN
38
39using namespace CryptoPP;
40
41#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
42
43using CryptoPP::AES;
44using CryptoPP::XTS_Mode;
45using CryptoPP::Threefish512;
46
47void Modes_TestInstantiations()
48{
49 XTS_Mode<AES>::Encryption m0;
50 XTS_Mode<AES>::Decryption m1;
51 XTS_Mode<AES>::Encryption m2;
52 XTS_Mode<AES>::Decryption m3;
53
54#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
55 XTS_Mode<Threefish512>::Encryption m4;
56 XTS_Mode<Threefish512>::Decryption m5;
57#endif
58}
59#endif // CRYPTOPP_DEBUG
60
61inline void XorBuffer(byte *output, const byte *input, const byte *mask, size_t count)
62{
63 CRYPTOPP_ASSERT(count >= 16 && (count % 16 == 0));
64
65#if defined(CRYPTOPP_DISABLE_ASM)
66 xorbuf(output, input, mask, count);
67
68#elif defined(__SSE2__) || defined(_M_X64)
69 for (size_t i=0; i<count; i+=16)
70 _mm_storeu_si128(M128_CAST(output+i),
71 _mm_xor_si128(
72 _mm_loadu_si128(CONST_M128_CAST(input+i)),
73 _mm_loadu_si128(CONST_M128_CAST(mask+i))));
74
75#elif defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
76 for (size_t i=0; i<count; i+=16)
77 vst1q_u8(output+i, veorq_u8(vld1q_u8(input+i), vld1q_u8(mask+i)));
78
79#elif defined(__ALTIVEC__)
80 for (size_t i=0; i<count; i+=16)
81 VecStore(VecXor(VecLoad(input+i), VecLoad(mask+i)), output+i);
82
83#else
84 xorbuf(output, input, mask, count);
85#endif
86}
87
88inline void XorBuffer(byte *buf, const byte *mask, size_t count)
89{
90 XorBuffer(buf, buf, mask, count);
91}
92
93// Borrowed from CMAC, but little-endian representation
94inline void GF_Double(byte *out, const byte* in, unsigned int len)
95{
96#if defined(CRYPTOPP_WORD128_AVAILABLE)
97 word128 carry = 0, x;
98 for (size_t i=0, idx=0; i<len/16; ++i, idx+=16)
99 {
100 x = GetWord<word128>(false, LITTLE_ENDIAN_ORDER, in+idx);
101 word128 y = (x >> 127); x = (x << 1) + carry;
102 PutWord<word128>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
103 carry = y;
104 }
105#elif defined(_M_X64) || defined(_M_ARM64) || defined(_LP64) || defined(__LP64__)
106 word64 carry = 0, x;
107 for (size_t i=0, idx=0; i<len/8; ++i, idx+=8)
108 {
109 x = GetWord<word64>(false, LITTLE_ENDIAN_ORDER, in+idx);
110 word64 y = (x >> 63); x = (x << 1) + carry;
111 PutWord<word64>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
112 carry = y;
113 }
114#else
115 word32 carry = 0, x;
116 for (size_t i=0, idx=0; i<len/4; ++i, idx+=4)
117 {
118 x = GetWord<word32>(false, LITTLE_ENDIAN_ORDER, in+idx);
119 word32 y = (x >> 31); x = (x << 1) + carry;
120 PutWord<word32>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
121 carry = y;
122 }
123#endif
124
125#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
126
127 CRYPTOPP_ASSERT(IsPowerOf2(len));
128 CRYPTOPP_ASSERT(len >= 16);
129 CRYPTOPP_ASSERT(len <= 128);
130
131 byte* k = out;
132 if (carry)
133 {
134 switch (len)
135 {
136 case 16:
137 {
138 const size_t LEIDX = 16-1;
139 k[LEIDX-15] ^= 0x87;
140 break;
141 }
142 case 32:
143 {
144 // https://crypto.stackexchange.com/q/9815/10496
145 // Polynomial x^256 + x^10 + x^5 + x^2 + 1
146 const size_t LEIDX = 32-1;
147 k[LEIDX-30] ^= 4;
148 k[LEIDX-31] ^= 0x25;
149 break;
150 }
151 case 64:
152 {
153 // https://crypto.stackexchange.com/q/9815/10496
154 // Polynomial x^512 + x^8 + x^5 + x^2 + 1
155 const size_t LEIDX = 64-1;
156 k[LEIDX-62] ^= 1;
157 k[LEIDX-63] ^= 0x25;
158 break;
159 }
160 case 128:
161 {
162 // https://crypto.stackexchange.com/q/9815/10496
163 // Polynomial x^1024 + x^19 + x^6 + x + 1
164 const size_t LEIDX = 128-1;
165 k[LEIDX-125] ^= 8;
166 k[LEIDX-126] ^= 0x00;
167 k[LEIDX-127] ^= 0x43;
168 break;
169 }
170 default:
171 CRYPTOPP_ASSERT(0);
172 }
173 }
174#else
175 CRYPTOPP_ASSERT(len == 16);
176
177 byte* k = out;
178 if (carry)
179 {
180 k[0] ^= 0x87;
181 return;
182 }
183#endif // CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
184}
185
186inline void GF_Double(byte *inout, unsigned int len)
187{
188 GF_Double(inout, inout, len);
189}
190
191ANONYMOUS_NAMESPACE_END
192
193NAMESPACE_BEGIN(CryptoPP)
194
195void XTS_ModeBase::ThrowIfInvalidBlockSize(size_t length)
196{
197#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
198 CRYPTOPP_ASSERT(length >= 16 && length <= 128 && IsPowerOf2(length));
199 if (length < 16 || length > 128 || !IsPowerOf2(length))
200 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not valid");
201#else
202 CRYPTOPP_ASSERT(length == 16);
203 if (length != 16)
204 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
205#endif
206}
207
209{
210 CRYPTOPP_ASSERT(length % 2 == 0);
211 if (!GetBlockCipher().IsValidKeyLength((length+1)/2))
212 throw InvalidKeyLength(AlgorithmName(), length);
213}
214
215void XTS_ModeBase::SetKey(const byte *key, size_t length, const NameValuePairs &params)
216{
219
220 const size_t klen = length/2;
221 AccessBlockCipher().SetKey(key+0, klen, params);
222 AccessTweakCipher().SetKey(key+klen, klen, params);
223
224 ResizeBuffers();
225
226 size_t ivLength;
227 const byte *iv = GetIVAndThrowIfInvalid(params, ivLength);
228 Resynchronize(iv, (int)ivLength);
229}
230
231void XTS_ModeBase::Resynchronize(const byte *iv, int ivLength)
232{
234 std::memcpy(m_xregister, m_register, ivLength);
235 GetTweakCipher().ProcessBlock(m_xregister);
236}
237
238void XTS_ModeBase::Resynchronize(word64 sector, ByteOrder order)
239{
240 SecByteBlock iv(GetTweakCipher().BlockSize());
241 PutWord<word64>(false, order, iv, sector);
242 std::memset(iv+8, 0x00, iv.size()-8);
243
245 std::memcpy(m_xregister, iv, iv.size());
246 GetTweakCipher().ProcessBlock(m_xregister);
247}
248
249void XTS_ModeBase::ResizeBuffers()
250{
251 BlockOrientedCipherModeBase::ResizeBuffers();
252 m_xworkspace.New(GetBlockCipher().BlockSize()*ParallelBlocks);
253 m_xregister.New(GetBlockCipher().BlockSize()*ParallelBlocks);
254}
255
256// ProcessData runs either 12-4-1 blocks, 8-2-1 or 4-1 blocks. Which is
257// selected depends on ParallelBlocks in the header file. 12-4-1 or 8-2-1
258// can be used on Aarch64 and PowerPC. Intel should use 4-1 due to lack
259// of registers. The unneeded code paths should be removed by optimizer.
260// The extra gyrations save us 1.8 cpb on Aarch64 and 2.1 cpb on PowerPC.
261void XTS_ModeBase::ProcessData(byte *outString, const byte *inString, size_t length)
262{
263 // data unit is multiple of 16 bytes
264 CRYPTOPP_ASSERT(length % BlockSize() == 0);
265
266 enum { lastParallelBlock = ParallelBlocks-1 };
267 const unsigned int blockSize = GetBlockCipher().BlockSize();
268 const size_t parallelSize = blockSize*ParallelBlocks;
269
270 // encrypt the data unit, optimal size at a time
271 while (length >= parallelSize)
272 {
273 // m_xregister[0] always points to the next tweak.
274 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
275 GF_Double(m_xregister+2*blockSize, m_xregister+1*blockSize, blockSize);
276 GF_Double(m_xregister+3*blockSize, m_xregister+2*blockSize, blockSize);
277
278 if (ParallelBlocks > 4)
279 {
280 GF_Double(m_xregister+4*blockSize, m_xregister+3*blockSize, blockSize);
281 GF_Double(m_xregister+5*blockSize, m_xregister+4*blockSize, blockSize);
282 GF_Double(m_xregister+6*blockSize, m_xregister+5*blockSize, blockSize);
283 GF_Double(m_xregister+7*blockSize, m_xregister+6*blockSize, blockSize);
284 }
285 if (ParallelBlocks > 8)
286 {
287 GF_Double(m_xregister+8*blockSize, m_xregister+7*blockSize, blockSize);
288 GF_Double(m_xregister+9*blockSize, m_xregister+8*blockSize, blockSize);
289 GF_Double(m_xregister+10*blockSize, m_xregister+9*blockSize, blockSize);
290 GF_Double(m_xregister+11*blockSize, m_xregister+10*blockSize, blockSize);
291 }
292
293 // merge the tweak into the input block
294 XorBuffer(m_xworkspace, inString, m_xregister, parallelSize);
295
296 // encrypt one block, merge the tweak into the output block
297 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
298 outString, parallelSize, BlockTransformation::BT_AllowParallel);
299
300 // m_xregister[0] always points to the next tweak.
301 GF_Double(m_xregister+0, m_xregister+lastParallelBlock*blockSize, blockSize);
302
303 inString += parallelSize;
304 outString += parallelSize;
305 length -= parallelSize;
306 }
307
308 // encrypt the data unit, 4 blocks at a time
309 while (ParallelBlocks == 12 && length >= blockSize*4)
310 {
311 // m_xregister[0] always points to the next tweak.
312 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
313 GF_Double(m_xregister+2*blockSize, m_xregister+1*blockSize, blockSize);
314 GF_Double(m_xregister+3*blockSize, m_xregister+2*blockSize, blockSize);
315
316 // merge the tweak into the input block
317 XorBuffer(m_xworkspace, inString, m_xregister, blockSize*4);
318
319 // encrypt one block, merge the tweak into the output block
320 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
321 outString, blockSize*4, BlockTransformation::BT_AllowParallel);
322
323 // m_xregister[0] always points to the next tweak.
324 GF_Double(m_xregister+0, m_xregister+3*blockSize, blockSize);
325
326 inString += blockSize*4;
327 outString += blockSize*4;
328 length -= blockSize*4;
329 }
330
331 // encrypt the data unit, 2 blocks at a time
332 while (ParallelBlocks == 8 && length >= blockSize*2)
333 {
334 // m_xregister[0] always points to the next tweak.
335 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
336
337 // merge the tweak into the input block
338 XorBuffer(m_xworkspace, inString, m_xregister, blockSize*2);
339
340 // encrypt one block, merge the tweak into the output block
341 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
342 outString, blockSize*2, BlockTransformation::BT_AllowParallel);
343
344 // m_xregister[0] always points to the next tweak.
345 GF_Double(m_xregister+0, m_xregister+1*blockSize, blockSize);
346
347 inString += blockSize*2;
348 outString += blockSize*2;
349 length -= blockSize*2;
350 }
351
352 // encrypt the data unit, blocksize at a time
353 while (length)
354 {
355 // merge the tweak into the input block
356 XorBuffer(m_xworkspace, inString, m_xregister, blockSize);
357
358 // encrypt one block
359 GetBlockCipher().ProcessBlock(m_xworkspace);
360
361 // merge the tweak into the output block
362 XorBuffer(outString, m_xworkspace, m_xregister, blockSize);
363
364 // Multiply T by alpha
365 GF_Double(m_xregister, blockSize);
366
367 inString += blockSize;
368 outString += blockSize;
369 length -= blockSize;
370 }
371}
372
373size_t XTS_ModeBase::ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
374{
375 // need at least a full AES block
376 CRYPTOPP_ASSERT(inLength >= BlockSize());
377
378 if (inLength < BlockSize())
379 throw InvalidArgument("XTS: message is too short for ciphertext stealing");
380
382 return ProcessLastPlainBlock(outString, outLength, inString, inLength);
383 else
384 return ProcessLastCipherBlock(outString, outLength, inString, inLength);
385}
386
387size_t XTS_ModeBase::ProcessLastPlainBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
388{
389 // ensure output buffer is large enough
390 CRYPTOPP_ASSERT(outLength >= inLength);
391
392 const unsigned int blockSize = GetBlockCipher().BlockSize();
393 const size_t blocks = inLength / blockSize;
394 const size_t tail = inLength % blockSize;
395 outLength = inLength;
396
397 if (tail == 0)
398 {
399 // Allow ProcessData to handle all the full blocks
400 ProcessData(outString, inString, inLength);
401 return inLength;
402 }
403 else if (blocks > 1)
404 {
405 // Allow ProcessData to handle full blocks except one
406 const size_t head = (blocks-1)*blockSize;
407 ProcessData(outString, inString, inLength-head);
408
409 outString += head;
410 inString += head; inLength -= head;
411 }
412
414
415 // merge the tweak into the input block
416 XorBuffer(m_xworkspace, inString, m_xregister, blockSize);
417
418 // encrypt one block
419 GetBlockCipher().ProcessBlock(m_xworkspace);
420
421 // merge the tweak into the output block
422 XorBuffer(outString, m_xworkspace, m_xregister, blockSize);
423
424 // Multiply T by alpha
425 GF_Double(m_xregister, blockSize);
426
428
429 inString += blockSize;
430 outString += blockSize;
431 const size_t len = inLength-blockSize;
432
433 // copy in the final plaintext bytes
434 std::memcpy(m_xworkspace, inString, len);
435 // and copy out the final ciphertext bytes
436 std::memcpy(outString, outString-blockSize, len);
437 // "steal" ciphertext to complete the block
438 std::memcpy(m_xworkspace+len, outString-blockSize+len, blockSize-len);
439
440 // merge the tweak into the input block
441 XorBuffer(m_xworkspace, m_xregister, blockSize);
442
443 // encrypt one block
444 GetBlockCipher().ProcessBlock(m_xworkspace);
445
446 // merge the tweak into the previous output block
447 XorBuffer(outString-blockSize, m_xworkspace, m_xregister, blockSize);
448
449 return outLength;
450}
451
452size_t XTS_ModeBase::ProcessLastCipherBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
453{
454 // ensure output buffer is large enough
455 CRYPTOPP_ASSERT(outLength >= inLength);
456
457 const unsigned int blockSize = GetBlockCipher().BlockSize();
458 const size_t blocks = inLength / blockSize;
459 const size_t tail = inLength % blockSize;
460 outLength = inLength;
461
462 if (tail == 0)
463 {
464 // Allow ProcessData to handle all the full blocks
465 ProcessData(outString, inString, inLength);
466 return inLength;
467 }
468 else if (blocks > 1)
469 {
470 // Allow ProcessData to handle full blocks except one
471 const size_t head = (blocks-1)*blockSize;
472 ProcessData(outString, inString, inLength-head);
473
474 outString += head;
475 inString += head; inLength -= head;
476 }
477
478 #define poly1 (m_xregister+0*blockSize)
479 #define poly2 (m_xregister+1*blockSize)
480 GF_Double(poly2, poly1, blockSize);
481
483
484 inString += blockSize;
485 outString += blockSize;
486 const size_t len = inLength-blockSize;
487
488 // merge the tweak into the input block
489 XorBuffer(m_xworkspace, inString-blockSize, poly2, blockSize);
490
491 // encrypt one block
492 GetBlockCipher().ProcessBlock(m_xworkspace);
493
494 // merge the tweak into the output block
495 XorBuffer(m_xworkspace, poly2, blockSize);
496
497 // copy in the final plaintext bytes
498 std::memcpy(outString-blockSize, inString, len);
499 // and copy out the final ciphertext bytes
500 std::memcpy(outString, m_xworkspace, len);
501 // "steal" ciphertext to complete the block
502 std::memcpy(outString-blockSize+len, m_xworkspace+len, blockSize-len);
503
505
506 inString -= blockSize;
507 outString -= blockSize;
508
509 // merge the tweak into the input block
510 XorBuffer(m_xworkspace, outString, poly1, blockSize);
511
512 // encrypt one block
513 GetBlockCipher().ProcessBlock(m_xworkspace);
514
515 // merge the tweak into the output block
516 XorBuffer(outString, m_xworkspace, poly1, blockSize);
517
518 return outLength;
519}
520
521NAMESPACE_END
Class file for the AES cipher (Rijndael)
bool IsForwardTransformation() const
Determines if the cipher is being operated in its forward direction.
Definition modes.h:258
void Resynchronize(const byte *iv, int length=-1)
Resynchronize with an IV.
Definition modes.h:260
virtual size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
Encrypt and xor multiple blocks using additional flags.
Definition cryptlib.cpp:141
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition cryptlib.h:879
@ BT_AllowParallel
Allow parallel transformations.
Definition cryptlib.h:925
virtual unsigned int BlockSize() const =0
An invalid argument was detected.
Definition cryptlib.h:203
Exception thrown when an invalid key length is encountered.
Definition simple.h:56
Interface for retrieving values given their names.
Definition cryptlib.h:322
const byte * GetIVAndThrowIfInvalid(const NameValuePairs &params, size_t &size)
Retrieves and validates the IV.
Definition cryptlib.cpp:107
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition cryptlib.cpp:58
XTS block cipher mode of operation default implementation.
Definition xts.h:50
bool IsValidKeyLength(size_t keylength) const
Returns whether keylength is a valid key length.
Definition xts.h:74
void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition xts.cpp:215
void ProcessData(byte *outString, const byte *inString, size_t length)
Encrypt or decrypt an array of bytes.
Definition xts.cpp:261
void Resynchronize(const byte *iv, int ivLength=-1)
Resynchronize with an IV.
Definition xts.cpp:231
void ThrowIfInvalidBlockSize(size_t length)
Validates the block size.
Definition xts.cpp:195
size_t ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
Encrypt or decrypt the last block of data.
Definition xts.cpp:373
unsigned int BlockSize() const
Definition xts.h:84
void ThrowIfInvalidKeyLength(size_t length)
Validates the key length.
Definition xts.cpp:208
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition xts.h:61
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:62
Functions for CPU features and intrinsics.
ByteOrder
Provides the byte ordering.
Definition cryptlib.h:143
Utility functions for the Crypto++ library.
bool IsPowerOf2(const T &value)
Tests whether a value is a power of 2.
Definition misc.h:1010
Classes for block cipher modes of operation.
Precompiled header file.
Support functions for PowerPC and vector operations.
Classes for the Threefish block cipher.
Classes for XTS block cipher mode of operation.