Security Scol plugin
lsh512_avx.cpp
1// lsh512_avx.cpp - written and placed in the public domain by Jeffrey Walton
2// Based on the specification and source code provided by
3// Korea Internet & Security Agency (KISA) website. Also
4// see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5// and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6
7// We are hitting some sort of GCC bug in the LSH AVX2 code path.
8// Clang is OK on the AVX2 code path. We believe it is GCC Issue
9// 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10// makes using zeroupper a little tricky.
11
12#include "pch.h"
13#include "config.h"
14
15#include "lsh.h"
16#include "misc.h"
17
18// Squash MS LNK4221 and libtool warnings
19extern const char LSH512_AVX_FNAME[] = __FILE__;
20
21#if defined(CRYPTOPP_AVX2_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
22
23#if defined(CRYPTOPP_AVX2_AVAILABLE)
24# include <emmintrin.h>
25# include <immintrin.h>
26#endif
27
28// <x86intrin.h> is available in GCC 4.5 and later. The minimum Clang version is unknown. Also see https://stackoverflow.com/a/42493893.
29#if (CRYPTOPP_GCC_VERSION >= 40500)
30# include <x86intrin.h>
31#endif
32
33ANONYMOUS_NAMESPACE_BEGIN
34
35/* LSH Constants */
36
// Size in bytes of one message block consumed by a single compress() call.
37const unsigned int LSH512_MSG_BLK_BYTE_LEN = 256;
38// const unsigned int LSH512_MSG_BLK_BIT_LEN = 2048;
39// const unsigned int LSH512_CV_BYTE_LEN = 128;
// Largest digest size in bytes; also seeds cv_l[0] when an IV is generated
// at run time in lsh512_init_avx2().
40const unsigned int LSH512_HASH_VAL_MAX_BYTE_LEN = 64;
41
42// const unsigned int MSG_BLK_WORD_LEN = 32;
// Number of 64-bit words in a full IV (8 for cv_l followed by 8 for cv_r).
43const unsigned int CV_WORD_LEN = 16;
// Number of 64-bit step-constant words consumed by one step.
44const unsigned int CONST_WORD_LEN = 8;
45// const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
// Number of steps; the code below processes them as NUM_STEPS/2 even/odd pairs.
46const unsigned int NUM_STEPS = 28;
47
// Left-rotation amounts passed to mix<Alpha, Beta>() for even and odd steps.
48const unsigned int ROT_EVEN_ALPHA = 23;
49const unsigned int ROT_EVEN_BETA = 59;
50const unsigned int ROT_ODD_ALPHA = 7;
51const unsigned int ROT_ODD_BETA = 3;
52
// Algorithm type encodings. The low 16 bits hold the digest length in bytes
// (see LSH_GET_HASHBYTE) and (value & 0xf0000) == 0x10000 marks the LSH-512
// family (see LSH_IS_LSH512).
53const unsigned int LSH_TYPE_512_512 = 0x0010040;
54const unsigned int LSH_TYPE_512_384 = 0x0010030;
55const unsigned int LSH_TYPE_512_256 = 0x0010020;
56const unsigned int LSH_TYPE_512_224 = 0x001001C;
57
58// const unsigned int LSH_TYPE_384 = LSH_TYPE_512_384;
59// const unsigned int LSH_TYPE_512 = LSH_TYPE_512_512;
60
61/* Error Code */
62
// Return codes of the lsh512_*_avx2 functions. Codes that this translation
// unit does not use are left commented out.
63const unsigned int LSH_SUCCESS = 0x0;
64// const unsigned int LSH_ERR_NULL_PTR = 0x2401;
65// const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
66const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
67const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
68
69/* Index into our state array */
70
// Word offsets into the caller's word64 state array. Words 0..79 hold the
// chaining variables, sub-messages and the buffered last block (see
// LSH512_AVX2_Context); these two slots follow them.
71const unsigned int AlgorithmType = 80;
72const unsigned int RemainingBits = 81;
73
74NAMESPACE_END
75
76NAMESPACE_BEGIN(CryptoPP)
77NAMESPACE_BEGIN(LSH)
78
79// lsh512.cpp
80extern const word64 LSH512_IV224[CV_WORD_LEN];
81extern const word64 LSH512_IV256[CV_WORD_LEN];
82extern const word64 LSH512_IV384[CV_WORD_LEN];
83extern const word64 LSH512_IV512[CV_WORD_LEN];
84extern const word64 LSH512_StepConstants[CONST_WORD_LEN * NUM_STEPS];
85
86NAMESPACE_END // LSH
87NAMESPACE_END // Crypto++
88
89ANONYMOUS_NAMESPACE_BEGIN
90
91using CryptoPP::byte;
92using CryptoPP::word32;
93using CryptoPP::word64;
94using CryptoPP::rotlFixed;
95using CryptoPP::rotlConstant;
96
97using CryptoPP::GetBlock;
98using CryptoPP::LittleEndian;
99using CryptoPP::ConditionalByteReverse;
100using CryptoPP::LITTLE_ENDIAN_ORDER;
101
102using CryptoPP::LSH::LSH512_IV224;
103using CryptoPP::LSH::LSH512_IV256;
104using CryptoPP::LSH::LSH512_IV384;
105using CryptoPP::LSH::LSH512_IV512;
106using CryptoPP::LSH::LSH512_StepConstants;
107
// Short aliases kept from the reference implementation. lsh_uint is the
// general 32-bit integer used by the algorithm-type helpers, lsh_err is a
// return code (LSH_SUCCESS or an LSH_ERR_* value), and lsh_type holds an
// LSH_TYPE_* algorithm identifier.
108typedef byte lsh_u8;
109typedef word32 lsh_u32;
110typedef word64 lsh_u64;
111typedef word32 lsh_uint;
112typedef word32 lsh_err;
113typedef word32 lsh_type;
114
115struct LSH512_AVX2_Context
116{
117 LSH512_AVX2_Context(word64* state, word64 algType, word64& remainingBitLength) :
118 cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
119 last_block(reinterpret_cast<byte*>(state+48)),
120 remain_databitlen(remainingBitLength),
121 alg_type(static_cast<lsh_type>(algType)) {}
122
123 lsh_u64* cv_l; // start of our state block
124 lsh_u64* cv_r;
125 lsh_u64* sub_msgs;
126 lsh_u8* last_block;
127 lsh_u64& remain_databitlen;
128 lsh_type alg_type;
129};
130
131struct LSH512_AVX2_Internal
132{
133 LSH512_AVX2_Internal(word64* state) :
134 submsg_e_l(state+16), submsg_e_r(state+24),
135 submsg_o_l(state+32), submsg_o_r(state+40) { }
136
137 lsh_u64* submsg_e_l; /* even left sub-message */
138 lsh_u64* submsg_e_r; /* even right sub-message */
139 lsh_u64* submsg_o_l; /* odd left sub-message */
140 lsh_u64* submsg_o_r; /* odd right sub-message */
141};
142
143// Zero the upper 128 bits of all YMM registers on exit.
144// It avoids AVX state transition penalties when saving state.
145// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735
146// makes using zeroupper a little tricky.
147
148struct AVX_Cleanup
149{
150 ~AVX_Cleanup() {
151 _mm256_zeroupper();
152 }
153};
154
155// const lsh_u32 g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
156
157/* LSH AlgType Macro */
158
159inline bool LSH_IS_LSH512(lsh_uint val) {
160 return (val & 0xf0000) == 0x10000;
161}
162
163inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
164 return val >> 24;
165}
166
167inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
168 return val & 0xffff;
169}
170
171inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
172 return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
173}
174
175inline lsh_u64 loadLE64(lsh_u64 v) {
176 return ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v);
177}
178
179lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
180 return rotlFixed(x, r);
181}
182
183// Original code relied upon unaligned lsh_u64 buffer
184inline void load_msg_blk(LSH512_AVX2_Internal* i_state, const lsh_u8 msgblk[LSH512_MSG_BLK_BYTE_LEN])
185{
186 lsh_u64* submsg_e_l = i_state->submsg_e_l;
187 lsh_u64* submsg_e_r = i_state->submsg_e_r;
188 lsh_u64* submsg_o_l = i_state->submsg_o_l;
189 lsh_u64* submsg_o_r = i_state->submsg_o_r;
190
191 _mm256_storeu_si256(M256_CAST(submsg_e_l+0),
192 _mm256_loadu_si256(CONST_M256_CAST(msgblk+0)));
193 _mm256_storeu_si256(M256_CAST(submsg_e_l+4),
194 _mm256_loadu_si256(CONST_M256_CAST(msgblk+32)));
195
196 _mm256_storeu_si256(M256_CAST(submsg_e_r+0),
197 _mm256_loadu_si256(CONST_M256_CAST(msgblk+64)));
198 _mm256_storeu_si256(M256_CAST(submsg_e_r+4),
199 _mm256_loadu_si256(CONST_M256_CAST(msgblk+96)));
200
201 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
202 _mm256_loadu_si256(CONST_M256_CAST(msgblk+128)));
203 _mm256_storeu_si256(M256_CAST(submsg_o_l+4),
204 _mm256_loadu_si256(CONST_M256_CAST(msgblk+160)));
205
206 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
207 _mm256_loadu_si256(CONST_M256_CAST(msgblk+192)));
208 _mm256_storeu_si256(M256_CAST(submsg_o_r+4),
209 _mm256_loadu_si256(CONST_M256_CAST(msgblk+224)));
210}
211
212inline void msg_exp_even(LSH512_AVX2_Internal* i_state)
213{
214 CRYPTOPP_ASSERT(i_state != NULLPTR);
215
216 lsh_u64* submsg_e_l = i_state->submsg_e_l;
217 lsh_u64* submsg_e_r = i_state->submsg_e_r;
218 lsh_u64* submsg_o_l = i_state->submsg_o_l;
219 lsh_u64* submsg_o_r = i_state->submsg_o_r;
220
221 _mm256_storeu_si256(M256_CAST(submsg_e_l+0), _mm256_add_epi64(
222 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
223 _mm256_permute4x64_epi64(
224 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
225 _MM_SHUFFLE(1,0,2,3))));
226 _mm256_storeu_si256(M256_CAST(submsg_e_l+4), _mm256_add_epi64(
227 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
228 _mm256_permute4x64_epi64(
229 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
230 _MM_SHUFFLE(2,1,0,3))));
231
232 _mm256_storeu_si256(M256_CAST(submsg_e_r+0), _mm256_add_epi64(
233 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
234 _mm256_permute4x64_epi64(
235 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
236 _MM_SHUFFLE(1,0,2,3))));
237 _mm256_storeu_si256(M256_CAST(submsg_e_r+4), _mm256_add_epi64(
238 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
239 _mm256_permute4x64_epi64(
240 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
241 _MM_SHUFFLE(2,1,0,3))));
242}
243
244inline void msg_exp_odd(LSH512_AVX2_Internal* i_state)
245{
246 CRYPTOPP_ASSERT(i_state != NULLPTR);
247
248 lsh_u64* submsg_e_l = i_state->submsg_e_l;
249 lsh_u64* submsg_e_r = i_state->submsg_e_r;
250 lsh_u64* submsg_o_l = i_state->submsg_o_l;
251 lsh_u64* submsg_o_r = i_state->submsg_o_r;
252
253 _mm256_storeu_si256(M256_CAST(submsg_o_l+0),
254 _mm256_add_epi64(
255 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+0)),
256 _mm256_permute4x64_epi64(
257 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+0)),
258 _MM_SHUFFLE(1,0,2,3))));
259 _mm256_storeu_si256(M256_CAST(submsg_o_l+4),
260 _mm256_add_epi64(
261 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4)),
262 _mm256_permute4x64_epi64(
263 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4)),
264 _MM_SHUFFLE(2,1,0,3))));
265
266 _mm256_storeu_si256(M256_CAST(submsg_o_r+0),
267 _mm256_add_epi64(
268 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+0)),
269 _mm256_permute4x64_epi64(
270 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+0)),
271 _MM_SHUFFLE(1,0,2,3))));
272 _mm256_storeu_si256(M256_CAST(submsg_o_r+4),
273 _mm256_add_epi64(
274 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4)),
275 _mm256_permute4x64_epi64(
276 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4)),
277 _MM_SHUFFLE(2,1,0,3))));
278}
279
280inline void load_sc(const lsh_u64** p_const_v, size_t i)
281{
282 *p_const_v = &LSH512_StepConstants[i];
283}
284
285inline void msg_add_even(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
286{
287 CRYPTOPP_ASSERT(i_state != NULLPTR);
288
289 lsh_u64* submsg_e_l = i_state->submsg_e_l;
290 lsh_u64* submsg_e_r = i_state->submsg_e_r;
291
292 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
293 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
294 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l))));
295 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
296 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
297 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r))));
298
299 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
300 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
301 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_l+4))));
302 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
303 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
304 _mm256_loadu_si256(CONST_M256_CAST(submsg_e_r+4))));
305}
306
307inline void msg_add_odd(lsh_u64 cv_l[8], lsh_u64 cv_r[8], LSH512_AVX2_Internal* i_state)
308{
309 CRYPTOPP_ASSERT(i_state != NULLPTR);
310
311 lsh_u64* submsg_o_l = i_state->submsg_o_l;
312 lsh_u64* submsg_o_r = i_state->submsg_o_r;
313
314 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
315 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
316 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l))));
317 _mm256_storeu_si256(M256_CAST(cv_r), _mm256_xor_si256(
318 _mm256_loadu_si256(CONST_M256_CAST(cv_r)),
319 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r))));
320
321 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
322 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
323 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_l+4))));
324 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_xor_si256(
325 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
326 _mm256_loadu_si256(CONST_M256_CAST(submsg_o_r+4))));
327}
328
329inline void add_blk(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
330{
331 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_add_epi64(
332 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
333 _mm256_loadu_si256(CONST_M256_CAST(cv_r))));
334 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_add_epi64(
335 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
336 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4))));
337}
338
339template <unsigned int R>
340inline void rotate_blk(lsh_u64 cv[8])
341{
342 _mm256_storeu_si256(M256_CAST(cv), _mm256_or_si256(
343 _mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), R),
344 _mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv)), 64-R)));
345 _mm256_storeu_si256(M256_CAST(cv+4), _mm256_or_si256(
346 _mm256_slli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), R),
347 _mm256_srli_epi64(_mm256_loadu_si256(CONST_M256_CAST(cv+4)), 64-R)));
348}
349
350inline void xor_with_const(lsh_u64 cv_l[8], const lsh_u64 const_v[8])
351{
352 _mm256_storeu_si256(M256_CAST(cv_l), _mm256_xor_si256(
353 _mm256_loadu_si256(CONST_M256_CAST(cv_l)),
354 _mm256_loadu_si256(CONST_M256_CAST(const_v))));
355 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_xor_si256(
356 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)),
357 _mm256_loadu_si256(CONST_M256_CAST(const_v+4))));
358}
359
360inline void rotate_msg_gamma(lsh_u64 cv_r[8])
361{
362 // g_gamma512[8] = { 0, 16, 32, 48, 8, 24, 40, 56 };
363 _mm256_storeu_si256(M256_CAST(cv_r+0),
364 _mm256_shuffle_epi8(
365 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)),
366 _mm256_set_epi8(
367 /* hi lane */ 9,8,15,14, 13,12,11,10, 3,2,1,0, 7,6,5,4,
368 /* lo lane */ 13,12,11,10, 9,8,15,14, 7,6,5,4, 3,2,1,0)));
369 _mm256_storeu_si256(M256_CAST(cv_r+4),
370 _mm256_shuffle_epi8(
371 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)),
372 _mm256_set_epi8(
373 /* hi lane */ 8,15,14,13, 12,11,10,9, 2,1,0,7, 6,5,4,3,
374 /* lo lane */ 12,11,10,9, 8,15,14,13, 6,5,4,3, 2,1,0,7)));
375}
376
377inline void word_perm(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
378{
379 __m256i temp[2];
380 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_permute4x64_epi64(
381 _mm256_loadu_si256(CONST_M256_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
382 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_permute4x64_epi64(
383 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
384 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_permute4x64_epi64(
385 _mm256_loadu_si256(CONST_M256_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
386 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_permute4x64_epi64(
387 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
388
389 temp[0] = _mm256_loadu_si256(CONST_M256_CAST(cv_l+0));
390 temp[1] = _mm256_loadu_si256(CONST_M256_CAST(cv_r+0));
391
392 _mm256_storeu_si256(M256_CAST(cv_l+0),
393 _mm256_loadu_si256(CONST_M256_CAST(cv_l+4)));
394 _mm256_storeu_si256(M256_CAST(cv_l+4),
395 _mm256_loadu_si256(CONST_M256_CAST(cv_r+4)));
396
397 _mm256_storeu_si256(M256_CAST(cv_r+0), temp[0]);
398 _mm256_storeu_si256(M256_CAST(cv_r+4), temp[1]);
399};
400
401/* -------------------------------------------------------- *
402* step function
403* -------------------------------------------------------- */
404
405template <unsigned int Alpha, unsigned int Beta>
406inline void mix(lsh_u64 cv_l[8], lsh_u64 cv_r[8], const lsh_u64 const_v[8])
407{
408 add_blk(cv_l, cv_r);
409 rotate_blk<Alpha>(cv_l);
410 xor_with_const(cv_l, const_v);
411 add_blk(cv_r, cv_l);
412 rotate_blk<Beta>(cv_r);
413 add_blk(cv_l, cv_r);
414 rotate_msg_gamma(cv_r);
415}
416
417/* -------------------------------------------------------- *
418* compression function
419* -------------------------------------------------------- */
420
421inline void compress(LSH512_AVX2_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_BYTE_LEN])
422{
423 CRYPTOPP_ASSERT(ctx != NULLPTR);
424
425 LSH512_AVX2_Internal s_state(ctx->cv_l);
426 LSH512_AVX2_Internal* i_state = &s_state;
427
428 const lsh_u64* const_v = NULL;
429 lsh_u64 *cv_l = ctx->cv_l;
430 lsh_u64 *cv_r = ctx->cv_r;
431
432 load_msg_blk(i_state, pdMsgBlk);
433
434 msg_add_even(cv_l, cv_r, i_state);
435 load_sc(&const_v, 0);
436 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
437 word_perm(cv_l, cv_r);
438
439 msg_add_odd(cv_l, cv_r, i_state);
440 load_sc(&const_v, 8);
441 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
442 word_perm(cv_l, cv_r);
443
444 for (size_t i = 1; i < NUM_STEPS / 2; i++)
445 {
446 msg_exp_even(i_state);
447 msg_add_even(cv_l, cv_r, i_state);
448 load_sc(&const_v, 16 * i);
449 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
450 word_perm(cv_l, cv_r);
451
452 msg_exp_odd(i_state);
453 msg_add_odd(cv_l, cv_r, i_state);
454 load_sc(&const_v, 16 * i + 8);
455 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
456 word_perm(cv_l, cv_r);
457 }
458
459 msg_exp_even(i_state);
460 msg_add_even(cv_l, cv_r, i_state);
461}
462
463/* -------------------------------------------------------- */
464
465inline void load_iv(word64 cv_l[8], word64 cv_r[8], const word64 iv[16])
466{
467 // The IV's are 32-byte aligned so we can use aligned loads.
468 _mm256_storeu_si256(M256_CAST(cv_l+0),
469 _mm256_load_si256(CONST_M256_CAST(iv+0)));
470 _mm256_storeu_si256(M256_CAST(cv_l+4),
471 _mm256_load_si256(CONST_M256_CAST(iv+4)));
472
473 _mm256_storeu_si256(M256_CAST(cv_r+0),
474 _mm256_load_si256(CONST_M256_CAST(iv+8)));
475 _mm256_storeu_si256(M256_CAST(cv_r+4),
476 _mm256_load_si256(CONST_M256_CAST(iv+12)));
477}
478
479inline void zero_iv(lsh_u64 cv_l[8], lsh_u64 cv_r[8])
480{
481 _mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
482 _mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_setzero_si256());
483 _mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
484 _mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_setzero_si256());
485}
486
487inline void zero_submsgs(LSH512_AVX2_Context* ctx)
488{
489 lsh_u64* sub_msgs = ctx->sub_msgs;
490
491 _mm256_storeu_si256(M256_CAST(sub_msgs+ 0),
492 _mm256_setzero_si256());
493 _mm256_storeu_si256(M256_CAST(sub_msgs+ 4),
494 _mm256_setzero_si256());
495
496 _mm256_storeu_si256(M256_CAST(sub_msgs+ 8),
497 _mm256_setzero_si256());
498 _mm256_storeu_si256(M256_CAST(sub_msgs+12),
499 _mm256_setzero_si256());
500}
501
502inline void init224(LSH512_AVX2_Context* ctx)
503{
504 CRYPTOPP_ASSERT(ctx != NULLPTR);
505
506 zero_submsgs(ctx);
507 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV224);
508}
509
510inline void init256(LSH512_AVX2_Context* ctx)
511{
512 CRYPTOPP_ASSERT(ctx != NULLPTR);
513
514 zero_submsgs(ctx);
515 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV256);
516}
517
518inline void init384(LSH512_AVX2_Context* ctx)
519{
520 CRYPTOPP_ASSERT(ctx != NULLPTR);
521
522 zero_submsgs(ctx);
523 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV384);
524}
525
526inline void init512(LSH512_AVX2_Context* ctx)
527{
528 CRYPTOPP_ASSERT(ctx != NULLPTR);
529
530 zero_submsgs(ctx);
531 load_iv(ctx->cv_l, ctx->cv_r, LSH512_IV512);
532}
533
534/* -------------------------------------------------------- */
535
536inline void fin(LSH512_AVX2_Context* ctx)
537{
538 CRYPTOPP_ASSERT(ctx != NULLPTR);
539
540 _mm256_storeu_si256(M256_CAST(ctx->cv_l+0), _mm256_xor_si256(
541 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+0)),
542 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+0))));
543
544 _mm256_storeu_si256(M256_CAST(ctx->cv_l+4), _mm256_xor_si256(
545 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_l+4)),
546 _mm256_loadu_si256(CONST_M256_CAST(ctx->cv_r+4))));
547}
548
549/* -------------------------------------------------------- */
550
551inline void get_hash(LSH512_AVX2_Context* ctx, lsh_u8* pbHashVal)
552{
553 CRYPTOPP_ASSERT(ctx != NULLPTR);
554 CRYPTOPP_ASSERT(ctx->alg_type != 0);
555 CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
556
557 lsh_uint alg_type = ctx->alg_type;
558 lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
559 lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
560
561 // Multiplying by sizeof(lsh_u8) looks odd...
562 memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
563 if (hash_val_bit_len){
564 pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
565 }
566}
567
568/* -------------------------------------------------------- */
569
570lsh_err lsh512_init_avx2(LSH512_AVX2_Context* ctx)
571{
572 CRYPTOPP_ASSERT(ctx != NULLPTR);
573 CRYPTOPP_ASSERT(ctx->alg_type != 0);
574
575 lsh_u32 alg_type = ctx->alg_type;
576 const lsh_u64* const_v = NULL;
577 ctx->remain_databitlen = 0;
578
579 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
580 AVX_Cleanup cleanup;
581
582 switch (alg_type){
583 case LSH_TYPE_512_512:
584 init512(ctx);
585 return LSH_SUCCESS;
586 case LSH_TYPE_512_384:
587 init384(ctx);
588 return LSH_SUCCESS;
589 case LSH_TYPE_512_256:
590 init256(ctx);
591 return LSH_SUCCESS;
592 case LSH_TYPE_512_224:
593 init224(ctx);
594 return LSH_SUCCESS;
595 default:
596 break;
597 }
598
599 lsh_u64* cv_l = ctx->cv_l;
600 lsh_u64* cv_r = ctx->cv_r;
601
602 zero_iv(cv_l, cv_r);
603 cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
604 cv_l[1] = LSH_GET_HASHBIT(alg_type);
605
606 for (size_t i = 0; i < NUM_STEPS / 2; i++)
607 {
608 //Mix
609 load_sc(&const_v, i * 16);
610 mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
611 word_perm(cv_l, cv_r);
612
613 load_sc(&const_v, i * 16 + 8);
614 mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
615 word_perm(cv_l, cv_r);
616 }
617
618 return LSH_SUCCESS;
619}
620
621lsh_err lsh512_update_avx2(LSH512_AVX2_Context* ctx, const lsh_u8* data, size_t databitlen)
622{
623 CRYPTOPP_ASSERT(ctx != NULLPTR);
624 CRYPTOPP_ASSERT(data != NULLPTR);
625 CRYPTOPP_ASSERT(databitlen % 8 == 0);
626 CRYPTOPP_ASSERT(ctx->alg_type != 0);
627
628 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
629 AVX_Cleanup cleanup;
630
631 if (databitlen == 0){
632 return LSH_SUCCESS;
633 }
634
635 // We are byte oriented. tail bits will always be 0.
636 size_t databytelen = databitlen >> 3;
637 // lsh_uint pos2 = databitlen & 0x7;
638 const size_t pos2 = 0;
639
640 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
641 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
642 const size_t remain_msg_bit = 0;
643
644 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
645 return LSH_ERR_INVALID_STATE;
646 }
647 if (remain_msg_bit > 0){
648 return LSH_ERR_INVALID_DATABITLEN;
649 }
650
651 if (databytelen + remain_msg_byte < LSH512_MSG_BLK_BYTE_LEN){
652 memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
653 ctx->remain_databitlen += (lsh_uint)databitlen;
654 remain_msg_byte += (lsh_uint)databytelen;
655 if (pos2){
656 ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
657 }
658 return LSH_SUCCESS;
659 }
660
661 if (remain_msg_byte > 0){
662 size_t more_byte = LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte;
663 memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
664 compress(ctx, ctx->last_block);
665 data += more_byte;
666 databytelen -= more_byte;
667 remain_msg_byte = 0;
668 ctx->remain_databitlen = 0;
669 }
670
671 while (databytelen >= LSH512_MSG_BLK_BYTE_LEN)
672 {
673 // This call to compress caused some trouble.
674 // The data pointer can become unaligned in the
675 // previous block.
676 compress(ctx, data);
677 data += LSH512_MSG_BLK_BYTE_LEN;
678 databytelen -= LSH512_MSG_BLK_BYTE_LEN;
679 }
680
681 if (databytelen > 0){
682 memcpy(ctx->last_block, data, databytelen);
683 ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
684 }
685
686 if (pos2){
687 ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
688 ctx->remain_databitlen += pos2;
689 }
690 return LSH_SUCCESS;
691}
692
693lsh_err lsh512_final_avx2(LSH512_AVX2_Context* ctx, lsh_u8* hashval)
694{
695 CRYPTOPP_ASSERT(ctx != NULLPTR);
696 CRYPTOPP_ASSERT(hashval != NULLPTR);
697
698 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735.
699 AVX_Cleanup cleanup;
700
701 // We are byte oriented. tail bits will always be 0.
702 size_t remain_msg_byte = static_cast<size_t>(ctx->remain_databitlen >> 3);
703 // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
704 const size_t remain_msg_bit = 0;
705
706 if (remain_msg_byte >= LSH512_MSG_BLK_BYTE_LEN){
707 return LSH_ERR_INVALID_STATE;
708 }
709
710 if (remain_msg_bit){
711 ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
712 }
713 else{
714 ctx->last_block[remain_msg_byte] = 0x80;
715 }
716 memset(ctx->last_block + remain_msg_byte + 1, 0, LSH512_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
717
718 compress(ctx, ctx->last_block);
719
720 fin(ctx);
721 get_hash(ctx, hashval);
722
723 return LSH_SUCCESS;
724}
725
726ANONYMOUS_NAMESPACE_END
727
728NAMESPACE_BEGIN(CryptoPP)
729
730extern
731void LSH512_Base_Restart_AVX2(word64* state)
732{
733 state[RemainingBits] = 0;
734 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
735 lsh_err err = lsh512_init_avx2(&ctx);
736
737 if (err != LSH_SUCCESS)
738 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_init_avx2 failed");
739}
740
741extern
742void LSH512_Base_Update_AVX2(word64* state, const byte *input, size_t size)
743{
744 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
745 lsh_err err = lsh512_update_avx2(&ctx, input, 8*size);
746
747 if (err != LSH_SUCCESS)
748 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_update_avx2 failed");
749}
750
751extern
752void LSH512_Base_TruncatedFinal_AVX2(word64* state, byte *hash, size_t)
753{
754 LSH512_AVX2_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
755 lsh_err err = lsh512_final_avx2(&ctx, hash);
756
757 if (err != LSH_SUCCESS)
758 throw Exception(Exception::OTHER_ERROR, "LSH512_Base: lsh512_final_avx2 failed");
759}
760
761NAMESPACE_END
762
763#endif // CRYPTOPP_AVX2_AVAILABLE
Base class for all exceptions thrown by the library.
Definition cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition config_int.h:62
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition misc.h:2208
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition misc.h:1599
Precompiled header file.