Security Scol plugin
arm_simd.h
Go to the documentation of this file.
1// arm_simd.h - written and placed in public domain by Jeffrey Walton
2
5
6#ifndef CRYPTOPP_ARM_SIMD_H
7#define CRYPTOPP_ARM_SIMD_H
8
9#include "config.h"
10
11#if (CRYPTOPP_ARM_NEON_HEADER)
12# include <stdint.h>
13# include <arm_neon.h>
14#endif
15
16#if (CRYPTOPP_ARM_ACLE_HEADER)
17# include <stdint.h>
18# include <arm_acle.h>
19#endif
20
21#if (CRYPTOPP_ARM_CRC32_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
23
24
30inline uint32_t CRC32B (uint32_t crc, uint8_t val)
31{
32#if defined(_MSC_VER)
33 return __crc32b(crc, val);
34#else
35 __asm__ ("crc32b %w0, %w0, %w1 \n\t"
36 :"+r" (crc) : "r" (val) );
37 return crc;
38#endif
39}
40
46inline uint32_t CRC32W (uint32_t crc, uint32_t val)
47{
48#if defined(_MSC_VER)
49 return __crc32w(crc, val);
50#else
51 __asm__ ("crc32w %w0, %w0, %w1 \n\t"
52 :"+r" (crc) : "r" (val) );
53 return crc;
54#endif
55}
56
62inline uint32_t CRC32Wx4 (uint32_t crc, const uint32_t vals[4])
63{
64#if defined(_MSC_VER)
65 return __crc32w(__crc32w(__crc32w(__crc32w(
66 crc, vals[0]), vals[1]), vals[2]), vals[3]);
67#else
68 __asm__ ("crc32w %w0, %w0, %w1 \n\t"
69 "crc32w %w0, %w0, %w2 \n\t"
70 "crc32w %w0, %w0, %w3 \n\t"
71 "crc32w %w0, %w0, %w4 \n\t"
72 :"+r" (crc) : "r" (vals[0]), "r" (vals[1]),
73 "r" (vals[2]), "r" (vals[3]));
74 return crc;
75#endif
76}
77
79
80
86inline uint32_t CRC32CB (uint32_t crc, uint8_t val)
87{
88#if defined(_MSC_VER)
89 return __crc32cb(crc, val);
90#else
91 __asm__ ("crc32cb %w0, %w0, %w1 \n\t"
92 :"+r" (crc) : "r" (val) );
93 return crc;
94#endif
95}
96
102inline uint32_t CRC32CW (uint32_t crc, uint32_t val)
103{
104#if defined(_MSC_VER)
105 return __crc32cw(crc, val);
106#else
107 __asm__ ("crc32cw %w0, %w0, %w1 \n\t"
108 :"+r" (crc) : "r" (val) );
109 return crc;
110#endif
111}
112
118inline uint32_t CRC32CWx4 (uint32_t crc, const uint32_t vals[4])
119{
120#if defined(_MSC_VER)
121 return __crc32cw(__crc32cw(__crc32cw(__crc32cw(
122 crc, vals[0]), vals[1]), vals[2]), vals[3]);
123#else
124 __asm__ ("crc32cw %w0, %w0, %w1 \n\t"
125 "crc32cw %w0, %w0, %w2 \n\t"
126 "crc32cw %w0, %w0, %w3 \n\t"
127 "crc32cw %w0, %w0, %w4 \n\t"
128 :"+r" (crc) : "r" (vals[0]), "r" (vals[1]),
129 "r" (vals[2]), "r" (vals[3]));
130 return crc;
131#endif
132}
134#endif // CRYPTOPP_ARM_CRC32_AVAILABLE
135
136#if (CRYPTOPP_ARM_PMULL_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
138
139
152inline uint64x2_t PMULL_00(const uint64x2_t a, const uint64x2_t b)
153{
154#if defined(_MSC_VER)
155 const __n64 x = { vgetq_lane_u64(a, 0) };
156 const __n64 y = { vgetq_lane_u64(b, 0) };
157 return vmull_p64(x, y);
158#elif defined(__GNUC__)
159 uint64x2_t r;
160 __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
161 :"=w" (r) : "w" (a), "w" (b) );
162 return r;
163#else
164 return (uint64x2_t)(vmull_p64(
165 vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
166 vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
167#endif
168}
169
182inline uint64x2_t PMULL_01(const uint64x2_t a, const uint64x2_t b)
183{
184#if defined(_MSC_VER)
185 const __n64 x = { vgetq_lane_u64(a, 0) };
186 const __n64 y = { vgetq_lane_u64(b, 1) };
187 return vmull_p64(x, y);
188#elif defined(__GNUC__)
189 uint64x2_t r;
190 __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
191 :"=w" (r) : "w" (a), "w" (vget_high_u64(b)) );
192 return r;
193#else
194 return (uint64x2_t)(vmull_p64(
195 vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
196 vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
197#endif
198}
199
212inline uint64x2_t PMULL_10(const uint64x2_t a, const uint64x2_t b)
213{
214#if defined(_MSC_VER)
215 const __n64 x = { vgetq_lane_u64(a, 1) };
216 const __n64 y = { vgetq_lane_u64(b, 0) };
217 return vmull_p64(x, y);
218#elif defined(__GNUC__)
219 uint64x2_t r;
220 __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
221 :"=w" (r) : "w" (vget_high_u64(a)), "w" (b) );
222 return r;
223#else
224 return (uint64x2_t)(vmull_p64(
225 vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
226 vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
227#endif
228}
229
242inline uint64x2_t PMULL_11(const uint64x2_t a, const uint64x2_t b)
243{
244#if defined(_MSC_VER)
245 const __n64 x = { vgetq_lane_u64(a, 1) };
246 const __n64 y = { vgetq_lane_u64(b, 1) };
247 return vmull_p64(x, y);
248#elif defined(__GNUC__)
249 uint64x2_t r;
250 __asm__ ("pmull2 %0.1q, %1.2d, %2.2d \n\t"
251 :"=w" (r) : "w" (a), "w" (b) );
252 return r;
253#else
254 return (uint64x2_t)(vmull_p64(
255 vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
256 vgetq_lane_u64(vreinterpretq_u64_u8(b),1)));
257#endif
258}
259
267inline uint64x2_t PMULL(const uint64x2_t a, const uint64x2_t b)
268{
269#if defined(_MSC_VER)
270 const __n64 x = { vgetq_lane_u64(a, 0) };
271 const __n64 y = { vgetq_lane_u64(b, 0) };
272 return vmull_p64(x, y);
273#elif defined(__GNUC__)
274 uint64x2_t r;
275 __asm__ ("pmull %0.1q, %1.1d, %2.1d \n\t"
276 :"=w" (r) : "w" (a), "w" (b) );
277 return r;
278#else
279 return (uint64x2_t)(vmull_p64(
280 vgetq_lane_u64(vreinterpretq_u64_u8(a),0),
281 vgetq_lane_u64(vreinterpretq_u64_u8(b),0)));
282#endif
283}
284
292inline uint64x2_t PMULL_HIGH(const uint64x2_t a, const uint64x2_t b)
293{
294#if defined(_MSC_VER)
295 const __n64 x = { vgetq_lane_u64(a, 1) };
296 const __n64 y = { vgetq_lane_u64(b, 1) };
297 return vmull_p64(x, y);
298#elif defined(__GNUC__)
299 uint64x2_t r;
300 __asm__ ("pmull2 %0.1q, %1.2d, %2.2d \n\t"
301 :"=w" (r) : "w" (a), "w" (b) );
302 return r;
303#else
304 return (uint64x2_t)(vmull_p64(
305 vgetq_lane_u64(vreinterpretq_u64_u8(a),1),
306 vgetq_lane_u64(vreinterpretq_u64_u8(b),1))));
307#endif
308}
309
319inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b, unsigned int c)
320{
321#if defined(_MSC_VER)
322 return vreinterpretq_u64_u8(vextq_u8(
323 vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c));
324#else
325 uint64x2_t r;
326 __asm__ ("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
327 :"=w" (r) : "w" (a), "w" (b), "I" (c) );
328 return r;
329#endif
330}
331
341template <unsigned int C>
342inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b)
343{
344 // https://github.com/weidai11/cryptopp/issues/366
345#if defined(_MSC_VER)
346 return vreinterpretq_u64_u8(vextq_u8(
347 vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C));
348#else
349 uint64x2_t r;
350 __asm__ ("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
351 :"=w" (r) : "w" (a), "w" (b), "I" (C) );
352 return r;
353#endif
354}
355
357#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
358
359#if CRYPTOPP_ARM_SHA3_AVAILABLE || defined(CRYPTOPP_DOXYGEN_PROCESSING)
361
362
372inline uint64x2_t VEOR3(uint64x2_t a, uint64x2_t b, uint64x2_t c)
373{
374#if defined(_MSC_VER)
375 return veor3q_u64(a, b, c);
376#else
377 uint64x2_t r;
378 __asm__ ("eor3 %0.16b, %1.16b, %2.16b, %3.16b \n\t"
379 :"=w" (r) : "w" (a), "w" (b), "w" (c));
380 return r;
381#endif
382}
383
393inline uint64x2_t VXAR(uint64x2_t a, uint64x2_t b, const int c)
394{
395#if defined(_MSC_VER)
396 return vxarq_u64(a, b, c);
397#else
398 uint64x2_t r;
399 __asm__ ("xar %0.2d, %1.2d, %2.2d, %3 \n\t"
400 :"=w" (r) : "w" (a), "w" (b), "I" (c));
401 return r;
402#endif
403}
404
414template <unsigned int C>
415inline uint64x2_t VXAR(uint64x2_t a, uint64x2_t b)
416{
417#if defined(_MSC_VER)
418 return vxarq_u64(a, b, C);
419#else
420 uint64x2_t r;
421 __asm__ ("xar %0.2d, %1.2d, %2.2d, %3 \n\t"
422 :"=w" (r) : "w" (a), "w" (b), "I" (C));
423 return r;
424#endif
425}
426
435inline uint64x2_t VRAX1(uint64x2_t a, uint64x2_t b)
436{
437#if defined(_MSC_VER)
438 return vrax1q_u64(a, b);
439#else
440 uint64x2_t r;
441 __asm__ ("rax1 %0.2d, %1.2d, %2.2d \n\t"
442 :"=w" (r) : "w" (a), "w" (b));
443 return r;
444#endif
445}
447#endif // CRYPTOPP_ARM_SHA3_AVAILABLE
448
449#endif // CRYPTOPP_ARM_SIMD_H
Library configuration file.