1 ;; rdrand.asm - written and placed in public domain by Jeffrey Walton and Uri Blumenthal.
2 ;; Copyright assigned to the Crypto++ project.
4 ;; This ASM file provides RDRAND and RDSEED to downlevel Unix and Linux tool
5 ;; chains. You will need a modern NASM, however. You can also use it in place
6 ;; of intrinsics. The routines below run a little faster than the intrinsic
9 ;; nasm -f elf32 rdrand.s -DX86 -g -o rdrand-x86.o
10 ;; nasm -f elfx32 rdrand.s -DX32 -g -o rdrand-x32.o
11 ;; nasm -f elf64 rdrand.s -DX64 -g -o rdrand-x64.o
13 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16 ;; C/C++ Function prototypes
18 ;; extern "C" void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size);
19 ;; extern "C" void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size);
21 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
22 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Per-architecture aliases. One of X86, X32 or X64 is expected to be
;; defined on the NASM command line (the -D options in the build commands
;; at the top of this file).
24 %ifdef X86 ;; Set via the command line
25 %define arg1 [esp+04h] ;; Stack slot at esp+4; presumably the first argument (ptr) at function entry
26 %define arg2 [esp+08h] ;; Stack slot at esp+8; presumably the second argument (size) at function entry
29 %define lsize dl ;; Low byte of edx. Used for tail bytes, 1-byte constants
30 %define MWSIZE 04h ;; Machine word size in bytes (4)
32 %elifdef X32 ;; Set via the command line
33 %define buffer edi ;; Linux ABI: 32-bit view of rdi, the first integer argument register
34 %define bsize esi ;; Linux ABI: 32-bit view of rsi, the second integer argument register
36 %define MWSIZE 04h ;; Machine word size in bytes (4), same value as the X86 branch
38 %elifdef X64 ;; Set via the command line
39 %ifdef CYGWIN ;; Cygwin follows Windows ABI here, not Linux ABI
40 %define buffer rcx ;; Windows ABI: first integer argument register
41 %define bsize rdx ;; Windows ABI: second integer argument register
42 %define lsize dx ;; Low 16 bits of bsize (rdx). Used for tail bytes, 2-byte constants
44 %define buffer rdi ;; Linux ABI: first integer argument register
45 %define bsize rsi ;; Linux ABI: second integer argument register
46 %define lsize si ;; Low 16 bits of bsize (rsi). Used for tail bytes, 2-byte constants
48 %define MWSIZE 08h ;; Machine word size in bytes (8)
;; NOTE(review): presumably the fall-through case, reported when none of
;; X86, X32 or X64 is defined -- confirm it sits in the final %else branch.
51 %error Missing or unknown architecture
54 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
55 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Map the two exported routine names onto underscore-prefixed symbols, so
;; every later use of the plain name assembles as the prefixed one.
;; NOTE(review): the same pair of aliases appears twice; presumably each
;; pair belongs to a different platform conditional -- confirm.
60 %define NASM_RDRAND_GenerateBlock _NASM_RDRAND_GenerateBlock
61 %define NASM_RDSEED_GenerateBlock _NASM_RDSEED_GenerateBlock
66 %define NASM_RDRAND_GenerateBlock _NASM_RDRAND_GenerateBlock
67 %define NASM_RDSEED_GenerateBlock _NASM_RDSEED_GenerateBlock
71 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
72 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
74 %ifdef X86 ;; Set via the command line
;; void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size) -- X86 build.
;; Judging by the labels and notes below, the output is produced in three
;; stages: 16-byte blocks, then single machine words, then 1-3 tail bytes.
76 global NASM_RDRAND_GenerateBlock ;; Make the entry point visible to the linker
80 NASM_RDRAND_GenerateBlock:
87 ;; A block of 16 bytes appears to be optimal. Adding
88 ;; more rdrand calls degrades performance.
96 jnc .Call_RDRAND_EAX_4 ;; CF clear (RDRAND sets CF on success); presumably retries this draw
101 jnc .Call_RDRAND_EAX_3 ;; Same carry check for the next draw of the block
106 jnc .Call_RDRAND_EAX_2 ;; Same carry check for the next draw of the block
111 jnc .Call_RDRAND_EAX_1 ;; Same carry check for the last draw of the block
118 jae .GenerateBlock_16 ;; Unsigned >=: presumably 16 or more bytes are still needed, so do another block
120 ;; Fewer than 16 bytes remain
124 je .GenerateBlock_Return ;; ZF set: presumably no bytes remain, so finish
129 jnc .Call_RDRAND_EAX_0 ;; CF clear; presumably retries the single-word draw
132 jb .Partial_Machine_Word ;; Unsigned <: presumably less than a full machine word remains
143 ;; 1,2,3 bytes remain
144 .Partial_Machine_Word:
146 ;; Test bit 1 to see if size is at least 2
156 ;; Test bit 0 to see if size is at least 1
164 ;; We've hit all the bits
166 .GenerateBlock_Return: ;; Common exit label; target of the je above
173 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOTE(review): `or` is not a documented %ifdef operator. This appears to
;; rely on NASM accepting a list of identifiers (true when any one of them
;; is defined), with `or` being just another, undefined, name -- confirm
;; against the NASM version in use.
176 %ifdef X64 or X32 ;; Set via the command line
;; void NASM_RDRAND_GenerateBlock(byte* ptr, size_t size) -- X64/X32 build.
;; Judging by the labels and notes below, the output is produced in three
;; stages: 32-byte blocks, then single machine words, then 1-7 tail bytes.
178 global NASM_RDRAND_GenerateBlock ;; Make the entry point visible to the linker
182 NASM_RDRAND_GenerateBlock:
184 ;; No Load_Arguments step is needed: buffer and bsize are defined as the
;; registers in which the ABI already delivers the two arguments.
186 ;; A block of 32 bytes appears to be optimal. Adding
187 ;; more rdrand calls degrades performance.
195 jnc .Call_RDRAND_RAX_4 ;; CF clear (RDRAND sets CF on success); presumably retries this draw
200 jnc .Call_RDRAND_RAX_3 ;; Same carry check for the next draw of the block
205 jnc .Call_RDRAND_RAX_2 ;; Same carry check for the next draw of the block
210 jnc .Call_RDRAND_RAX_1 ;; Same carry check for the last draw of the block
217 jae .GenerateBlock_32 ;; Unsigned >=: presumably 32 or more bytes are still needed, so do another block
219 ;; Fewer than 32 bytes remain
223 je .GenerateBlock_Return ;; ZF set: presumably no bytes remain, so finish
227 jnc .Call_RDRAND_RAX_0 ;; CF clear; presumably retries the single-word draw
230 jb .Partial_Machine_Word ;; Unsigned <: presumably less than a full machine word remains
241 ;; 1,2,3,4,5,6,7 bytes remain
242 .Partial_Machine_Word:
244 ;; Test bit 2 to see if size is at least 4
254 ;; Test bit 1 to see if size is at least 2
264 ;; Test bit 0 to see if size is at least 1
272 ;; We've hit all the bits
274 .GenerateBlock_Return: ;; Common exit label; target of the je above
281 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
282 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
284 %ifdef X86 ;; Set via the command line
;; void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size) -- X86 build.
;; Judging by the labels and notes below, the output is produced in three
;; stages: 16-byte blocks, then single machine words, then 1-3 tail bytes.
286 global NASM_RDSEED_GenerateBlock ;; Make the entry point visible to the linker
290 NASM_RDSEED_GenerateBlock:
297 ;; A block of 16 bytes appears to be optimal. Adding
298 ;; more rdseed calls degrades performance.
306 jnc .Call_RDSEED_EAX_4 ;; CF clear (RDSEED sets CF on success); presumably retries this draw
311 jnc .Call_RDSEED_EAX_3 ;; Same carry check for the next draw of the block
316 jnc .Call_RDSEED_EAX_2 ;; Same carry check for the next draw of the block
321 jnc .Call_RDSEED_EAX_1 ;; Same carry check for the last draw of the block
328 jae .GenerateBlock_16 ;; Unsigned >=: presumably 16 or more bytes are still needed, so do another block
330 ;; Fewer than 16 bytes remain
334 je .GenerateBlock_Return ;; ZF set: presumably no bytes remain, so finish
339 jnc .Call_RDSEED_EAX_0 ;; CF clear; presumably retries the single-word draw
342 jb .Partial_Machine_Word ;; Unsigned <: presumably less than a full machine word remains
353 ;; 1,2,3 bytes remain
354 .Partial_Machine_Word:
356 ;; Test bit 1 to see if size is at least 2
366 ;; Test bit 0 to see if size is at least 1
374 ;; We've hit all the bits
376 .GenerateBlock_Return: ;; Common exit label; target of the je above
383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOTE(review): `or` is not a documented %ifdef operator. This appears to
;; rely on NASM accepting a list of identifiers (true when any one of them
;; is defined), with `or` being just another, undefined, name -- confirm
;; against the NASM version in use.
386 %ifdef X64 or X32 ;; Set via the command line
;; void NASM_RDSEED_GenerateBlock(byte* ptr, size_t size) -- X64/X32 build.
;; Judging by the labels and notes below, the output is produced in three
;; stages: 32-byte blocks, then single machine words, then 1-7 tail bytes.
388 global NASM_RDSEED_GenerateBlock ;; Make the entry point visible to the linker
392 NASM_RDSEED_GenerateBlock:
394 ;; No Load_Arguments step is needed: buffer and bsize are defined as the
;; registers in which the ABI already delivers the two arguments.
396 ;; A block of 32 bytes appears to be optimal. Adding
397 ;; more rdseed calls degrades performance.
405 jnc .Call_RDSEED_RAX_4 ;; CF clear (RDSEED sets CF on success); presumably retries this draw
410 jnc .Call_RDSEED_RAX_3 ;; Same carry check for the next draw of the block
415 jnc .Call_RDSEED_RAX_2 ;; Same carry check for the next draw of the block
420 jnc .Call_RDSEED_RAX_1 ;; Same carry check for the last draw of the block
427 jae .GenerateBlock_32 ;; Unsigned >=: presumably 32 or more bytes are still needed, so do another block
429 ;; Fewer than 32 bytes remain
433 je .GenerateBlock_Return ;; ZF set: presumably no bytes remain, so finish
437 jnc .Call_RDSEED_RAX_0 ;; CF clear; presumably retries the single-word draw
440 jb .Partial_Machine_Word ;; Unsigned <: presumably less than a full machine word remains
451 ;; 1,2,3,4,5,6,7 bytes remain
452 .Partial_Machine_Word:
454 ;; Test bit 2 to see if size is at least 4
464 ;; Test bit 1 to see if size is at least 2
474 ;; Test bit 0 to see if size is at least 1
482 ;; We've hit all the bits
484 .GenerateBlock_Return: ;; Common exit label; target of the je above
491 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;