52 #ifndef CRYPTOPP_IMPORTS 72 #if (_MSC_VER >= 1400) && !defined(_M_ARM) 81 #if (__SUNPRO_CC >= 0x5130) 83 # define MAYBE_UNCONST_CAST(x) const_cast<word*>(x) 85 # define MAYBE_CONST const 86 # define MAYBE_UNCONST_CAST(x) x 91 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_MIXED_ASM) 92 # undef CRYPTOPP_X86_ASM_AVAILABLE 93 # undef CRYPTOPP_X32_ASM_AVAILABLE 94 # undef CRYPTOPP_X64_ASM_AVAILABLE 95 # undef CRYPTOPP_SSE2_ASM_AVAILABLE 96 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE 98 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86)) 106 static void SetFunctionPointers();
// Constructor of InitializeInteger. Its visible statement calls
// SetFunctionPointers(), the file-static function declared near the top of this file.
114 InitializeInteger::InitializeInteger()
120 SetFunctionPointers();
// Compare: takes two word arrays A and B plus a word count N and returns an int.
// The visible branch tests A[N] < B[N], so N is also used as the running index.
// NOTE(review): the value returned for each outcome is decided elsewhere in the
// body — confirm the sign convention before relying on it.
137 inline static int Compare(
const word *A,
const word *B,
size_t N)
142 else if (A[N] < B[N])
// Increment: works on the N-word array A with an addend B that defaults to 1 and
// returns an int (presumably the carry out of the top word — TODO confirm).
// The visible loop covers indices 1..N-1, i.e. every word after A[0].
// NOTE(review): the loop index is `unsigned` while N is size_t.
148 inline static int Increment(word *A,
size_t N, word B=1)
155 for (
unsigned i=1; i<N; i++)
// Decrement: works on the N-word array A with a subtrahend B that defaults to 1 and
// returns an int (presumably the borrow out of the top word — TODO confirm).
// The visible loop covers indices 1..N-1, i.e. every word after A[0].
// NOTE(review): the loop index is `unsigned` while N is size_t.
161 inline static int Decrement(word *A,
size_t N, word B=1)
168 for (
unsigned i=1; i<N; i++)
// TwosComplement: takes a mutable N-word array A and returns nothing, so any result
// is presumably written back into A (by its name, the two's complement — confirm).
// The visible loop visits every index 0..N-1.
// NOTE(review): the loop index is `unsigned` while N is size_t.
174 static void TwosComplement(word *A,
size_t N)
177 for (
unsigned i=0; i<N; i++)
// AtomicInverseModPower2: maps one word A to one word; by its name presumably the
// multiplicative inverse of A modulo 2^WORD_BITS — TODO confirm.
// The visible loop starts i at 3 and doubles it each pass (3, 6, 12, ...) while
// i < WORD_BITS, so the number of passes grows with log2(WORD_BITS).
181 static word AtomicInverseModPower2(word A)
187 for (
unsigned i=3; i<WORD_BITS; i*=2)
196 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE)) 197 #define TWO_64_BIT_WORDS 1 198 #define Declare2Words(x) word x##0, x##1; 199 #define AssignWord(a, b) a##0 = b; a##1 = 0; 200 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); 201 #define LowWord(a) a##0 202 #define HighWord(a) a##1 204 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1); 205 #ifndef __INTEL_COMPILER 206 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2; 208 #elif defined(__DECCXX) 209 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b); 210 #elif defined(__x86_64__) 211 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100 213 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc"); 215 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); 216 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); 217 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); 218 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc"); 219 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); 220 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc"); 223 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b) 225 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2; 228 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; 230 #define AddWithCarry(u, a, b) {word t = a+b; 
u##0 = t + u##1; u##1 = (t<a) + (u##0<t);} 231 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);} 232 #define GetCarry(u) u##1 233 #define GetBorrow(u) u##1 235 #define Declare2Words(x) dword x; 236 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64)) 237 #define MultiplyWords(p, a, b) p = __emulu(a, b); 239 #define MultiplyWords(p, a, b) p = (dword)a*b; 241 #define AssignWord(a, b) a = b; 242 #define Add2WordsBy1(a, b, c) a = b + c; 243 #define Acc2WordsBy2(a, b) a += b; 244 #define LowWord(a) word(a) 245 #define HighWord(a) word(a>>WORD_BITS) 246 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2; 247 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u); 248 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u); 249 #define GetCarry(u) HighWord(u) 250 #define GetBorrow(u) word(u>>(WORD_BITS*2-1)) 253 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p)); 256 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b) 259 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e)); 265 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) 266 DWord() {std::memset(&m_whole, 0x00,
sizeof(m_whole));}
// Default constructor: zero-fills the m_halfs member with memset.
// Presumably the variant used when no native double-word type is available — confirm
// against the surrounding #if.
268 DWord() {std::memset(&m_halfs, 0x00,
sizeof(m_halfs));}
// Explicit construction from a single word `low`.
// Under CRYPTOPP_NATIVE_DWORD_AVAILABLE the value is stored directly in m_whole.
// The second declaration is the alternative variant with the same signature.
271 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 272 explicit DWord(word low) : m_whole(low) { }
274 explicit DWord(word low)
// Construction from a (low, high) pair of words.
// With CRYPTOPP_NATIVE_DWORD_AVAILABLE, m_whole is value-initialized and then filled
// by memcpy from a two-word temporary: {low, high} when CRYPTOPP_LITTLE_ENDIAN is
// true, {high, low} in the other branch (presumably big-endian), so the in-memory
// word order matches the native double-word layout.
// The other variant value-initializes m_halfs instead.
// NOTE(review): memcpy is unqualified here, while the default constructor calls
// std::memset.
281 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) 282 DWord(word low, word high) : m_whole()
284 DWord(word low, word high) : m_halfs()
287 #if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) 288 # if (CRYPTOPP_LITTLE_ENDIAN) 289 const word t[2] = {low,high};
290 memcpy(&m_whole, t,
sizeof(m_whole));
292 const word t[2] = {high,low};
293 memcpy(&m_whole, t,
sizeof(m_whole));
// Multiply: forms the double-width product of two words in a DWord r.
// With a native double-word type, `a` is widened to dword before multiplying so the
// product is computed at double width. Otherwise, when the MultiplyWordsLoHi macro is
// defined, that macro writes the low and high halves of the product into r.m_halfs.
301 static DWord Multiply(word a, word b)
304 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 305 r.m_whole = (dword)a * b;
306 #elif defined(MultiplyWordsLoHi) 307 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
// MultiplyAndAdd: starts from the double-width product Multiply(a, b).
// By its name the result is presumably a*b + c — TODO confirm how c is folded in.
314 static DWord MultiplyAndAdd(word a, word b, word c)
316 DWord r = Multiply(a, b);
// In-place addition of a single word.
// Native variant: m_whole is replaced by m_whole + a.
// Halves variant: the high half is bumped by (m_halfs.low < a). That comparison is a
// carry test only if m_halfs.low already holds the wrapped sum low + a at this
// point — presumably updated just before; TODO confirm.
320 DWord & operator+=(word a)
322 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 323 m_whole = m_whole + a;
326 m_halfs.high += (m_halfs.low < a);
// Addition of a single word, producing a new DWord r.
// Native variant: r.m_whole = m_whole + a.
// Halves variant: the low halves are added first; the sum being smaller than `a`
// signals wraparound (assuming `word` is unsigned), and that carry is added into the
// high half.
331 DWord operator+(word a)
334 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 335 r.m_whole = m_whole + a;
337 r.m_halfs.low = m_halfs.low + a;
338 r.m_halfs.high = m_halfs.high + (r.m_halfs.low > a ? 0 : 0) + (r.m_halfs.low < a);
// Subtraction where the operand `a` is itself a DWord (it is read through a.m_whole
// and a.m_halfs), with the difference built in r.
// Native variant: r.m_whole = m_whole - a.m_whole.
// Halves variant: the low halves are subtracted first; a result larger than the
// original low half signals a borrow (assuming `word` is unsigned), which is taken
// out of the high-half difference.
346 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 347 r.m_whole = m_whole - a.m_whole;
349 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
350 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
// Subtraction of a single word, with the difference built in r.
// Native variant: r.m_whole = m_whole - a.
// Halves variant: the low half is reduced by `a`; a result larger than the original
// low half signals a borrow (assuming `word` is unsigned), which is subtracted from
// the high half.
355 DWord operator-(word a)
358 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 359 r.m_whole = m_whole - a;
361 r.m_halfs.low = m_halfs.low - a;
362 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
// Division and remainder of this double word by a single word, each yielding a word.
// Declarations only; the definitions appear later in this file as
// DWord::operator/ and DWord::operator%.
368 word operator/(word divisor);
370 word operator%(word a);
// Zero test. In the halves-based return statement the result is true only when both
// the high and the low half are zero.
372 bool operator!()
const 374 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 377 return !m_halfs.high && !m_halfs.low;
// Returns the low word of the double word (m_halfs.low).
383 word GetLowHalf()
const {
return m_halfs.low;}
// Returns the high word of the double word (m_halfs.high).
384 word GetHighHalf()
const {
return m_halfs.high;}
// Returns 0 - m_halfs.high, i.e. the negated high word.
// Presumably used after a subtraction that underflowed: the high word is then all
// ones and its negation is 1, a borrow to feed into the next limb — TODO confirm.
385 word GetHighHalfAsBorrow()
const {
return 0-m_halfs.high;}
// Word constructors:
//   Word()                  - value zero.
//   Word(word value)        - wraps an existing word (implicit conversion allowed).
//   Word(hword low, hword high) - packs two half words: `high` is widened to word
//                             and shifted into the upper WORD_BITS/2 bits, then
//                             OR-ed with `low`.
393 #if (CRYPTOPP_LITTLE_ENDIAN) 403 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 413 Word() : m_whole(0) {}
414 Word(word value) : m_whole(value) {}
415 Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
// Multiply: product of two half words stored in r.m_whole.
// `a` is widened to word before the multiplication so the product is computed at
// full word width.
417 static Word Multiply(hword a, hword b)
420 r.m_whole = (word)a * b;
// Subtraction where the operand `a` is itself a Word (read through a.m_whole):
// the difference of the two whole values is stored in r.
427 r.m_whole = m_whole - a.m_whole;
// Subtraction of a single half word: r.m_whole = m_whole - a.
431 Word operator-(hword a)
434 r.m_whole = m_whole - a;
// Division by a half word. The quotient m_whole / divisor is narrowed to hword, so
// any bits above the half-word width are discarded.
// NOTE(review): presumably callers guarantee a non-zero divisor and a quotient that
// fits in an hword — confirm.
439 hword operator/(hword divisor)
441 return hword(m_whole / divisor);
// operator!: const predicate on the Word; presumably true when m_whole is zero —
// TODO confirm.
// GetWhole: returns the stored value m_whole unchanged.
444 bool operator!()
const 449 word GetWhole()
const {
return m_whole;}
// Returns the low half of the word: m_whole narrowed to hword.
450 hword GetLowHalf()
const {
return hword(m_whole);}
// Returns the high half of the word: m_whole shifted right by WORD_BITS/2 and
// narrowed to hword.
451 hword GetHighHalf()
const {
return hword(m_whole>>(WORD_BITS/2));}
// Returns 0 minus the high half of the word (the high half is extracted with a
// right shift by WORD_BITS/2 and narrowed to hword first).
// Presumably turns an all-ones high half left by an underflowed subtraction into a
// borrow of 1 — TODO confirm.
452 hword GetHighHalfAsBorrow()
const {
return 0-hword(m_whole>>(WORD_BITS/2));}
// DivideThreeWordsByTwo: divides the three-limb value A[0..2] by the two-limb
// divisor (B0 low, B1 high). S is the single-limb type and D the double-limb type
// providing Multiply / GetLowHalf / GetHighHalf / GetHighHalfAsBorrow.
// A is updated in place while the quotient estimate Q is subtracted out; the S return
// value is presumably Q — TODO confirm.
// `dummy` exists only so D can be named by callers; it is marked unused.
459 template <
class S,
class D>
460 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULLPTR)
462 CRYPTOPP_UNUSED(dummy);
// `pre` records whether B1+1 wraps to zero in S; dividing by S(B1+1) would then be a
// division by zero, so that estimate cannot be used in that case.
471 S Q;
bool pre = (S(B1+1) == 0);
// Two candidate estimates of Q, each a double-limb by single-limb division.
473 Q = D(A[1], A[2]) / S(B1+1);
477 Q = D(A[0], A[1]) / B0;
// Subtract Q*B from A limb by limb: p = B0*Q, with the borrow of each limb carried
// into the next through GetHighHalfAsBorrow().
480 D p = D::Multiply(B0, Q);
481 D u = (D) A[0] - p.GetLowHalf();
482 A[0] = u.GetLowHalf();
483 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
484 A[1] = u.GetLowHalf();
485 A[2] += u.GetHighHalf();
// Correction loop: runs while what is left in A is still >= B (A[2] non-zero, or
// (A[1], A[0]) >= (B1, B0)); each pass takes B out of A once more.
488 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
491 A[0] = u.GetLowHalf();
492 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
493 A[1] = u.GetLowHalf();
494 A[2] += u.GetHighHalf();
// DivideFourWordsByTwo: divides the four-limb value (Ah high, Al low) by the
// two-limb divisor B and returns the two-limb quotient as a D.
// T is a caller-supplied four-limb scratch array: it is loaded with the limbs of Al
// and Ah and then reduced in place by two calls to DivideThreeWordsByTwo — first on
// T+1 for the high quotient limb Q[1], then on T for the low limb Q[0].
// A second return path yields Ah itself rebuilt from its halves; presumably this is
// the B == 0 case — TODO confirm the guarding condition.
503 template <
class S,
class D>
504 inline D DivideFourWordsByTwo(S *T,
const D &Al,
const D &Ah,
const D &B)
513 T[0] = Al.GetLowHalf();
514 T[1] = Al.GetHighHalf();
515 T[2] = Ah.GetLowHalf();
516 T[3] = Ah.GetHighHalf();
517 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
518 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
519 return D(Q[0], Q[1]);
523 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
// Quotient of this double word divided by the word `a`, narrowed to a word.
// Native variant: plain m_whole / a.
// Halves variant: delegates to DivideFourWordsByTwo with hword limbs, passing the
// low and high halves as the dividend, and returns the whole quotient Word.
528 inline word DWord::operator/(word a)
530 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 531 return word(m_whole / a);
534 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
// Remainder of this double word divided by the word `a`.
// Native variant: plain m_whole % a.
// Halves variant, small divisor (a < 2^(WORD_BITS/2)): reduces the value half word
// by half word from the top — the high word, then the upper half of the low word,
// then its lower half — carrying the running remainder r shifted up by WORD_BITS/2
// into each step. `h` is presumably `a` narrowed to hword — TODO confirm.
// Halves variant, large divisor: runs DivideFourWordsByTwo for its side effect on the
// scratch array r and rebuilds the remainder from r[0] (low) and r[1] (high).
538 inline word DWord::operator%(word a)
540 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 541 return word(m_whole % a);
543 if (a < (word(1) << (WORD_BITS/2)))
546 word r = m_halfs.high % h;
547 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
548 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
553 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
554 return Word(r[0], r[1]).GetWhole();
562 #if defined(__GNUC__) 563 #define AddPrologue \ 565 __asm__ __volatile__ \ 568 #define AddEpilogue \ 571 : "d" (C), "a" (A), "D" (B), "c" (N) \ 572 : "%esi", "memory", "cc" \ 575 #define MulPrologue \ 576 __asm__ __volatile__ \ 581 #define MulEpilogue \ 585 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \ 586 : "%esi", "memory", "cc" \ 588 #define SquPrologue MulPrologue 589 #define SquEpilogue \ 593 : "d" (s_maskLow16), "c" (C), "a" (A) \ 594 : "%esi", "%edi", "memory", "cc" \ 596 #define TopPrologue MulPrologue 597 #define TopEpilogue \ 601 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \ 605 #define AddPrologue \ 608 __asm mov eax, [esp+12] \ 609 __asm mov edi, [esp+16] 610 #define AddEpilogue \ 616 #define SquPrologue \ 620 AS2( lea ebx, s_maskLow16) 621 #define MulPrologue \ 626 AS2( lea ebx, s_maskLow16) 627 #define TopPrologue \ 633 AS2( lea ebx, s_maskLow16) 634 #define SquEpilogue RestoreEBX 635 #define MulEpilogue RestoreEBX 636 #define TopEpilogue RestoreEBX 639 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 641 int Baseline_Add(
size_t N, word *C,
const word *A,
const word *B);
// Declaration only, in the CRYPTOPP_X64_MASM_AVAILABLE branch: N-word subtraction
// with output C and inputs A and B, returning an int. The definition is presumably
// supplied by a separately assembled MASM module — TODO confirm.
642 int Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B);
// x64 GNU inline-assembly Baseline_Add (needs CRYPTOPP_X64_ASM_AVAILABLE, __GNUC__
// and CRYPTOPP_WORD128_AVAILABLE).
// Operand map from the constraint lists: %0 = result (early-clobber output, used as
// the scratch register for each word), %1 = N held in rcx and updated in place,
// %2 = C+N, %3 = A+N, %4 = B+N. The pointers are passed already advanced by N words;
// presumably the index is negated so it counts up toward zero — TODO confirm.
// The 8*%1 scaling addresses 8-byte words. The first visible triple uses `add`; the
// following triples use `adc` so the carry flag propagates from word to word.
644 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE) 645 int Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
653 AS2( mov %0,[%3+8*%1])
654 AS2( add %0,[%4+8*%1])
655 AS2( mov [%2+8*%1],%0)
657 AS2( mov %0,[%3+8*%1+8])
658 AS2( adc %0,[%4+8*%1+8])
659 AS2( mov [%2+8*%1+8],%0)
662 AS2( mov %0,[%3+8*%1])
663 AS2( adc %0,[%4+8*%1])
664 AS2( mov [%2+8*%1],%0)
670 :
"=&r" (result),
"+c" (N)
671 :
"r" (C+N),
"r" (A+N),
"r" (B+N)
// x64 GNU inline-assembly Baseline_Sub; same layout as the matching Baseline_Add.
// Operand map from the constraint lists: %0 = result (early-clobber output, used as
// the scratch register for each word), %1 = N held in rcx and updated in place,
// %2 = C+N, %3 = A+N, %4 = B+N. The pointers are passed already advanced by N words;
// presumably the index is negated so it counts up toward zero — TODO confirm.
// The first visible triple uses `sub`; the following triples use `sbb` so the borrow
// propagates from word to word through the carry flag.
677 int Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B)
685 AS2( mov %0,[%3+8*%1])
686 AS2( sub %0,[%4+8*%1])
687 AS2( mov [%2+8*%1],%0)
689 AS2( mov %0,[%3+8*%1+8])
690 AS2( sbb %0,[%4+8*%1+8])
691 AS2( mov [%2+8*%1+8],%0)
694 AS2( mov %0,[%3+8*%1])
695 AS2( sbb %0,[%4+8*%1])
696 AS2( mov [%2+8*%1],%0)
702 :
"=&r" (result),
"+c" (N)
703 :
"r" (C+N),
"r" (A+N),
"r" (B+N)
// 32-bit x86 assembly Baseline_Add, declared naked with the fastcall convention.
// Register roles follow the AddPrologue constraints in this file: eax = A, edi = B,
// edx = C, ecx = N. The three lea instructions advance each pointer by 4*ecx bytes
// (N 32-bit words); presumably ecx is then negated so it counts up toward zero —
// TODO confirm.
// The visible body holds four load/adc/store triples at byte offsets 0, 4, 8 and 12,
// using esi as scratch and storing each sum through edx. ecx is stepped by 4 with
// lea, which does not modify flags, so the carry survives into the next pass.
// The CRYPTOPP_UNUSED lines name the parameters, presumably to silence
// unused-parameter warnings because the asm reaches them only through registers.
708 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86 709 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
714 AS2( lea eax, [eax+4*ecx])
715 AS2( lea edi, [edi+4*ecx])
716 AS2( lea edx, [edx+4*ecx])
726 AS2( mov esi,[eax+4*ecx])
727 AS2( adc esi,[edi+4*ecx])
728 AS2( mov [edx+4*ecx],esi)
729 AS2( mov esi,[eax+4*ecx+4])
730 AS2( adc esi,[edi+4*ecx+4])
731 AS2( mov [edx+4*ecx+4],esi)
733 AS2( mov esi,[eax+4*ecx+8])
734 AS2( adc esi,[edi+4*ecx+8])
735 AS2( mov [edx+4*ecx+8],esi)
736 AS2( mov esi,[eax+4*ecx+12])
737 AS2( adc esi,[edi+4*ecx+12])
738 AS2( mov [edx+4*ecx+12],esi)
740 AS2( lea ecx,[ecx+4])
750 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
751 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
// 32-bit x86 assembly Baseline_Sub, declared naked with the fastcall convention;
// same layout as the matching Baseline_Add with sbb in place of adc.
// Register roles follow the AddPrologue constraints in this file: eax = A, edi = B,
// edx = C, ecx = N. The three lea instructions advance each pointer by 4*ecx bytes
// (N 32-bit words); presumably ecx is then negated so it counts up toward zero —
// TODO confirm.
// The visible body holds four load/sbb/store triples at byte offsets 0, 4, 8 and 12,
// using esi as scratch and storing each difference through edx. ecx is stepped by 4
// with lea, which does not modify flags, so the borrow survives into the next pass.
// The CRYPTOPP_UNUSED lines name the parameters, presumably to silence
// unused-parameter warnings because the asm reaches them only through registers.
754 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, word *C, const word *A, const word *B)
759 AS2( lea eax, [eax+4*ecx])
760 AS2( lea edi, [edi+4*ecx])
761 AS2( lea edx, [edx+4*ecx])
771 AS2( mov esi,[eax+4*ecx])
772 AS2( sbb esi,[edi+4*ecx])
773 AS2( mov [edx+4*ecx],esi)
774 AS2( mov esi,[eax+4*ecx+4])
775 AS2( sbb esi,[edi+4*ecx+4])
776 AS2( mov [edx+4*ecx+4],esi)
778 AS2( mov esi,[eax+4*ecx+8])
779 AS2( sbb esi,[edi+4*ecx+8])
780 AS2( mov [edx+4*ecx+8],esi)
781 AS2( mov esi,[eax+4*ecx+12])
782 AS2( sbb esi,[edi+4*ecx+12])
783 AS2( mov [edx+4*ecx+12],esi)
785 AS2( lea ecx,[ecx+4])
795 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
796 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
// SSE2_Add: compiled only when CRYPTOPP_INTEGER_SSE2 is non-zero; naked, fastcall.
// Same pointer setup as the x86 Baseline_Add: eax, edi and edx are each advanced by
// 4*ecx bytes, and each word is loaded from [eax+...] and [edi+...] and stored to
// [edx+...].
// The arithmetic runs in the 64-bit mm registers: each 32-bit word of the two inputs
// is loaded with movd into mm0 and mm1, and the 32-bit result is stored from mm2.
// Presumably the carry travels in the upper bits of the 64-bit sum instead of the
// flags — TODO confirm against the add/shift instructions between loads and stores.
// Four words at byte offsets 0, 4, 8 and 12 are visible per pass.
799 #if CRYPTOPP_INTEGER_SSE2 800 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL SSE2_Add(
size_t N, word *C,
const word *A,
const word *B)
805 AS2( lea eax, [eax+4*ecx])
806 AS2( lea edi, [edi+4*ecx])
807 AS2( lea edx, [edx+4*ecx])
818 AS2( movd mm0, DWORD PTR [eax+4*ecx])
819 AS2( movd mm1, DWORD PTR [edi+4*ecx])
822 AS2( movd DWORD PTR [edx+4*ecx], mm2)
825 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
826 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
829 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
833 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
834 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
837 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
840 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
841 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
844 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
857 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
858 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
// SSE2_Sub: naked, fastcall; counterpart of SSE2_Add using the 64-bit mm registers.
// eax, edi and edx are each advanced by 4*ecx bytes; each word is loaded from
// [eax+...] and [edi+...] with movd and the 32-bit result is stored to [edx+...].
// Unlike the add routine, the first operand and the stored result alternate between
// mm0 (offsets 0 and 8) and mm2 (offsets 4 and 12), while the second operand always
// goes through mm1. Presumably the two registers take turns holding the running
// difference and its borrow — TODO confirm against the instructions between the
// loads and stores.
860 CRYPTOPP_NAKED
int CRYPTOPP_FASTCALL SSE2_Sub(
size_t N, word *C, const word *A, const word *B)
865 AS2( lea eax, [eax+4*ecx])
866 AS2( lea edi, [edi+4*ecx])
867 AS2( lea edx, [edx+4*ecx])
878 AS2( movd mm0, DWORD PTR [eax+4*ecx])
879 AS2( movd mm1, DWORD PTR [edi+4*ecx])
882 AS2( movd DWORD PTR [edx+4*ecx], mm0)
885 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
886 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
889 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
893 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
894 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
897 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
900 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
901 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
904 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
917 CRYPTOPP_UNUSED(A); CRYPTOPP_UNUSED(B);
918 CRYPTOPP_UNUSED(C); CRYPTOPP_UNUSED(N);
// Portable C++ Baseline_Add, used when none of the assembly variants is selected.
// The loop advances two words at a time and feeds A[i]+B[i] and A[i+1]+B[i+1]
// through AddWithCarry, which threads the running carry in `u`; the final carry is
// returned as an int via GetCarry(u).
// NOTE(review): because A[i+1] and B[i+1] are read whenever i < N, N must be even;
// an odd N would read one word past the end of both inputs.
920 #endif // CRYPTOPP_INTEGER_SSE2 921 #else // CRYPTOPP_SSE2_ASM_AVAILABLE 922 int CRYPTOPP_FASTCALL Baseline_Add(
size_t N, word *C,
const word *A,
const word *B)
928 for (
size_t i=0; i<N; i+=2)
930 AddWithCarry(u, A[i], B[i]);
932 AddWithCarry(u, A[i+1], B[i+1]);
935 return int(GetCarry(u));
// Portable C++ Baseline_Sub, the subtraction counterpart of the portable
// Baseline_Add. The loop advances two words at a time and feeds A[i]-B[i] and
// A[i+1]-B[i+1] through SubtractWithBorrow, which threads the running borrow in `u`;
// the final borrow is returned as an int via GetBorrow(u).
// NOTE(review): because A[i+1] and B[i+1] are read whenever i < N, N must be even;
// an odd N would read one word past the end of both inputs.
938 int CRYPTOPP_FASTCALL Baseline_Sub(
size_t N, word *C,
const word *A,
const word *B)
944 for (
size_t i=0; i<N; i+=2)
946 SubtractWithBorrow(u, A[i], B[i]);
948 SubtractWithBorrow(u, A[i+1], B[i+1]);
951 return int(GetBorrow(u));
// LinearMultiply: multiplies the N-word array AA by the single word B, one word per
// loop pass. Each pass forms the double-width product p = A[i]*B and adds the running
// `carry` into it. Presumably the low word of p is stored to C[i] and the high word
// becomes the next carry, with the final carry returned — TODO confirm.
// A is a local alias of AA produced by MAYBE_UNCONST_CAST (a const_cast<word*> under
// __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
// NOTE(review): the loop index is `unsigned` while N is size_t.
955 static word LinearMultiply(word *C,
const word *AA, word B,
size_t N)
958 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
961 for(
unsigned i=0; i<N; i++)
964 MultiplyWords(p, A[i], B);
965 Acc2WordsBy1(p, carry);
972 #ifndef CRYPTOPP_DOXYGEN_PROCESSING 976 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 981 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 982 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 983 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 984 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ 985 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ 990 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 991 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 992 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 993 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 994 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 995 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 996 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 997 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ 998 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ 999 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ 1000 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ 1001 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ 1002 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ 1007 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 1008 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 1009 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 1010 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 1011 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 1012 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 1013 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 1014 Mul_SaveAcc(7, 0, 8) 
Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ 1015 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ 1016 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ 1017 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ 1018 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ 1019 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ 1020 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ 1021 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ 1022 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ 1023 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ 1024 
Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ 1025 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ 1026 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ 1027 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ 1028 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ 1029 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ 1030 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ 1031 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ 1032 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ 1033 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ 1034 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ 1035 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ 1044 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 1045 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 1046 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \ 1047 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \ 1052 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 1053 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 1054 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ 1055 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ 1056 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 
4) Squ_Diag(3) \ 1057 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ 1058 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ 1059 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ 1060 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ 1061 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \ 1062 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \ 1063 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \ 1068 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 1069 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 1070 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ 1071 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ 1072 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ 1073 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ 1074 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ 1075 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ 1076 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ 1077 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \ 1078 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \ 1079 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \ 1080 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ 1081 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ 1082 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ 1083 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ 1084 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 
14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ 1085 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ 1086 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ 1087 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ 1088 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ 1089 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ 1090 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ 1091 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ 1092 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ 1093 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ 1094 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ 1095 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ 1100 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ 1105 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 1106 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \ 1107 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \ 1112 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 1113 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 1114 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 1115 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 1116 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 1117 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 1118 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ 1123 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 1124 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 1125 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 1126 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 1127 
Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 1128 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 1129 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 1130 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ 1131 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ 1132 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ 1133 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ 1134 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ 1135 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ 1136 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ 1137 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ 1143 #define Mul_Begin(n) \ 1147 MultiplyWords(p, A[0], B[0]) \ 1148 AssignWord(c, LowWord(p)) \ 1149 
AssignWord(d, HighWord(p)) 1151 #define Mul_Acc(i, j) \ 1152 MultiplyWords(p, A[i], B[j]) \ 1153 Acc2WordsBy1(c, LowWord(p)) \ 1154 Acc2WordsBy1(d, HighWord(p)) 1156 #define Mul_SaveAcc(k, i, j) \ 1157 R[k] = LowWord(c); \ 1158 Add2WordsBy1(c, d, HighWord(c)) \ 1159 MultiplyWords(p, A[i], B[j]) \ 1160 AssignWord(d, HighWord(p)) \ 1161 Acc2WordsBy1(c, LowWord(p)) 1163 #define Mul_End(n) \ 1164 R[2*n-3] = LowWord(c); \ 1165 Acc2WordsBy1(d, HighWord(c)) \ 1166 MultiplyWords(p, A[n-1], B[n-1])\ 1167 Acc2WordsBy2(d, p) \ 1168 R[2*n-2] = LowWord(d); \ 1169 R[2*n-1] = HighWord(d); 1171 #define Bot_SaveAcc(k, i, j) \ 1172 R[k] = LowWord(c); \ 1173 word e = LowWord(d) + HighWord(c); \ 1176 #define Bot_Acc(i, j) \ 1179 #define Bot_End(n) \ 1182 #define Mul_Begin(n) \ 1186 MultiplyWords(p, A[0], B[0]) \ 1188 AssignWord(d, HighWord(p)) 1190 #define Mul_Acc(i, j) \ 1191 MulAcc(c, d, A[i], B[j]) 1193 #define Mul_SaveAcc(k, i, j) \ 1196 AssignWord(d, HighWord(d)) \ 1197 MulAcc(c, d, A[i], B[j]) 1199 #define Mul_End(k, i) \ 1201 MultiplyWords(p, A[i], B[i]) \ 1202 Acc2WordsBy2(p, d) \ 1203 R[k+1] = LowWord(p); \ 1204 R[k+2] = HighWord(p); 1206 #define Bot_SaveAcc(k, i, j) \ 1211 #define Bot_Acc(i, j) \ 1214 #define Bot_End(n) \ 1218 #define Squ_Begin(n) \ 1223 MultiplyWords(p, A[0], A[0]) \ 1224 R[0] = LowWord(p); \ 1225 AssignWord(e, HighWord(p)) \ 1226 MultiplyWords(p, A[0], A[1]) \ 1228 AssignWord(d, HighWord(p)) \ 1231 #define Squ_NonDiag \ 1234 #define Squ_SaveAcc(k, i, j) \ 1235 Acc3WordsBy2(c, d, e) \ 1237 MultiplyWords(p, A[i], A[j]) \ 1239 AssignWord(d, HighWord(p)) \ 1241 #define Squ_Acc(i, j) \ 1242 MulAcc(c, d, A[i], A[j]) 1244 #define Squ_Diag(i) \ 1246 MulAcc(c, d, A[i], A[i]) 1248 #define Squ_End(n) \ 1249 Acc3WordsBy2(c, d, e) \ 1251 MultiplyWords(p, A[n-1], A[n-1])\ 1252 Acc2WordsBy2(p, e) \ 1253 R[2*n-2] = LowWord(p); \ 1254 R[2*n-1] = HighWord(p); 1257 void Baseline_Multiply2(word *R,
const word *AA,
const word *BB)
1260 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1261 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Baseline_Multiply4: multiplication of the operands AA and BB into R; by its name
// presumably 4 words by 4 words, expanded from the Mul_* macro tables — TODO confirm.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST (a
// const_cast<word*> under __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1266 void Baseline_Multiply4(word *R,
const word *AA,
const word *BB)
1269 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1270 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Baseline_Multiply8: multiplication of the operands AA and BB into R; by its name
// presumably 8 words by 8 words, expanded from the Mul_* macro tables — TODO confirm.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST (a
// const_cast<word*> under __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1275 void Baseline_Multiply8(word *R,
const word *AA,
const word *BB)
1278 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1279 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Baseline_Square2: squaring of the operand AA into R; by its name presumably a
// 2-word input, expanded from the Squ_* macro tables — TODO confirm.
// A is a local alias of AA produced by MAYBE_UNCONST_CAST (a const_cast<word*> under
// __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1284 void Baseline_Square2(word *R,
const word *AA)
1287 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
// Baseline_Square4: squaring of the operand AA into R; by its name presumably a
// 4-word input, expanded from the Squ_* macro tables — TODO confirm.
// A is a local alias of AA produced by MAYBE_UNCONST_CAST (a const_cast<word*> under
// __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1292 void Baseline_Square4(word *R,
const word *AA)
1295 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
// Baseline_Square8: squaring of the operand AA into R; by its name presumably an
// 8-word input, expanded from the Squ_* macro tables — TODO confirm.
// A is a local alias of AA produced by MAYBE_UNCONST_CAST (a const_cast<word*> under
// __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1300 void Baseline_Square8(word *R,
const word *AA)
1303 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
// Baseline_MultiplyBottom2: by its name presumably the low half of the 2-word by
// 2-word product of AA and BB, written to R — TODO confirm.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST (a
// const_cast<word*> under __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
// When TWO_64_BIT_WORDS is defined, d0 and d1 are passed to CRYPTOPP_UNUSED,
// presumably because the accumulator pair `d` is declared but not read in this
// size — TODO confirm.
1308 void Baseline_MultiplyBottom2(word *R,
const word *AA,
const word *BB)
1311 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1312 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1317 #if defined(TWO_64_BIT_WORDS) 1318 CRYPTOPP_UNUSED(d0); CRYPTOPP_UNUSED(d1);
// Baseline_MultiplyBottom4: by its name presumably the low half of the 4-word by
// 4-word product of AA and BB, written to R — TODO confirm.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST (a
// const_cast<word*> under __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1322 void Baseline_MultiplyBottom4(word *R,
const word *AA,
const word *BB)
1325 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1326 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Baseline_MultiplyBottom8: by its name presumably the low half of the 8-word by
// 8-word product of AA and BB, written to R — TODO confirm.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST (a
// const_cast<word*> under __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1331 void Baseline_MultiplyBottom8(word *R,
const word *AA,
const word *BB)
1334 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1335 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
// Helper macros for the MultiplyTop routines, followed by Baseline_MultiplyTop2.
//   Top_Begin(n)       - first product A[0]*B[n-2]; only its high word seeds d.
//   Top_Acc(i, j)      - product A[i]*B[j]; only its high word is accumulated into d.
//   Top_SaveAcc0(i, j) - moves the high word of d down into d, then MulAcc of
//                        A[i]*B[j] into (c, d).
//   Top_SaveAcc1(i, j) - adds c into d first, then the same shift and MulAcc.
// Baseline_MultiplyTop2 calls Baseline_Multiply2(T, A, B) into a temporary T;
// presumably R is then taken from the upper words of T — TODO confirm, including how
// the extra word L is used.
1340 #define Top_Begin(n) \ 1344 MultiplyWords(p, A[0], B[n-2]);\ 1345 AssignWord(d, HighWord(p)); 1347 #define Top_Acc(i, j) \ 1348 MultiplyWords(p, A[i], B[j]);\ 1349 Acc2WordsBy1(d, HighWord(p)); 1351 #define Top_SaveAcc0(i, j) \ 1353 AssignWord(d, HighWord(d)) \ 1354 MulAcc(c, d, A[i], B[j]) 1356 #define Top_SaveAcc1(i, j) \ 1358 Acc2WordsBy1(d, c); \ 1360 AssignWord(d, HighWord(d)) \ 1361 MulAcc(c, d, A[i], B[j]) 1363 void Baseline_MultiplyTop2(word *R,
const word *A,
const word *B, word L)
1367 Baseline_Multiply2(T, A, B);
// Baseline_MultiplyTop4: by its name presumably the upper half of the 4-word by
// 4-word product of AA and BB, written to R — TODO confirm, including the role of L.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST.
// Each macro row below handles one column of the schoolbook product, i.e. all index
// pairs (i, j) with the same i + j: 2, then 3, 4 and 5. The lower columns go through
// the Top_* macros, and the first Mul_SaveAcc row stores to index 0 of R.
1372 void Baseline_MultiplyTop4(word *R,
const word *AA,
const word *BB, word L)
1375 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1376 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1379 Top_Acc(1, 1) Top_Acc(2, 0) \
1380 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1381 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1382 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1386 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
// Baseline_MultiplyTop8: by its name presumably the upper half of the 8-word by
// 8-word product of AA and BB, written to R — TODO confirm, including the role of L.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST.
// Each macro row below handles one column of the schoolbook product, i.e. all index
// pairs (i, j) with the same i + j: 6, then 7 through 13. The lower columns go
// through the Top_* macros, and the Mul_SaveAcc rows store to indices 0..4 of R.
1389 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1390 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1393 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1394 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1395 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1396 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1397 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1398 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1399 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1400 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1404 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available 1405 void Baseline_Multiply16(word *R,
const word *AA,
const word *BB)
// Baseline_Multiply16: compiled only when CRYPTOPP_INTEGER_SSE2 is zero.
// Multiplication of the operands AA and BB into R; by its name presumably 16 words
// by 16 words, expanded from the Mul_* macro tables — TODO confirm.
// A and B are local aliases of AA and BB produced by MAYBE_UNCONST_CAST (a
// const_cast<word*> under __SUNPRO_CC >= 0x5130, otherwise the pointer itself).
1408 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1409 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1414 void Baseline_Square16(word *R,
const word *AA)
1417 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1422 void Baseline_MultiplyBottom16(word *R,
const word *AA,
const word *BB)
1425 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1426 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1431 void Baseline_MultiplyTop16(word *R,
const word *AA,
const word *BB, word L)
1434 MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1435 MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1438 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1439 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1440 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1441 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1442 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1443 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1444 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1445 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1446 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1447 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1448 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1449 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1450 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1451 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1452 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1453 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1460 #if CRYPTOPP_INTEGER_SSE2 1462 CRYPTOPP_ALIGN_DATA(16)
1464 const word32 s_maskLow16[4] = {
1465 0xffff,0xffff,0xffff,0xffff
1484 #define SSE2_FinalSave(k) \ 1485 AS2( psllq xmm5, 16) \ 1486 AS2( paddq xmm4, xmm5) \ 1487 AS2( movq QWORD PTR [ecx+8*(k)], xmm4) 1489 #define SSE2_SaveShift(k) \ 1490 AS2( movq xmm0, xmm6) \ 1491 AS2( punpckhqdq xmm6, xmm0) \ 1492 AS2( movq xmm1, xmm7) \ 1493 AS2( punpckhqdq xmm7, xmm1) \ 1494 AS2( paddd xmm6, xmm0) \ 1495 AS2( pslldq xmm6, 4) \ 1496 AS2( paddd xmm7, xmm1) \ 1497 AS2( paddd xmm4, xmm6) \ 1498 AS2( pslldq xmm7, 4) \ 1499 AS2( movq xmm6, xmm4) \ 1500 AS2( paddd xmm5, xmm7) \ 1501 AS2( movq xmm7, xmm5) \ 1502 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ 1503 AS2( psrlq xmm6, 16) \ 1504 AS2( paddq xmm6, xmm7) \ 1505 AS2( punpckhqdq xmm4, xmm0) \ 1506 AS2( punpckhqdq xmm5, xmm0) \ 1507 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \ 1508 AS2( psrlq xmm6, 3*16) \ 1509 AS2( paddd xmm4, xmm6) \ 1511 #define Squ_SSE2_SaveShift(k) \ 1512 AS2( movq xmm0, xmm6) \ 1513 AS2( punpckhqdq xmm6, xmm0) \ 1514 AS2( movq xmm1, xmm7) \ 1515 AS2( punpckhqdq xmm7, xmm1) \ 1516 AS2( paddd xmm6, xmm0) \ 1517 AS2( pslldq xmm6, 4) \ 1518 AS2( paddd xmm7, xmm1) \ 1519 AS2( paddd xmm4, xmm6) \ 1520 AS2( pslldq xmm7, 4) \ 1521 AS2( movhlps xmm6, xmm4) \ 1522 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ 1523 AS2( paddd xmm5, xmm7) \ 1524 AS2( movhps QWORD PTR [esp+12], xmm5)\ 1525 AS2( psrlq xmm4, 16) \ 1526 AS2( paddq xmm4, xmm5) \ 1527 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \ 1528 AS2( psrlq xmm4, 3*16) \ 1529 AS2( paddd xmm4, xmm6) \ 1530 AS2( movq QWORD PTR [esp+4], xmm4)\ 1532 #define SSE2_FirstMultiply(i) \ 1533 AS2( movdqa xmm7, [esi+(i)*16])\ 1534 AS2( movdqa xmm5, [edi-(i)*16])\ 1535 AS2( pmuludq xmm5, xmm7) \ 1536 AS2( movdqa xmm4, [ebx])\ 1537 AS2( movdqa xmm6, xmm4) \ 1538 AS2( pand xmm4, xmm5) \ 1539 AS2( psrld xmm5, 16) \ 1540 AS2( pmuludq xmm7, [edx-(i)*16])\ 1541 AS2( pand xmm6, xmm7) \ 1542 AS2( psrld xmm7, 16) 1544 #define Squ_Begin(n) \ 1547 AS2( and esp, 0xfffffff0)\ 1548 AS2( lea edi, [esp-32*n])\ 1549 AS2( sub esp, 32*n+16)\ 1551 AS2( mov esi, edi) \ 1552 AS2( 
xor edx, edx) \ 1554 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ 1555 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ 1556 AS2( movdqa [edi+2*edx], xmm0) \ 1557 AS2( psrlq xmm0, 32) \ 1558 AS2( movdqa [edi+2*edx+16], xmm0) \ 1559 AS2( movdqa [edi+16*n+2*edx], xmm1) \ 1560 AS2( psrlq xmm1, 32) \ 1561 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \ 1563 AS2( cmp edx, 8*(n)) \ 1565 AS2( lea edx, [edi+16*n])\ 1566 SSE2_FirstMultiply(0) \ 1568 #define Squ_Acc(i) \ 1570 AS2( movdqa xmm1, [esi+(i)*16]) \ 1571 AS2( movdqa xmm0, [edi-(i)*16]) \ 1572 AS2( movdqa xmm2, [ebx]) \ 1573 AS2( pmuludq xmm0, xmm1) \ 1574 AS2( pmuludq xmm1, [edx-(i)*16]) \ 1575 AS2( movdqa xmm3, xmm2) \ 1576 AS2( pand xmm2, xmm0) \ 1577 AS2( psrld xmm0, 16) \ 1578 AS2( paddd xmm4, xmm2) \ 1579 AS2( paddd xmm5, xmm0) \ 1580 AS2( pand xmm3, xmm1) \ 1581 AS2( psrld xmm1, 16) \ 1582 AS2( paddd xmm6, xmm3) \ 1583 AS2( paddd xmm7, xmm1) \ 1586 #define Squ_Acc2(i) ASC(call, LSqu##i) 1587 #define Squ_Acc3(i) Squ_Acc2(i) 1588 #define Squ_Acc4(i) Squ_Acc2(i) 1589 #define Squ_Acc5(i) Squ_Acc2(i) 1590 #define Squ_Acc6(i) Squ_Acc2(i) 1591 #define Squ_Acc7(i) Squ_Acc2(i) 1592 #define Squ_Acc8(i) Squ_Acc2(i) 1594 #define SSE2_End(E, n) \ 1595 SSE2_SaveShift(2*(n)-3) \ 1596 AS2( movdqa xmm7, [esi+16]) \ 1597 AS2( movdqa xmm0, [edi]) \ 1598 AS2( pmuludq xmm0, xmm7) \ 1599 AS2( movdqa xmm2, [ebx]) \ 1600 AS2( pmuludq xmm7, [edx]) \ 1601 AS2( movdqa xmm6, xmm2) \ 1602 AS2( pand xmm2, xmm0) \ 1603 AS2( psrld xmm0, 16) \ 1604 AS2( paddd xmm4, xmm2) \ 1605 AS2( paddd xmm5, xmm0) \ 1606 AS2( pand xmm6, xmm7) \ 1607 AS2( psrld xmm7, 16) \ 1608 SSE2_SaveShift(2*(n)-2) \ 1609 SSE2_FinalSave(2*(n)-1) \ 1613 #define Squ_End(n) SSE2_End(SquEpilogue, n) 1614 #define Mul_End(n) SSE2_End(MulEpilogue, n) 1615 #define Top_End(n) SSE2_End(TopEpilogue, n) 1617 #define Squ_Column1(k, i) \ 1618 Squ_SSE2_SaveShift(k) \ 1620 SSE2_FirstMultiply(1)\ 1622 AS2( paddd xmm4, xmm4) \ 1623 AS2( paddd xmm5, xmm5) \ 1624 AS2( movdqa xmm3, [esi]) \ 1625 AS2( movq xmm1, 
QWORD PTR [esi+8]) \ 1626 AS2( pmuludq xmm1, xmm3) \ 1627 AS2( pmuludq xmm3, xmm3) \ 1628 AS2( movdqa xmm0, [ebx])\ 1629 AS2( movdqa xmm2, xmm0) \ 1630 AS2( pand xmm0, xmm1) \ 1631 AS2( psrld xmm1, 16) \ 1632 AS2( paddd xmm6, xmm0) \ 1633 AS2( paddd xmm7, xmm1) \ 1634 AS2( pand xmm2, xmm3) \ 1635 AS2( psrld xmm3, 16) \ 1636 AS2( paddd xmm6, xmm6) \ 1637 AS2( paddd xmm7, xmm7) \ 1638 AS2( paddd xmm4, xmm2) \ 1639 AS2( paddd xmm5, xmm3) \ 1640 AS2( movq xmm0, QWORD PTR [esp+4])\ 1641 AS2( movq xmm1, QWORD PTR [esp+12])\ 1642 AS2( paddd xmm4, xmm0)\ 1643 AS2( paddd xmm5, xmm1)\ 1645 #define Squ_Column0(k, i) \ 1646 Squ_SSE2_SaveShift(k) \ 1649 SSE2_FirstMultiply(1)\ 1651 AS2( paddd xmm6, xmm6) \ 1652 AS2( paddd xmm7, xmm7) \ 1653 AS2( paddd xmm4, xmm4) \ 1654 AS2( paddd xmm5, xmm5) \ 1655 AS2( movq xmm0, QWORD PTR [esp+4])\ 1656 AS2( movq xmm1, QWORD PTR [esp+12])\ 1657 AS2( paddd xmm4, xmm0)\ 1658 AS2( paddd xmm5, xmm1)\ 1660 #define SSE2_MulAdd45 \ 1661 AS2( movdqa xmm7, [esi]) \ 1662 AS2( movdqa xmm0, [edi]) \ 1663 AS2( pmuludq xmm0, xmm7) \ 1664 AS2( movdqa xmm2, [ebx]) \ 1665 AS2( pmuludq xmm7, [edx]) \ 1666 AS2( movdqa xmm6, xmm2) \ 1667 AS2( pand xmm2, xmm0) \ 1668 AS2( psrld xmm0, 16) \ 1669 AS2( paddd xmm4, xmm2) \ 1670 AS2( paddd xmm5, xmm0) \ 1671 AS2( pand xmm6, xmm7) \ 1672 AS2( psrld xmm7, 16) 1674 #define Mul_Begin(n) \ 1677 AS2( and esp, 0xfffffff0)\ 1678 AS2( sub esp, 48*n+16)\ 1680 AS2( xor edx, edx) \ 1682 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ 1683 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ 1684 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ 1685 AS2( movdqa [esp+20+2*edx], xmm0) \ 1686 AS2( psrlq xmm0, 32) \ 1687 AS2( movdqa [esp+20+2*edx+16], xmm0) \ 1688 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ 1689 AS2( psrlq xmm1, 32) \ 1690 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ 1691 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ 1692 AS2( psrlq xmm2, 32) \ 1693 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ 1695 AS2( cmp edx, 8*(n)) \ 1697 AS2( lea edi, [esp+20])\ 1698 
AS2( lea edx, [esp+20+16*n])\ 1699 AS2( lea esi, [esp+20+32*n])\ 1700 SSE2_FirstMultiply(0) \ 1702 #define Mul_Acc(i) \ 1704 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ 1705 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ 1706 AS2( movdqa xmm2, [ebx]) \ 1707 AS2( pmuludq xmm0, xmm1) \ 1708 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ 1709 AS2( movdqa xmm3, xmm2) \ 1710 AS2( pand xmm2, xmm0) \ 1711 AS2( psrld xmm0, 16) \ 1712 AS2( paddd xmm4, xmm2) \ 1713 AS2( paddd xmm5, xmm0) \ 1714 AS2( pand xmm3, xmm1) \ 1715 AS2( psrld xmm1, 16) \ 1716 AS2( paddd xmm6, xmm3) \ 1717 AS2( paddd xmm7, xmm1) \ 1720 #define Mul_Acc2(i) ASC(call, LMul##i) 1721 #define Mul_Acc3(i) Mul_Acc2(i) 1722 #define Mul_Acc4(i) Mul_Acc2(i) 1723 #define Mul_Acc5(i) Mul_Acc2(i) 1724 #define Mul_Acc6(i) Mul_Acc2(i) 1725 #define Mul_Acc7(i) Mul_Acc2(i) 1726 #define Mul_Acc8(i) Mul_Acc2(i) 1727 #define Mul_Acc9(i) Mul_Acc2(i) 1728 #define Mul_Acc10(i) Mul_Acc2(i) 1729 #define Mul_Acc11(i) Mul_Acc2(i) 1730 #define Mul_Acc12(i) Mul_Acc2(i) 1731 #define Mul_Acc13(i) Mul_Acc2(i) 1732 #define Mul_Acc14(i) Mul_Acc2(i) 1733 #define Mul_Acc15(i) Mul_Acc2(i) 1734 #define Mul_Acc16(i) Mul_Acc2(i) 1736 #define Mul_Column1(k, i) \ 1742 #define Mul_Column0(k, i) \ 1749 #define Bot_Acc(i) \ 1750 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ 1751 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ 1752 AS2( pmuludq xmm0, xmm1) \ 1753 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ 1754 AS2( paddq xmm4, xmm0) \ 1755 AS2( paddd xmm6, xmm1) 1757 #define Bot_SaveAcc(k) \ 1761 AS2( movdqa xmm6, [esi]) \ 1762 AS2( movdqa xmm0, [edi]) \ 1763 AS2( pmuludq xmm0, xmm6) \ 1764 AS2( paddq xmm4, xmm0) \ 1765 AS2( psllq xmm5, 16) \ 1766 AS2( paddq xmm4, xmm5) \ 1767 AS2( pmuludq xmm6, [edx]) 1769 #define Bot_End(n) \ 1770 AS2( movhlps xmm7, xmm6) \ 1771 AS2( paddd xmm6, xmm7) \ 1772 AS2( psllq xmm6, 32) \ 1773 AS2( paddd xmm4, xmm6) \ 1774 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \ 1778 #define 
Top_Begin(n) \ 1781 AS2( and esp, 0xfffffff0)\ 1782 AS2( sub esp, 48*n+16)\ 1784 AS2( xor edx, edx) \ 1786 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ 1787 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ 1788 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ 1789 AS2( movdqa [esp+20+2*edx], xmm0) \ 1790 AS2( psrlq xmm0, 32) \ 1791 AS2( movdqa [esp+20+2*edx+16], xmm0) \ 1792 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ 1793 AS2( psrlq xmm1, 32) \ 1794 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ 1795 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ 1796 AS2( psrlq xmm2, 32) \ 1797 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ 1799 AS2( cmp edx, 8*(n)) \ 1801 AS2( mov eax, esi) \ 1802 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\ 1803 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\ 1804 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\ 1805 AS2( pxor xmm4, xmm4)\ 1806 AS2( pxor xmm5, xmm5) 1808 #define Top_Acc(i) \ 1809 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \ 1810 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ 1811 AS2( psrlq xmm0, 48) \ 1812 AS2( paddd xmm5, xmm0)\ 1814 #define Top_Column0(i) \ 1815 AS2( psllq xmm5, 32) \ 1821 #define Top_Column1(i) \ 1827 AS2( movd xmm0, eax)\ 1828 AS2( movd xmm1, [ecx+4])\ 1829 AS2( psrld xmm1, 16)\ 1830 AS2( pcmpgtd xmm1, xmm0)\ 1831 AS2( psrld xmm1, 31)\ 1832 AS2( paddd xmm4, xmm1)\ 1834 void SSE2_Square4(word *C,
const word *A)
1841 void SSE2_Square8(word *C, const word *A)
1857 void SSE2_Square16(word *C, const word *A)
1862 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1881 void SSE2_Square32(word *C, const word *A)
1885 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1919 void SSE2_Multiply4(word *C, const word *A, const word *B)
1931 void SSE2_Multiply8(word *C, const word *A, const word *B)
1936 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1947 void SSE2_Multiply16(word *C, const word *A, const word *B)
1952 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1971 void SSE2_Multiply32(word *C, const word *A, const word *B)
1975 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2009 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
2012 Bot_SaveAcc(0) Bot_Acc(2)
2016 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
2021 Mul_Acc(3) Mul_Acc(2)
2026 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2030 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
2035 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2044 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2048 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
2053 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2070 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2074 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
2077 Top_Acc(3) Top_Acc(2) Top_Acc(1)
2080 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2089 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
2092 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2095 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2108 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2111 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2114 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2135 #endif // #if CRYPTOPP_INTEGER_SSE2 2139 typedef int (CRYPTOPP_FASTCALL * PAdd)(
size_t N, word *C,
const word *A,
const word *B);
2140 typedef void (* PMul)(word *C,
const word *A,
const word *B);
2141 typedef void (* PSqu)(word *C,
const word *A);
2142 typedef void (* PMulTop)(word *C,
const word *A,
const word *B, word L);
2144 #if CRYPTOPP_INTEGER_SSE2 2145 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2146 static size_t s_recursionLimit = 8;
2148 static const size_t s_recursionLimit = 16;
2149 #endif // CRYPTOPP_INTEGER_SSE2 2151 static PMul s_pMul[9], s_pBot[9];
2152 static PSqu s_pSqu[9];
2153 static PMulTop s_pTop[9];
2155 void SetFunctionPointers()
2157 s_pMul[0] = &Baseline_Multiply2;
2158 s_pBot[0] = &Baseline_MultiplyBottom2;
2159 s_pSqu[0] = &Baseline_Square2;
2160 s_pTop[0] = &Baseline_MultiplyTop2;
2161 s_pTop[1] = &Baseline_MultiplyTop4;
2163 #if CRYPTOPP_INTEGER_SSE2 2172 s_recursionLimit = 32;
2174 s_pMul[1] = &SSE2_Multiply4;
2175 s_pMul[2] = &SSE2_Multiply8;
2176 s_pMul[4] = &SSE2_Multiply16;
2177 s_pMul[8] = &SSE2_Multiply32;
2179 s_pBot[1] = &SSE2_MultiplyBottom4;
2180 s_pBot[2] = &SSE2_MultiplyBottom8;
2181 s_pBot[4] = &SSE2_MultiplyBottom16;
2182 s_pBot[8] = &SSE2_MultiplyBottom32;
2184 s_pSqu[1] = &SSE2_Square4;
2185 s_pSqu[2] = &SSE2_Square8;
2186 s_pSqu[4] = &SSE2_Square16;
2187 s_pSqu[8] = &SSE2_Square32;
2189 s_pTop[2] = &SSE2_MultiplyTop8;
2190 s_pTop[4] = &SSE2_MultiplyTop16;
2191 s_pTop[8] = &SSE2_MultiplyTop32;
2194 #endif // CRYPTOPP_INTEGER_SSE2 2196 s_pMul[1] = &Baseline_Multiply4;
2197 s_pMul[2] = &Baseline_Multiply8;
2199 s_pBot[1] = &Baseline_MultiplyBottom4;
2200 s_pBot[2] = &Baseline_MultiplyBottom8;
2202 s_pSqu[1] = &Baseline_Square4;
2203 s_pSqu[2] = &Baseline_Square8;
2205 s_pTop[2] = &Baseline_MultiplyTop8;
2207 #if !CRYPTOPP_INTEGER_SSE2 2208 s_pMul[4] = &Baseline_Multiply16;
2209 s_pBot[4] = &Baseline_MultiplyBottom16;
2210 s_pSqu[4] = &Baseline_Square16;
2211 s_pTop[4] = &Baseline_MultiplyTop16;
2212 #endif // !CRYPTOPP_INTEGER_SSE2 2216 inline int Add(word *C,
const word *A,
const word *B,
size_t N)
2218 #if CRYPTOPP_INTEGER_SSE2 2219 return s_pAdd(N, C, A, B);
2221 return Baseline_Add(N, C, A, B);
2222 #endif // CRYPTOPP_INTEGER_SSE2 2225 inline int Subtract(word *C,
const word *A,
const word *B,
size_t N)
2227 #if CRYPTOPP_INTEGER_SSE2 2228 return s_pSub(N, C, A, B);
2230 return Baseline_Sub(N, C, A, B);
2231 #endif // CRYPTOPP_INTEGER_SSE2 2257 void RecursiveMultiply(word *R, word *T,
const word *A,
const word *B,
size_t N)
2261 if (N <= s_recursionLimit)
2262 s_pMul[N/4](R, A, B);
2265 const size_t N2 = N/2;
2267 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2268 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2270 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2271 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2273 RecursiveMultiply(R2, T2, A1, B1, N2);
2274 RecursiveMultiply(T0, T2, R0, R1, N2);
2275 RecursiveMultiply(R0, T2, A0, B0, N2);
2279 int c2 = Add(R2, R2, R1, N2);
2281 c2 += Add(R1, R2, R0, N2);
2282 c3 += Add(R2, R2, R3, N2);
2285 c3 -= Subtract(R1, R1, T0, N);
2287 c3 += Add(R1, R1, T0, N);
2289 c3 += Increment(R2, N2, c2);
2291 Increment(R3, N2, c3);
2299 void RecursiveSquare(word *R, word *T,
const word *A,
size_t N)
2303 if (N <= s_recursionLimit)
2307 const size_t N2 = N/2;
2309 RecursiveSquare(R0, T2, A0, N2);
2310 RecursiveSquare(R2, T2, A1, N2);
2311 RecursiveMultiply(T0, T2, A0, A1, N2);
2313 int carry = Add(R1, R1, T0, N);
2314 carry += Add(R1, R1, T0, N);
2315 Increment(R3, N2, carry);
2324 void RecursiveMultiplyBottom(word *R, word *T,
const word *A,
const word *B,
size_t N)
2328 if (N <= s_recursionLimit)
2329 s_pBot[N/4](R, A, B);
2332 const size_t N2 = N/2;
2334 RecursiveMultiply(R, T, A0, B0, N2);
2335 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2336 Add(R1, R1, T0, N2);
2337 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2338 Add(R1, R1, T0, N2);
2348 void MultiplyTop(word *R, word *T,
const word *L,
const word *A,
const word *B,
size_t N)
2352 if (N <= s_recursionLimit)
2353 s_pTop[N/4](R, A, B, L[N-1]);
2356 const size_t N2 = N/2;
2358 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2359 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2361 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2362 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2364 RecursiveMultiply(T0, T2, R0, R1, N2);
2365 RecursiveMultiply(R0, T2, A1, B1, N2);
2370 int c2 = Subtract(T2, L+N2, L, N2);
2374 c2 -= Add(T2, T2, T0, N2);
2375 t = (Compare(T2, R0, N2) == -1);
2376 c3 = t - Subtract(T2, T2, T1, N2);
2380 c2 += Subtract(T2, T2, T0, N2);
2381 t = (Compare(T2, R0, N2) == -1);
2382 c3 = t + Add(T2, T2, T1, N2);
2387 c3 += Increment(T2, N2, c2);
2389 c3 -= Decrement(T2, N2, -c2);
2390 c3 += Add(R0, T2, R1, N2);
2393 Increment(R1, N2, c3);
2397 inline void Multiply(word *R, word *T,
const word *A,
const word *B,
size_t N)
2399 RecursiveMultiply(R, T, A, B, N);
2402 inline void Square(word *R, word *T,
const word *A,
size_t N)
2404 RecursiveSquare(R, T, A, N);
2407 inline void MultiplyBottom(word *R, word *T,
const word *A,
const word *B,
size_t N)
2409 RecursiveMultiplyBottom(R, T, A, B, N);
2417 void AsymmetricMultiply(word *R, word *T,
const word *A,
size_t NA,
const word *B,
size_t NB)
2424 Multiply(R, T, A, B, NA);
2447 R[NB] = LinearMultiply(R, B, A[0], NB);
2451 SetWords(R, 0, NB+2);
2454 CopyWords(R, B, NB);
2455 R[NB] = R[NB+1] = 0;
2463 Multiply(R, T, A, B, NA);
2464 CopyWords(T+2*NA, R+NA, NA);
2466 for (i=2*NA; i<NB; i+=2*NA)
2467 Multiply(T+NA+i, T, A, B+i, NA);
2468 for (i=NA; i<NB; i+=2*NA)
2469 Multiply(R+i, T, A, B+i, NA);
2473 for (i=0; i<NB; i+=2*NA)
2474 Multiply(R+i, T, A, B+i, NA);
2475 for (i=NA; i<NB; i+=2*NA)
2476 Multiply(T+NA+i, T, A, B+i, NA);
2479 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2480 Increment(R+NB, NA);
2487 void RecursiveInverseModPower2(word *R, word *T,
const word *A,
size_t N)
2494 const size_t N2 = N/2;
2495 RecursiveInverseModPower2(R0, T0, A0, N2);
2497 SetWords(T0+1, 0, N2-1);
2498 MultiplyTop(R1, T1, T0, R0, A0, N2);
2499 MultiplyBottom(T0, T1, R0, A1, N2);
2500 Add(T0, R1, T0, N2);
2501 TwosComplement(T0, N2);
2502 MultiplyBottom(R1, T1, R0, T0, N2);
2506 T[0] = AtomicInverseModPower2(A[0]);
2508 s_pBot[0](T+2, T, A);
2509 TwosComplement(T+2, 2);
2510 Increment(T+2, 2, 2);
2511 s_pBot[0](R, T, T+2);
2521 void MontgomeryReduce(word *R, word *T, word *X,
const word *M,
const word *U,
size_t N)
2524 MultiplyBottom(R, T, X, U, N);
2525 MultiplyTop(T, T+N, X, R, M, N);
2526 word borrow = Subtract(T, X+N, T, N);
2528 word carry = Add(T+N, T, M, N);
2530 CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2531 CopyWords(R, T + ((0-borrow) & N), N);
2533 const word u = 0-U[0];
2535 for (
size_t i=0; i<N; i++)
2537 const word t = u * X[i];
2539 for (
size_t j=0; j<N; j+=2)
2541 MultiplyWords(p, t, M[j]);
2542 Acc2WordsBy1(p, X[i+j]);
2544 X[i+j] = LowWord(p);
2546 MultiplyWords(p, t, M[j+1]);
2547 Acc2WordsBy1(p, X[i+j+1]);
2549 X[i+j+1] = LowWord(p);
2553 if (Increment(X+N+i, N-i, c))
2554 while (!Subtract(X+N, X+N, M, N)) {}
2557 memcpy(R, X+N, N*WORD_SIZE);
2559 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2560 for (
size_t i=0; i<N; i++)
2562 __m64 t = _mm_cvtsi32_si64(X[i]);
2563 t = _mm_mul_su32(t, u);
2564 __m64 c = _mm_setzero_si64();
2565 for (
size_t j=0; j<N; j+=2)
2567 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2568 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2569 c = _mm_add_si64(c, p);
2570 X[i+j] = _mm_cvtsi64_si32(c);
2571 c = _mm_srli_si64(c, 32);
2572 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2573 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2574 c = _mm_add_si64(c, p);
2575 X[i+j+1] = _mm_cvtsi64_si32(c);
2576 c = _mm_srli_si64(c, 32);
2579 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2580 while (!Subtract(X+N, X+N, M, N)) {}
2583 memcpy(R, X+N, N*WORD_SIZE);
2595 void HalfMontgomeryReduce(word *R, word *T,
const word *X,
const word *M,
const word *U,
const word *V,
size_t N)
2609 const size_t N2 = N/2;
2610 Multiply(T0, T2, V0, X3, N2);
2611 int c2 = Add(T0, T0, X0, N);
2612 MultiplyBottom(T3, T2, T0, U, N2);
2613 MultiplyTop(T2, R, T0, T3, M0, N2);
2614 c2 -= Subtract(T2, T1, T2, N2);
2615 Multiply(T0, R, T3, M1, N2);
2616 c2 -= Subtract(T0, T2, T0, N2);
2617 int c3 = -(int)Subtract(T1, X2, T1, N2);
2618 Multiply(R0, T2, V1, X3, N2);
2619 c3 += Add(R, R, T, N);
2622 c3 += Increment(R1, N2);
2624 c3 -= Decrement(R1, N2, -c2);
2628 Subtract(R, R, M, N);
2722 static inline void AtomicDivide(word *Q,
const word *A,
const word *B)
2725 DWord q = DivideFourWordsByTwo<word, DWord>(T,
DWord(A[0], A[1]),
DWord(A[2], A[3]),
DWord(B[0], B[1]));
2726 Q[0] = q.GetLowHalf();
2727 Q[1] = q.GetHighHalf();
2729 #if defined(CRYPTOPP_DEBUG) 2733 CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2743 static void CorrectQuotientEstimate(word *R, word *T, word *Q,
const word *B,
size_t N)
2747 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2749 word borrow = Subtract(R, R, T, N+2);
2751 CRYPTOPP_UNUSED(borrow);
2753 while (R[N] || Compare(R, B, N) >= 0)
2755 R[N] -= Subtract(R, R, B, N);
2756 Q[1] += (++Q[0]==0);
2767 void Divide(word *R, word *Q, word *T,
const word *A,
size_t NA,
const word *B,
size_t NB)
2775 word *
const TB=T+NA+2;
2776 word *
const TP=T+NA+2+NB;
2779 unsigned shiftWords = (B[NB-1]==0);
2780 TB[0] = TB[NB-1] = 0;
2781 CopyWords(TB+shiftWords, B, NB-shiftWords);
2782 unsigned shiftBits = WORD_BITS -
BitPrecision(TB[NB-1]);
2784 ShiftWordsLeftByBits(TB, NB, shiftBits);
2787 TA[0] = TA[NA] = TA[NA+1] = 0;
2788 CopyWords(TA+shiftWords, A, NA);
2789 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2791 if (TA[NA+1]==0 && TA[NA] <= 1)
2793 Q[NA-NB+1] = Q[NA-NB] = 0;
2794 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2796 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2807 BT[0] = TB[NB-2] + 1;
2808 BT[1] = TB[NB-1] + (BT[0]==0);
2811 for (
size_t i=NA-2; i>=NB; i-=2)
2813 AtomicDivide(Q+i-NB, TA+i-2, BT);
2814 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2818 CopyWords(R, TA+shiftWords, NB);
2819 ShiftWordsRightByBits(R, NB, shiftBits);
2822 static inline size_t EvenWordCount(
const word *X,
size_t N)
2824 while (N && X[N-2]==0 && X[N-1]==0)
2835 unsigned int AlmostInverse(word *R, word *T,
const word *A,
size_t NA,
const word *M,
size_t N)
2843 size_t bcLen=2, fgLen=EvenWordCount(M, N);
2847 SetWords(T, 0, 3*N);
2849 CopyWords(f, A, NA);
2857 if (EvenWordCount(f, fgLen)==0)
2863 ShiftWordsRightByWords(f, fgLen, 1);
2864 bcLen += 2 * (c[bcLen-1] != 0);
2866 ShiftWordsLeftByWords(c, bcLen, 1);
2876 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2879 Subtract(R, M, b, N);
2885 ShiftWordsRightByBits(f, fgLen, i);
2886 t = ShiftWordsLeftByBits(c, bcLen, i);
2888 bcLen += 2 * (t!=0);
2891 bool swap = Compare(f, g, fgLen)==-1;
2896 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2898 Subtract(f, f, g, fgLen);
2899 t = Add(b, b, c, bcLen);
2910 void DivideByPower2Mod(word *R,
const word *A,
size_t k,
const word *M,
size_t N)
2917 ShiftWordsRightByBits(R, N, 1);
2920 word carry = Add(R, R, M, N);
2921 ShiftWordsRightByBits(R, N, 1);
2922 R[N-1] += carry<<(WORD_BITS-1);
2931 void MultiplyByPower2Mod(word *R,
const word *A,
size_t k,
const word *M,
size_t N)
2936 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2937 Subtract(R, R, M, N);
2942 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2944 static inline size_t RoundupSize(
size_t n)
2947 return RoundupSizeTable[n];
2959 : reg(2), sign(POSITIVE)
2961 reg[0] = reg[1] = 0;
2965 : reg(RoundupSize(t.WordCount())), sign(t.sign)
2967 CopyWords(reg, t.reg, reg.
size());
2973 reg[0] = word(value);
2974 reg[1] = word(SafeRightShift<WORD_BITS>(value));
2987 reg[0] = word(value);
2988 reg[1] = word(SafeRightShift<WORD_BITS>((
unsigned long)value));
3003 unsigned long value = (
unsigned long)reg[0];
3004 value += SafeLeftShift<WORD_BITS, unsigned long>((
unsigned long)reg[1]);
3007 return (
signed long)value >= 0;
3009 return -(
signed long)value < 0;
3016 unsigned long value = (
unsigned long)reg[0];
3017 value += SafeLeftShift<WORD_BITS, unsigned long>((
unsigned long)reg[1]);
3018 return sign==
POSITIVE ? value : -(
signed long)value;
3027 Decode(encodedInteger, byteCount, s);
3032 encodedInteger.
Get(block, block.
size());
3046 Decode(encodedInteger, byteCount, s);
3051 #if (_MSC_VER >= 1500) 3052 std::reverse_copy(encodedInteger, encodedInteger+byteCount,
3053 stdext::make_checked_array_iterator(block.
begin(), block.
size()));
3055 std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.
begin());
3075 if (!
Randomize(rng, min, max, rnType, equiv, mod))
3095 if (reg.
size() != t.reg.
size() || t.reg[t.reg.
size()/2] == 0)
3097 CopyWords(reg, t.reg, reg.
size());
3108 if (n/WORD_BITS < reg.
size())
3109 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3119 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3123 if (n/WORD_BITS < reg.
size())
3124 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3133 if (n/WORD_SIZE < reg.
size())
3134 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3142 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3143 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3150 for (
unsigned int j=0; j<n; j++)
3151 v |= lword(
GetBit(i+j)) << j;
3172 std::swap(sign, a.sign);
3176 : reg(RoundupSize(length)), sign(POSITIVE)
3179 SetWords(reg+1, 0, reg.
size()-1);
3187 int radix, sign = 1;
3190 unsigned int length;
3191 for (length = 0; str[length] != 0; length++) {}
3198 switch (str[length-1])
3220 str += 1, length -= 1;
3223 if (length > 2 && str[0] ==
'0' && (str[1] ==
'x' || str[1] ==
'X'))
3226 str += 2, length -= 2;
3231 for (
unsigned int i=0; i<length; i++)
3233 int digit, ch =
static_cast<int>(str[i]);
3237 if (ch >=
'0' && ch <=
'9')
3239 else if (ch >=
'a' && ch <=
'f')
3240 digit = ch -
'a' + 10;
3241 else if (ch >=
'A' && ch <=
'F')
3242 digit = ch -
'A' + 10;
3256 unsigned int nh = 0, nl = 0, nc = 0;
3259 for (
unsigned int i=0; i<length; i++)
3261 int digit, ch =
static_cast<int>(str[i]);
3263 if (ch >=
'0' && ch <=
'9')
3265 else if (ch >=
'a' && ch <=
'f')
3266 digit = ch -
'a' + 10;
3267 else if (ch >=
'A' && ch <=
'F')
3268 digit = ch -
'A' + 10;
3281 v += position * (nh << 4 | nl);
3282 nc = 0, position <<= 8;
3292 for (
int i=static_cast<int>(length)-1; i>=0; i--)
3294 int digit, ch =
static_cast<int>(str[i]);
3296 if (ch >=
'0' && ch <=
'9')
3298 else if (ch >=
'a' && ch <=
'f')
3299 digit = ch -
'a' + 10;
3300 else if (ch >=
'A' && ch <=
'F')
3301 digit = ch -
'A' + 10;
3320 : reg(2), sign(POSITIVE)
3322 *
this = StringToInteger(str,order);
3326 : reg(2), sign(POSITIVE)
3328 *
this = StringToInteger(str,order);
3333 return (
unsigned int)CountWords(reg, reg.
size());
3340 return (wordCount-1)*WORD_SIZE +
BytePrecision(reg[wordCount-1]);
3349 return (wordCount-1)*WORD_BITS +
BitPrecision(reg[wordCount-1]);
3358 Decode(store, inputLen, s);
3371 while (inputLen>0 && (sign==
POSITIVE ? b==0 : b==0xff))
3379 for (
size_t i=inputLen; i > 0; i--)
3382 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3387 for (
size_t i=inputLen; i<reg.
size()*WORD_SIZE; i++)
3388 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3389 TwosComplement(reg, reg.
size());
3399 const bool pre = (signedness ==
UNSIGNED);
3414 Encode(sink, outputLen, signedness);
3421 for (
size_t i=outputLen; i > 0; i--)
3449 if (!dec.IsDefiniteLength() || dec.
MaxRetrievable() < dec.RemainingLength())
3465 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3481 word16 bitCount = word16(
BitCount());
3485 return 2 + byteCount;
3505 const size_t nbytes = nbits/8 + 1;
3509 buf[0] = (byte)
Crop(buf[0], nbits % 8);
3519 const unsigned int nbits = range.
BitCount();
3525 while (*
this > range);
3533 (
"RandomNumberType", rnType)(
"EquivalentTo", equiv)(
"Mod", mod));
3539 KDF2_RNG(
const byte *seed,
size_t seedSize)
3540 : m_counter(0), m_counterAndSeed(ClampSize(seedSize) + 4)
3542 memcpy(m_counterAndSeed + 4, seed, ClampSize(seedSize));
3554 inline size_t ClampSize(
size_t req)
const 3557 if (req > 16U*1024*1024)
3558 return 16U*1024*1024;
3586 throw InvalidArgument(
"Integer: invalid EquivalentTo and/or Mod argument");
3605 bq.
Get(finalSeed, finalSeed.size());
3606 kdf2Rng.reset(
new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3617 Integer min1 = min + (equiv-min)%mod;
3638 if (
FirstPrime(first, max, equiv, mod, pSelector))
3642 if (!
FirstPrime(first, max, equiv, mod, pSelector))
3650 if (
FirstPrime(*
this,
STDMIN(*
this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3660 std::istream& operator>>(std::istream& in,
Integer &a)
3663 unsigned int length = 0;
3672 if (length >= str.
size())
3673 str.
Grow(length + 16);
3675 while (in && (c==
'-' || c==
'x' || (c>=
'0' && c<=
'9') || (c>=
'a' && c<=
'f') || (c>=
'A' && c<=
'F') || c==
'h' || c==
'H' || c==
'o' || c==
'O' || c==
',' || c==
'.'));
3679 str[length-1] =
'\0';
3686 inline int FlagToBase(
long f) {
3687 return f == std::ios::hex ? 16 : (f == std::ios::oct ? 8 : 10);
3690 inline char FlagToSuffix(
long f) {
3691 return f == std::ios::hex ?
'h' : (f == std::ios::oct ?
'o' :
'.');
3695 std::ostream& operator<<(std::ostream& out,
const Integer &a)
3698 const long f = out.flags() & std::ios::basefield;
3699 const int base = FlagToBase(f);
3700 const char suffix = FlagToSuffix(f);
3712 static const char upper[]=
"0123456789ABCDEF";
3713 static const char lower[]=
"0123456789abcdef";
3715 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3732 #ifdef CRYPTOPP_USE_STD_SHOWBASE 3733 if (out.flags() & std::ios_base::showbase)
3738 return out << suffix;
3746 if (Increment(reg, reg.
size()))
3749 reg[reg.
size()/2]=1;
3754 word borrow = Decrement(reg, reg.
size());
3767 if (Increment(reg, reg.
size()))
3770 reg[reg.
size()/2]=1;
3775 if (Decrement(reg, reg.
size()))
3789 else if (reg.
size() >= t.reg.
size())
3792 AndWords(result.reg, reg, t.reg.
size());
3800 AndWords(result.reg, t.reg, reg.
size());
3815 else if (reg.
size() >= t.reg.
size())
3818 OrWords(result.reg, t.reg, t.reg.
size());
3826 OrWords(result.reg, reg, reg.
size());
3841 else if (reg.
size() >= t.reg.
size())
3844 XorWords(result.reg, t.reg, t.reg.
size());
3852 XorWords(result.reg, reg, reg.
size());
3864 int carry;
const bool pre = (a.reg.size() == b.reg.size());
3865 if (!pre && a.reg.size() > b.reg.size())
3867 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3868 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3869 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3873 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3877 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3878 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3879 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3885 sum.reg[sum.reg.
size()/2] = 1;
3892 unsigned aSize = a.WordCount();
3894 unsigned bSize = b.WordCount();
3902 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3903 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3904 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3908 else if (aSize == bSize)
3910 if (Compare(a.reg, b.reg, aSize) >= 0)
3912 Subtract(diff.reg, a.reg, b.reg, aSize);
3917 Subtract(diff.reg, b.reg, a.reg, aSize);
3923 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3924 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3925 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3932 template <
class T>
inline const T& STDMAX2(
const T& a,
const T& b)
3934 return a < b ? b : a;
3939 Integer sum((word)0, STDMAX2(reg.
size(), b.reg.size()));
3942 if (b.NotNegative())
3943 PositiveAdd(sum, *
this, b);
3945 PositiveSubtract(sum, *
this, b);
3949 if (b.NotNegative())
3950 PositiveSubtract(sum, b, *
this);
3953 PositiveAdd(sum, *
this, b);
3966 PositiveAdd(*
this, *
this, t);
3968 PositiveSubtract(*
this, *
this, t);
3973 PositiveSubtract(*
this, t, *
this);
3976 PositiveAdd(*
this, *
this, t);
3985 Integer diff((word)0, STDMAX2(reg.
size(), b.reg.size()));
3988 if (b.NotNegative())
3989 PositiveSubtract(diff, *
this, b);
3991 PositiveAdd(diff, *
this, b);
3995 if (b.NotNegative())
3997 PositiveAdd(diff, *
this, b);
4001 PositiveSubtract(diff, b, *
this);
4012 PositiveSubtract(*
this, *
this, t);
4014 PositiveAdd(*
this, *
this, t);
4020 PositiveAdd(*
this, *
this, t);
4024 PositiveSubtract(*
this, t, *
this);
4032 const size_t shiftWords = n / WORD_BITS;
4033 const unsigned int shiftBits = (
unsigned int)(n % WORD_BITS);
4036 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
4037 ShiftWordsLeftByBits(reg+shiftWords, wordCount+
BitsToWords(shiftBits), shiftBits);
4044 const size_t shiftWords = n / WORD_BITS;
4045 const unsigned int shiftBits = (
unsigned int)(n % WORD_BITS);
4047 ShiftWordsRightByWords(reg, wordCount, shiftWords);
4048 if (wordCount > shiftWords)
4049 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
4061 AndWords(reg, t.reg, size);
4073 OrWords(reg, t.reg, t.reg.
size());
4077 const size_t head = reg.
size();
4078 const size_t tail = t.reg.
size() - reg.
size();
4080 OrWords(reg, t.reg, head);
4081 CopyWords(reg+head,t.reg+head,tail);
4098 XorWords(reg, t.reg, t.reg.
size());
4102 const size_t head = reg.
size();
4103 const size_t tail = t.reg.
size() - reg.
size();
4105 XorWords(reg, t.reg, head);
4106 CopyWords(reg+head,t.reg+head,tail);
4115 size_t aSize = RoundupSize(a.WordCount());
4116 size_t bSize = RoundupSize(b.WordCount());
4118 product.reg.
CleanNew(RoundupSize(aSize+bSize));
4122 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
4127 PositiveMultiply(product, a, b);
4129 if (a.NotNegative() != b.NotNegative())
4136 Multiply(product, *
this, b);
4165 unsigned aSize = a.WordCount();
4166 unsigned bSize = b.WordCount();
4182 remainder.reg.
CleanNew(RoundupSize(bSize));
4184 quotient.reg.
CleanNew(RoundupSize(aSize-bSize+2));
4188 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4193 PositiveDivide(remainder, quotient, dividend, divisor);
4215 if (wordCount <= a.WordCount())
4217 r.reg.
resize(RoundupSize(wordCount));
4218 CopyWords(r.reg, a.reg, wordCount);
4219 SetWords(r.reg+wordCount, 0, r.reg.
size()-wordCount);
4220 if (n % WORD_BITS != 0)
4221 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4225 r.reg.
resize(RoundupSize(a.WordCount()));
4226 CopyWords(r.reg, a.reg, r.reg.
size());
4230 if (a.IsNegative() && r.
NotZero())
4261 remainder = dividend.reg[0] & (divisor-1);
4266 quotient.reg.
CleanNew(RoundupSize(i));
4270 quotient.reg[i] =
DWord(dividend.reg[i], remainder) / divisor;
4271 remainder =
DWord(dividend.reg[i], remainder) % divisor;
4282 remainder = divisor - remainder;
4305 if ((divisor & (divisor-1)) != 0)
4315 remainder =
DWord(reg[i], remainder) % divisor;
4322 remainder = sum % divisor;
4327 remainder = reg[0] & (divisor-1);
4331 remainder = divisor - remainder;
4339 sign =
Sign(1-sign);
4342 int Integer::PositiveCompare(
const Integer& t)
const 4349 return size > tSize ? 1 : -1;
4351 return CryptoPP::Compare(reg, t.reg, size);
4359 return PositiveCompare(t);
4368 return -PositiveCompare(t);
4384 y = (x + *
this/x) >> 1;
4398 return (
WordCount() == 1) && (reg[0] == 1);
4436 return Modulo(m).InverseModNext(m);
4440 return Modulo(m).InverseModNext(m);
4442 return InverseModNext(m);
4458 return !u ?
Zero() : (m*(*this-u)+1)/(*this);
4464 unsigned k = AlmostInverse(r.reg, T, reg, reg.
size(), m.reg, m.reg.
size());
4465 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.
size());
4473 word g0 = mod, g1 = *
this % mod;
4474 word v0 = 0, v1 = 1;
4502 if (oid != ASN1::prime_field())
4512 ASN1::prime_field().DEREncode(seq);
4529 if (a.reg.size()==m_modulus.reg.
size())
4531 CryptoPP::DivideByPower2Mod(m_result.reg.
begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4535 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4540 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4542 if (CryptoPP::Add(m_result.reg.
begin(), a.reg, b.reg, a.reg.size())
4543 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4545 CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
4552 if (m_result1 >= m_modulus)
4553 m_result1 -= m_modulus;
4560 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4562 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4563 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4565 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4580 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4582 if (CryptoPP::Subtract(m_result.reg.
begin(), a.reg, b.reg, a.reg.size()))
4583 CryptoPP::Add(m_result.reg.
begin(), m_result.reg, m_modulus.reg, a.reg.size());
4590 m_result1 += m_modulus;
4597 if (a.reg.size()==m_modulus.reg.
size() && b.reg.size()==m_modulus.reg.
size())
4599 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4600 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4617 CopyWords(m_result.reg.
begin(), m_modulus.reg, m_modulus.reg.
size());
4618 if (CryptoPP::Subtract(m_result.reg.
begin(), m_result.reg, a.reg, a.reg.size()))
4619 Decrement(m_result.reg.
begin()+a.reg.size(), m_modulus.reg.
size()-a.reg.size());
4626 if (m_modulus.
IsOdd())
4637 if (m_modulus.
IsOdd())
4641 for (
unsigned int i=0; i<exponentsCount; i++)
4650 m_u((word)0, m_modulus.reg.size()),
4651 m_workspace(5*m_modulus.reg.size())
4653 if (!m_modulus.IsOdd())
4654 throw InvalidArgument(
"MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4656 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.
size());
4661 word *
const T = m_workspace.
begin();
4662 word *
const R = m_result.reg.begin();
4663 const size_t N = m_modulus.reg.size();
4666 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4667 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4668 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4674 word *
const T = m_workspace.
begin();
4675 word *
const R = m_result.reg.begin();
4676 const size_t N = m_modulus.reg.size();
4679 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4680 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4681 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4687 word *
const T = m_workspace.
begin();
4688 word *
const R = m_result.reg.begin();
4689 const size_t N = m_modulus.reg.size();
4692 CopyWords(T, a.reg, a.reg.size());
4693 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4694 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4701 word *
const T = m_workspace.
begin();
4702 word *
const R = m_result.reg.begin();
4703 const size_t N = m_modulus.reg.size();
4706 CopyWords(T, a.reg, a.reg.size());
4707 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4708 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4709 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4714 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4716 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4723 template <> CRYPTOPP_DLL
4727 static const unsigned int BIT_32 = (1U << 31);
4728 const bool UPPER = !!(base & BIT_32);
4729 static const unsigned int BIT_31 = (1U << 30);
4730 const bool BASE = !!(base & BIT_31);
4732 const char CH = UPPER ?
'A' :
'a';
4733 base &= ~(BIT_32|BIT_31);
4739 bool negative =
false, zero =
false;
4740 if (value.IsNegative())
4757 s[i++]=char((digit < 10 ?
'0' : (CH - 10)) + digit);
4762 result.reserve(i+2);
4777 else if (base == 16)
4789 template <> CRYPTOPP_DLL
4793 static const unsigned int HIGH_BIT = (1U << 31);
4794 const char CH = !!(base & HIGH_BIT) ?
'A' :
'a';
4804 word64 digit = value % base;
4805 result = char((digit < 10 ?
'0' : (CH - 10)) + digit) + result;
4811 #ifndef CRYPTOPP_NO_ASSIGN_TO_INTEGER 4814 bool AssignIntToInteger(
const std::type_info &valueType,
void *pInteger,
const void *pInt)
4816 if (valueType !=
typeid(
Integer))
4818 *
reinterpret_cast<Integer *
>(pInteger) = *reinterpret_cast<const int *>(pInt);
4821 #endif // CRYPTOPP_NO_ASSIGN_TO_INTEGER 4830 SetFunctionPointers();
4839 #if defined(HAVE_GCC_INIT_PRIORITY) 4841 const Integer g_zero __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 11))) =
Integer(0L);
4842 const Integer g_one __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 12))) =
Integer(1L);
4843 const Integer g_two __attribute__ ((init_priority (CRYPTOPP_INIT_PRIORITY + 13))) =
Integer(2L);
4844 #elif defined(HAVE_MSC_INIT_PRIORITY) 4845 #pragma warning(disable: 4075) 4846 #pragma init_seg(".CRT$XCU") 4851 #pragma warning(default: 4075) 4852 #elif HAVE_XLC_INIT_PRIORITY 4854 #pragma priority(280) 4867 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) 4869 #elif defined(CRYPTOPP_CXX11_DYNAMIC_INIT) 4870 static const Integer s_zero(0L);
4872 #else // Potential memory leak. Avoid if possible. 4879 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) 4881 #elif defined(CRYPTOPP_CXX11_DYNAMIC_INIT) 4882 static const Integer s_one(1L);
4884 #else // Potential memory leak. Avoid if possible. 4891 #if defined(HAVE_GCC_INIT_PRIORITY) || defined(HAVE_MSC_INIT_PRIORITY) || defined(HAVE_XLC_INIT_PRIORITY) 4893 #elif defined(CRYPTOPP_CXX11_DYNAMIC_INIT) 4894 static const Integer s_two(2L);
4896 #else // Potential memory leak. Avoid if possible. 4903 #endif // CRYPTOPP_IMPORTS Used to pass byte array input as part of a NameValuePairs object.
An invalid argument was detected.
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Integer MultiplicativeInverse() const
Calculate multiplicative inverse.
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
bool NotZero() const
Determines if the Integer is non-0.
Classes for working with NameValuePairs.
Integer Or(const Integer &t) const
Bitwise OR.
Integer & operator|=(const Integer &t)
Bitwise OR Assignment.
void swap(SecBlock< T, A > &b)
Swap contents with another SecBlock.
Integer And(const Integer &t) const
Bitwise AND.
a number which is probabilistically prime
Utility functions for the Crypto++ library.
Integer Plus(const Integer &b) const
Addition.
ByteOrder
Provides the byte ordering.
Restricts the instantiation of a class to one static object without locks.
void CleanNew(size_type newSize)
Change size without preserving contents.
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
bool IsSquare() const
Determine whether this integer is a perfect square.
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
size_t size() const
Length of the memory block.
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
This file contains helper classes/functions for implementing public key algorithms.
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
Encode absolute value as big-endian octet string.
Integer & operator=(const Integer &t)
Assignment.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
static Integer Gcd(const Integer &a, const Integer &n)
Calculate greatest common divisor.
void resize(size_type newSize)
Change size and preserve contents.
Integer & operator+=(const Integer &t)
Addition Assignment.
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
void PutWord(bool assumeAligned, ByteOrder order, byte *block, T value, const byte *xorBlock=NULL)
Access a block of memory.
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
void CleanGrow(size_type newSize)
Change size and preserve contents.
Integer & operator--()
Pre-decrement.
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Secure memory block with allocator and cleanup.
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
signed long ConvertToLong() const
Convert the Integer to Long.
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Signedness
Used when importing and exporting integers.
unsigned int MaxElementByteLength() const
Provides the maximum byte size of an element in the ring.
ASN.1 object identifiers for algorithms and schemes.
Classes for automatic resource management.
bool IsP4()
Determines if the CPU is an Intel P4.
bool IsNegative() const
Determines if the Integer is negative.
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a MontgomeryRepresentation.
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
Extended Division.
Ring of congruence classes modulo n.
Interface for random number generators.
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
void New(size_type newSize)
Change size without preserving contents.
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Integer InverseMod(const Integer &n) const
Calculate multiplicative inverse.
bool IsPositive() const
Determines if the Integer is positive.
bool NotNegative() const
Determines if the Integer is non-negative.
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
static const Integer & One()
Integer representing 1.
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
P1363 key derivation function.
byte order is little-endian
Sign
Used internally to represent the integer.
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
bool operator!() const
Negation.
Pointer that overloads operator ->
bool IsUnit() const
Determine if 1 or -1.
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Classes and functions for secure memory allocations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Integer Modulo(const Integer &b) const
Remainder.
Copy input to a memory buffer.
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
Minimum number of bytes to encode this integer.
Integer DividedBy(const Integer &b) const
Division.
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Integer Times(const Integer &b) const
Multiplication.
a number with no special properties
const byte * begin() const
Pointer to the first byte in the memory block.
Integer & operator++()
Pre-increment.
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
void swap(Integer &a)
Swaps this Integer with another Integer.
Integer()
Creates the zero integer.
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Integer & operator<<=(size_t n)
Left-shift Assignment.
bool IsZero() const
Determines if the Integer is 0.
Exception thrown when an error is encountered decoding an OpenPGP integer.
void Negate()
Reverse the Sign of the Integer.
int Compare(const Integer &a) const
Perform signed comparison.
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Application callback to signal suitability of a candidate prime.
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Multiple precision integer with arithmetic operations.
virtual Element Exponentiate(const Element &a, const Integer &e) const
Raises a base to an exponent in the group.
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
const Integer & Half(const Integer &a) const
Divides an element by 2.
static const Integer & Two()
Integer representing 2.
bool IsPowerOf2(const T &value)
Tests whether a value is a power of 2.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Integer & operator^=(const Integer &t)
Bitwise XOR Assignment.
#define MEMORY_BARRIER
A memory barrier.
RandomNumberType
Properties of a random integer.
const char * Seed()
ConstByteArrayParameter.
bool IsEven() const
Determines if the Integer is even parity.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
String-based implementation of Store interface.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
Extended Division.
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
void BERDecodeError()
Raises a BERDecodeErr.
Data structure used to store byte strings.
Functions for CPU features and intrinsics.
Classes and functions for working with ASN.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
iterator begin()
Provides an iterator pointing to the first element in the memory block.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
const char * PointerToPrimeSelector()
const PrimeSelector *
Implementation of BufferedTransformation's attachment interface.
Classes and functions for number theoretic operations.
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Exception thrown when division by 0 is encountered.
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Integer Squared() const
Multiply this integer by itself.
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Exception thrown when a random number cannot be found that satisfies the condition.
bool GenerateRandomNoThrow(RandomNumberGenerator &rng, const NameValuePairs &params=g_nullNameValuePairs)
Generate a random number.
Performs modular arithmetic in Montgomery representation for increased speed.
bool HasSSE2()
Determines SSE2 availability.
Integer Minus(const Integer &b) const
Subtraction.
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Multiple precision integer with arithmetic operations.
Integer Xor(const Integer &t) const
Bitwise XOR.
static const Integer & Zero()
Integer representing 0.
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
void Grow(size_type newSize)
Change size and preserve contents.
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Class file for performing modular arithmetic.
Crypto++ library namespace.
bool GetValue(const char *name, T &value) const
Get a named value.
Integer & operator>>=(size_t n)
Right-shift Assignment.
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Integer SquareRoot() const
Extract square root.
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
bool IsConvertableToLong() const
Determines if the Integer is convertible to Long.
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Integer AbsoluteValue() const
Retrieve the absolute value of this integer.
Integer & operator-=(const Integer &t)
Subtraction Assignment.
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
size_type size() const
Provides the count of elements in the SecBlock.
Integer & operator&=(const Integer &t)
Bitwise AND Assignment.
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Integer operator-() const
Negation (unary minus).
the value is positive or 0
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
bool IsOdd() const
Determines if the Integer is odd parity.
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Interface for retrieving values given their names.
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.