#include <NTL/GF2X.h>
#include <NTL/vec_long.h>

#include <NTL/new.h>
#include <stdio.h>

#if (defined(NTL_WIZARD_HACK) && defined(NTL_GF2X_LIB))
#undef NTL_GF2X_LIB
#endif

#ifdef NTL_GF2X_LIB
#include <gf2x.h>
#endif

NTL_START_IMPL

long GF2X::HexOutput = 0;

void GF2X::SetMaxLength(long n)
{
   if (n < 0) Error("GF2X::SetMaxLength: negative length");
   if (NTL_OVERFLOW(n, 1, 0))
      Error("GF2X::SetMaxLength: excessive length");
   long w = (n + NTL_BITS_PER_LONG - 1)/NTL_BITS_PER_LONG;
   xrep.SetMaxLength(w);
}

GF2X::GF2X(INIT_SIZE_TYPE, long n)
{
   SetMaxLength(n);
}

const GF2X& GF2X::zero()
{
   static GF2X z;
   return z;
}

void GF2X::normalize()
{
   long n;
   const _ntl_ulong *p;

   // strip leading zero words, so that the top word is always nonzero

   n = xrep.length();
   if (n == 0) return;
   p = xrep.elts() + n;
   while (n > 0 && (*--p) == 0) {
      n--;
   }
   xrep.QuickSetLength(n);
}

long IsZero(const GF2X& a)
{
   return a.xrep.length() == 0;
}

long IsOne(const GF2X& a)
{
   return a.xrep.length() == 1 && a.xrep[0] == 1;
}

long IsX(const GF2X& a)
{
   return a.xrep.length() == 1 && a.xrep[0] == 2;
}

GF2 coeff(const GF2X& a, long i)
{
   if (i < 0) return to_GF2(0);
   long wi = i/NTL_BITS_PER_LONG;
   if (wi >= a.xrep.length()) return to_GF2(0);
   long bi = i - wi*NTL_BITS_PER_LONG;

   return to_GF2((a.xrep[wi] & (1UL << bi)) != 0);
}

GF2 LeadCoeff(const GF2X& a)
{
   if (IsZero(a))
      return to_GF2(0);
   else
      return to_GF2(1);
}

GF2 ConstTerm(const GF2X& a)
{
   if (IsZero(a))
      return to_GF2(0);
   else
      return to_GF2((a.xrep[0] & 1) != 0);
}

void set(GF2X& x)
{
   x.xrep.SetLength(1);
   x.xrep[0] = 1;
}

void SetX(GF2X& x)
{
   x.xrep.SetLength(1);
   x.xrep[0] = 2;
}

void SetCoeff(GF2X& x, long i)
{
   if (i < 0) {
      Error("SetCoeff: negative index");
      return;
   }

   long n, j;

   n = x.xrep.length();
   long wi = i/NTL_BITS_PER_LONG;

   if (wi >= n) {
      // index points past the current words: extend and zero-fill
      x.xrep.SetLength(wi+1);
      for (j = n; j <= wi; j++)
         x.xrep[j] = 0;
   }

   long bi = i - wi*NTL_BITS_PER_LONG;
   x.xrep[wi] |= (1UL << bi);
}

void SetCoeff(GF2X& x, long i, long val)
{
   if (i < 0) {
      Error("SetCoeff: negative index");
      return;
   }

   val = val & 1;

   if (val) {
      SetCoeff(x, i);
      return;
   }

   // we want to clear position i

   long n;

   n = x.xrep.length();
   long wi = i/NTL_BITS_PER_LONG;

   if (wi >= n)
      return;

   long bi = i - wi*NTL_BITS_PER_LONG;
   x.xrep[wi] &= ~(1UL << bi);

   if (wi == n-1) x.normalize();
}

void SetCoeff(GF2X& x, long i, GF2 a)
{
   SetCoeff(x, i, rep(a));
}

void swap(GF2X& a, GF2X& b)
{
   swap(a.xrep, b.xrep);
}

long deg(const GF2X& aa)
{
   long n = aa.xrep.length();

   if (n == 0)
      return -1;

   _ntl_ulong a = aa.xrep[n-1];
   long i = 0;

   if (a == 0) Error("GF2X: unnormalized polynomial detected in deg");

   // compute the bit length of the (nonzero) top word

   while (a >= 256)
      i += 8, a >>= 8;
   if (a >= 16)
      i += 4, a >>= 4;
   if (a >= 4)
      i += 2, a >>= 2;
   if (a >= 2)
      i += 2;
   else if (a >= 1)
      i++;

   return NTL_BITS_PER_LONG*(n-1) + i - 1;
}

long operator==(const GF2X& a, const GF2X& b)
{
   return a.xrep == b.xrep;
}

long operator==(const GF2X& a, long b)
{
   if (b & 1)
      return IsOne(a);
   else
      return IsZero(a);
}

long operator==(const GF2X& a, GF2 b)
{
   if (b == 1)
      return IsOne(a);
   else
      return IsZero(a);
}

void random(GF2X& x, long n)
{
   if (n < 0) Error("GF2X random: negative length");
   if (NTL_OVERFLOW(n, 1, 0))
      Error("GF2X random: excessive length");

   long wl = (n+NTL_BITS_PER_LONG-1)/NTL_BITS_PER_LONG;

   x.xrep.SetLength(wl);

   long i;
   for (i = 0; i < wl-1; i++) {
      x.xrep[i] = RandomWord();
   }

   if (n > 0) {
      long pos = n % NTL_BITS_PER_LONG;
      if (pos == 0) pos = NTL_BITS_PER_LONG;
      x.xrep[wl-1] = RandomBits_ulong(pos);
   }

   x.normalize();
}

void add(GF2X& x, const GF2X& a, const GF2X& b)
{
   long sa = a.xrep.length();
   long sb = b.xrep.length();

   long i;

   if (sa == sb) {
      x.xrep.SetLength(sa);
      if (sa == 0) return;

      _ntl_ulong *xp = x.xrep.elts();
      const _ntl_ulong *ap = a.xrep.elts();
      const _ntl_ulong *bp = b.xrep.elts();

      for (i = 0; i < sa; i++)
         xp[i] = ap[i] ^ bp[i];

      // top words may have cancelled: renormalize
      i = sa-1;
      while (i >= 0 && !xp[i]) i--;
      x.xrep.QuickSetLength(i+1);
   }
   else if (sa < sb) {
      x.xrep.SetLength(sb);

      _ntl_ulong *xp = x.xrep.elts();
      const _ntl_ulong *ap = a.xrep.elts();
      const _ntl_ulong *bp = b.xrep.elts();

      for (i = 0; i < sa; i++)
         xp[i] = ap[i] ^ bp[i];

      for (; i < sb; i++)
         xp[i] = bp[i];
   }
   else { // sa > sb
      x.xrep.SetLength(sa);

      _ntl_ulong *xp = x.xrep.elts();
      const _ntl_ulong *ap = a.xrep.elts();
      const _ntl_ulong *bp = b.xrep.elts();

      for (i = 0; i < sb; i++)
         xp[i] = ap[i] ^ bp[i];

      for (; i < sa; i++)
         xp[i] = ap[i];
   }
}
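/*
 * Example (added; not from the NTL sources -- a minimal usage sketch
 * of the routines above).  Coefficients are packed one bit per
 * position in machine words, so addition over GF(2) is word-wise XOR:
 *
 *    GF2X f, g, h;
 *    SetCoeff(f, 3);            // f = X^3
 *    SetCoeff(g, 3);
 *    SetCoeff(g, 0);            // g = X^3 + 1
 *    add(h, f, g);              // h = 1: the X^3 terms cancel
 *    long d = deg(h);           // d == 0
 */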
/*
 * The bodies of mul1, Mul1, AddMul1, and mul_half
 * are generated by the MakeDesc program, and the
 * macros NTL_BB_MUL_CODE... are defined in mach_desc.h.
 * Thanks to Paul Zimmermann for providing improvements
 * to this approach.
 */

#if (defined(NTL_GF2X_ALTCODE1))

#define NTL_EFF_BB_MUL_CODE0 NTL_ALT1_BB_MUL_CODE0
#define NTL_EFF_BB_MUL_CODE1 NTL_ALT1_BB_MUL_CODE1
#define NTL_EFF_BB_MUL_CODE2 NTL_ALT1_BB_MUL_CODE2
#define NTL_EFF_SHORT_BB_MUL_CODE1 NTL_ALT1_SHORT_BB_MUL_CODE1
#define NTL_EFF_HALF_BB_MUL_CODE0 NTL_ALT1_HALF_BB_MUL_CODE0

#elif (defined(NTL_GF2X_ALTCODE))

#define NTL_EFF_BB_MUL_CODE0 NTL_ALT_BB_MUL_CODE0
#define NTL_EFF_BB_MUL_CODE1 NTL_ALT_BB_MUL_CODE1
#define NTL_EFF_BB_MUL_CODE2 NTL_ALT_BB_MUL_CODE2
#define NTL_EFF_SHORT_BB_MUL_CODE1 NTL_ALT_SHORT_BB_MUL_CODE1
#define NTL_EFF_HALF_BB_MUL_CODE0 NTL_ALT_HALF_BB_MUL_CODE0

#else

#define NTL_EFF_BB_MUL_CODE0 NTL_BB_MUL_CODE0
#define NTL_EFF_BB_MUL_CODE1 NTL_BB_MUL_CODE1
#define NTL_EFF_BB_MUL_CODE2 NTL_BB_MUL_CODE2
#define NTL_EFF_SHORT_BB_MUL_CODE1 NTL_SHORT_BB_MUL_CODE1
#define NTL_EFF_HALF_BB_MUL_CODE0 NTL_HALF_BB_MUL_CODE0

#endif

// c[0..1] = a * b: multiply two single words, double-word result
static void mul1(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
{
NTL_EFF_BB_MUL_CODE0
}

#ifdef NTL_GF2X_NOINLINE

#define mul1_IL mul1

#else

static inline void mul1_inline(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
{
NTL_EFF_BB_MUL_CODE0
}

#define mul1_IL mul1_inline

#endif

// cp[0..sb] = bp[0..sb-1] * a
static void Mul1(_ntl_ulong *cp, const _ntl_ulong *bp, long sb, _ntl_ulong a)
{
NTL_EFF_BB_MUL_CODE1
}

// cp[0..sb] ^= bp[0..sb-1] * a
static void AddMul1(_ntl_ulong *cp, const _ntl_ulong* bp, long sb, _ntl_ulong a)
{
NTL_EFF_BB_MUL_CODE2
}

// as Mul1, but used when the high-order bits of a are zero
static void Mul1_short(_ntl_ulong *cp, const _ntl_ulong *bp, long sb, _ntl_ulong a)
{
NTL_EFF_SHORT_BB_MUL_CODE1
}

// as mul1, but used when b fits in half a word
static void mul_half(_ntl_ulong *c, _ntl_ulong a, _ntl_ulong b)
{
NTL_EFF_HALF_BB_MUL_CODE0
}

// mul2...mul8 hard-code 2x2...8x8 word multiplies.
// I adapted these routines from LiDIA (except mul3, see below).
// Inlining these seems to hurt, not help.

static void mul2(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong hs0, hs1;
   _ntl_ulong hl2[2];

   hs0 = a[0] ^ a[1];
   hs1 = b[0] ^ b[1];

   mul1_IL(c, a[0], b[0]);
   mul1_IL(c+2, a[1], b[1]);
   mul1_IL(hl2, hs0, hs1);

   hl2[0] = hl2[0] ^ c[0] ^ c[2];
   hl2[1] = hl2[1] ^ c[1] ^ c[3];

   c[1] ^= hl2[0];
   c[2] ^= hl2[1];
}
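/*
 * Note (added; not from the NTL sources).  mul2 is one level of
 * Karatsuba: writing A = a1*Y + a0 and B = b1*Y + b0 with
 * Y = X^NTL_BITS_PER_LONG,
 *
 *    A*B = a1*b1*Y^2 + ((a0+a1)*(b0+b1) + a0*b0 + a1*b1)*Y + a0*b0,
 *
 * which costs three word multiplies (mul1) instead of four.  Over
 * GF(2), "+" is XOR, so the middle coefficient is exactly the hl2[]
 * correction XORed into c[1] and c[2] above.
 */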
/*
 * This version of mul3 I got from Weimerskirch, Stebila,
 * and Shantz, "Generic GF(2^m) arithmetic in software
 * and its application to ECC" (ACISP 2003).
 */

static void mul3(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong d0[2], d1[2], d2[2], d01[2], d02[2], d12[2];

   mul1_IL(d0, a[0], b[0]);
   mul1_IL(d1, a[1], b[1]);
   mul1_IL(d2, a[2], b[2]);
   mul1_IL(d01, a[0]^a[1], b[0]^b[1]);
   mul1_IL(d02, a[0]^a[2], b[0]^b[2]);
   mul1_IL(d12, a[1]^a[2], b[1]^b[2]);

   c[0] = d0[0];
   c[1] = d0[1] ^ d01[0] ^ d1[0] ^ d0[0];
   c[2] = d01[1] ^ d1[1] ^ d0[1] ^ d02[0] ^ d2[0] ^ d0[0] ^ d1[0];
   c[3] = d02[1] ^ d2[1] ^ d0[1] ^ d1[1] ^ d12[0] ^ d1[0] ^ d2[0];
   c[4] = d12[1] ^ d1[1] ^ d2[1] ^ d2[0];
   c[5] = d2[1];
}

static void mul4(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong hs0[2], hs1[2];
   _ntl_ulong hl2[4];

   hs0[0] = a[0] ^ a[2];
   hs0[1] = a[1] ^ a[3];
   hs1[0] = b[0] ^ b[2];
   hs1[1] = b[1] ^ b[3];

   mul2(c, a, b);
   mul2(c+4, a+2, b+2);
   mul2(hl2, hs0, hs1);

   hl2[0] = hl2[0] ^ c[0] ^ c[4];
   hl2[1] = hl2[1] ^ c[1] ^ c[5];
   hl2[2] = hl2[2] ^ c[2] ^ c[6];
   hl2[3] = hl2[3] ^ c[3] ^ c[7];

   c[2] ^= hl2[0];
   c[3] ^= hl2[1];
   c[4] ^= hl2[2];
   c[5] ^= hl2[3];
}

static void mul5(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong hs0[3], hs1[3];
   _ntl_ulong hl2[6];

   hs0[0] = a[0] ^ a[3];
   hs0[1] = a[1] ^ a[4];
   hs0[2] = a[2];
   hs1[0] = b[0] ^ b[3];
   hs1[1] = b[1] ^ b[4];
   hs1[2] = b[2];

   mul3(c, a, b);
   mul3(hl2, hs0, hs1);
   mul2(c+6, a+3, b+3);

   hl2[0] = hl2[0] ^ c[0] ^ c[6];
   hl2[1] = hl2[1] ^ c[1] ^ c[7];
   hl2[2] = hl2[2] ^ c[2] ^ c[8];
   hl2[3] = hl2[3] ^ c[3] ^ c[9];
   hl2[4] = hl2[4] ^ c[4];
   hl2[5] = hl2[5] ^ c[5];

   c[3] ^= hl2[0];
   c[4] ^= hl2[1];
   c[5] ^= hl2[2];
   c[6] ^= hl2[3];
   c[7] ^= hl2[4];
   c[8] ^= hl2[5];
}

static void mul6(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong hs0[3], hs1[3];
   _ntl_ulong hl2[6];

   hs0[0] = a[0] ^ a[3];
   hs0[1] = a[1] ^ a[4];
   hs0[2] = a[2] ^ a[5];
   hs1[0] = b[0] ^ b[3];
   hs1[1] = b[1] ^ b[4];
   hs1[2] = b[2] ^ b[5];

   mul3(c, a, b);
   mul3(c+6, a+3, b+3);
   mul3(hl2, hs0, hs1);

   hl2[0] = hl2[0] ^ c[0] ^ c[6];
   hl2[1] = hl2[1] ^ c[1] ^ c[7];
   hl2[2] = hl2[2] ^ c[2] ^ c[8];
   hl2[3] = hl2[3] ^ c[3] ^ c[9];
   hl2[4] = hl2[4] ^ c[4] ^ c[10];
   hl2[5] = hl2[5] ^ c[5] ^ c[11];

   c[3] ^= hl2[0];
   c[4] ^= hl2[1];
   c[5] ^= hl2[2];
   c[6] ^= hl2[3];
   c[7] ^= hl2[4];
   c[8] ^= hl2[5];
}

static void mul7(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong hs0[4], hs1[4];
   _ntl_ulong hl2[8];

   hs0[0] = a[0] ^ a[4];
   hs0[1] = a[1] ^ a[5];
   hs0[2] = a[2] ^ a[6];
   hs0[3] = a[3];
   hs1[0] = b[0] ^ b[4];
   hs1[1] = b[1] ^ b[5];
   hs1[2] = b[2] ^ b[6];
   hs1[3] = b[3];

   mul4(c, a, b);
   mul4(hl2, hs0, hs1);
   mul3(c+8, a+4, b+4);

   hl2[0] = hl2[0] ^ c[0] ^ c[8];
   hl2[1] = hl2[1] ^ c[1] ^ c[9];
   hl2[2] = hl2[2] ^ c[2] ^ c[10];
   hl2[3] = hl2[3] ^ c[3] ^ c[11];
   hl2[4] = hl2[4] ^ c[4] ^ c[12];
   hl2[5] = hl2[5] ^ c[5] ^ c[13];
   hl2[6] = hl2[6] ^ c[6];
   hl2[7] = hl2[7] ^ c[7];

   c[4] ^= hl2[0];
   c[5] ^= hl2[1];
   c[6] ^= hl2[2];
   c[7] ^= hl2[3];
   c[8] ^= hl2[4];
   c[9] ^= hl2[5];
   c[10] ^= hl2[6];
   c[11] ^= hl2[7];
}

static void mul8(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b)
{
   _ntl_ulong hs0[4], hs1[4];
   _ntl_ulong hl2[8];

   hs0[0] = a[0] ^ a[4];
   hs0[1] = a[1] ^ a[5];
   hs0[2] = a[2] ^ a[6];
   hs0[3] = a[3] ^ a[7];
   hs1[0] = b[0] ^ b[4];
   hs1[1] = b[1] ^ b[5];
   hs1[2] = b[2] ^ b[6];
   hs1[3] = b[3] ^ b[7];

   mul4(c, a, b);
   mul4(c+8, a+4, b+4);
   mul4(hl2, hs0, hs1);

   hl2[0] = hl2[0] ^ c[0] ^ c[8];
   hl2[1] = hl2[1] ^ c[1] ^ c[9];
   hl2[2] = hl2[2] ^ c[2] ^ c[10];
   hl2[3] = hl2[3] ^ c[3] ^ c[11];
   hl2[4] = hl2[4] ^ c[4] ^ c[12];
   hl2[5] = hl2[5] ^ c[5] ^ c[13];
   hl2[6] = hl2[6] ^ c[6] ^ c[14];
   hl2[7] = hl2[7] ^ c[7] ^ c[15];

   c[4] ^= hl2[0];
   c[5] ^= hl2[1];
   c[6] ^= hl2[2];
   c[7] ^= hl2[3];
   c[8] ^= hl2[4];
   c[9] ^= hl2[5];
   c[10] ^= hl2[6];
   c[11] ^= hl2[7];
}
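/*
 * Note (added; not from the NTL sources).  For odd sizes the split is
 * unbalanced.  E.g. mul5 splits a 5-word input as 3 + 2: the low part
 * is a[0..2], the high part a[3..4], and the "sum" operand
 *
 *    hs0[0] = a[0]^a[3];  hs0[1] = a[1]^a[4];  hs0[2] = a[2];
 *
 * simply copies the unpaired word a[2].  The three half-size products
 * are then 3x3, 3x3, and 2x2 words, recombined exactly as in mul2;
 * mul7 does the same with a 4 + 3 split.
 */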
static void KarMul(_ntl_ulong *c, const _ntl_ulong *a, const _ntl_ulong *b,
                   long len, _ntl_ulong *stk)
{
   if (len <= 8) {
      switch (len) {
         case 1: mul1(c, a[0], b[0]); break;
         case 2: mul2(c, a, b); break;
         case 3: mul3(c, a, b); break;
         case 4: mul4(c, a, b); break;
         case 5: mul5(c, a, b); break;
         case 6: mul6(c, a, b); break;
         case 7: mul7(c, a, b); break;
         case 8: mul8(c, a, b); break;
      }

      return;
   }

   long ll, lh, i, ll2, lh2;
   const _ntl_ulong *a0, *a1, *b0, *b1;
   _ntl_ulong *a01, *b01, *h;

   lh = len >> 1;
   ll = (len+1) >> 1;

   ll2 = ll << 1;
   lh2 = lh << 1;

   a01 = stk;  stk += ll+1;
   b01 = stk;  stk += ll+1;
   h = stk;    stk += ll2+1;

   a0 = a;
   a1 = a+ll;
   b0 = b;
   b1 = b+ll;

   KarMul(c, a0, b0, ll, stk);
   KarMul(c+ll2, a1, b1, lh, stk);

   for (i = 0; i < lh; i++) {
      a01[i] = a[i] ^ a[i+ll];
      b01[i] = b[i] ^ b[i+ll];
   }

   if (lh < ll) {
      a01[lh] = a[lh];
      b01[lh] = b[lh];
   }

   KarMul(h, a01, b01, ll, stk);

   for (i = 0; i < ll2; i++)
      h[i] ^= c[i];

   for (i = 0; i < lh2; i++)
      h[i] ^= c[i+ll2];

   for (i = 0; i < ll2; i++)
      c[i+ll] ^= h[i];
}
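/*
 * Note (added; not from the NTL sources).  Each level of KarMul
 * consumes (ll+1) + (ll+1) + (2*ll+1) = 4*ll + 3 scratch words from
 * stk (for a01, b01, and h), with ll = ceil(len/2), before recursing
 * on inputs of length at most ll.  This matches the space computed by
 * the general-case multiplier below:
 *
 *    while (n > 8) { hn = (n+1) >> 1; sp += (hn << 2) + 3; n = hn; }
 *
 * so the scratch vector is sized once, up front, for the whole
 * recursion.
 */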
#ifdef NTL_GF2X_LIB

// multiplication via the external gf2x library

void mul(GF2X& c, const GF2X& a, const GF2X& b)
{
   long sa = a.xrep.length();
   long sb = b.xrep.length();

   if (sa <= 0 || sb <= 0) {
      clear(c);
      return;
   }

   _ntl_ulong a0 = a.xrep[0];
   _ntl_ulong b0 = b.xrep[0];

   if (sb == 1 && b0 == 1) {
      c = a;
      return;
   }

   if (sa == 1 && a0 == 1) {
      c = b;
      return;
   }

   if (&a == &b) {
      sqr(c, a);
      return;
   }

   if (sa == 1 && sb == 1) {
      // special case...

      _ntl_ulong v[2];
      if (!(a0 >> NTL_BITS_PER_LONG/2))
         mul_half(v, b0, a0);
      else if (!(b0 >> NTL_BITS_PER_LONG/2))
         mul_half(v, a0, b0);
      else
         mul1(v, a0, b0);

      if (v[1]) {
         c.xrep.SetLength(2);
         _ntl_ulong *cp = &c.xrep[0];
         cp[0] = v[0];
         cp[1] = v[1];
      }
      else {
         c.xrep.SetLength(1);
         _ntl_ulong *cp = &c.xrep[0];
         cp[0] = v[0];
      }
      return;
   }

   if (sa == 2 && sb == 2) {
      // special case...

      _ntl_ulong v[4];
      mul2(v, &a.xrep[0], &b.xrep[0]);

      if (v[3]) {
         c.xrep.SetLength(4);
         _ntl_ulong *cp = &c.xrep[0];
         cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
      }
      else {
         c.xrep.SetLength(3);
         _ntl_ulong *cp = &c.xrep[0];
         cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2];
      }
      return;
   }

   // another special case: one of the two inputs
   // has length 1 (or less).

   if (sa == 1) {
      c.xrep.SetLength(sb + 1);
      _ntl_ulong *cp = c.xrep.elts();
      const _ntl_ulong *bp = b.xrep.elts();

      if (a0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1))
         Mul1(cp, bp, sb, a0);
      else
         Mul1_short(cp, bp, sb, a0);

      c.normalize();
      return;
   }

   if (sb == 1) {
      c.xrep.SetLength(sa + 1);
      _ntl_ulong *cp = c.xrep.elts();
      const _ntl_ulong *ap = a.xrep.elts();

      if (b0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1))
         Mul1(cp, ap, sa, b0);
      else
         Mul1_short(cp, ap, sa, b0);

      c.normalize();
      return;
   }

   // finally: the general case

   WordVector mem;

   const _ntl_ulong *ap = a.xrep.elts(), *bp = b.xrep.elts();
   _ntl_ulong *cp;

   long sc = sa + sb;
   long in_mem = 0;

   if (&a == &c || &b == &c) {
      // output aliases an input: compute into scratch space
      mem.SetLength(sc);
      cp = mem.elts();
      in_mem = 1;
   }
   else {
      c.xrep.SetLength(sc);
      cp = c.xrep.elts();
   }

   gf2x_mul(cp, ap, sa, bp, sb);

   if (in_mem) {
      c.xrep = mem;
   }

   c.normalize();
   mem.release();
}

#else

void OldMul(GF2X& c, const GF2X& a, const GF2X& b)
{
   mul(c, a, b);
}

#endif
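/*
 * Note (added; not from the NTL sources).  When NTL is configured
 * with NTL_GF2X_LIB, the function above delegates large products to
 * gf2x_mul from the external gf2x library, keeping NTL's own code for
 * the small and degenerate cases; the portable Karatsuba routine
 * below is then compiled under the name OldMul.  Without the library,
 * the roles are swapped: the routine below is mul itself, and OldMul
 * above is a thin wrapper around it.
 */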
#ifdef NTL_GF2X_LIB
void OldMul(GF2X& c, const GF2X& a, const GF2X& b)
#else
void mul(GF2X& c, const GF2X& a, const GF2X& b)
#endif
{
   long sa = a.xrep.length();
   long sb = b.xrep.length();

   if (sa <= 0 || sb <= 0) {
      clear(c);
      return;
   }

   _ntl_ulong a0 = a.xrep[0];
   _ntl_ulong b0 = b.xrep[0];

   if (sb == 1 && b0 == 1) {
      c = a;
      return;
   }

   if (sa == 1 && a0 == 1) {
      c = b;
      return;
   }

   if (&a == &b) {
      sqr(c, a);
      return;
   }

   if (sa == sb && sa <= 8) {
      // we treat these cases specially for efficiency reasons

      switch (sa) {
         case 1: {
            _ntl_ulong v[2];
            if (!(a0 >> NTL_BITS_PER_LONG/2))
               mul_half(v, b0, a0);
            else if (!(b0 >> NTL_BITS_PER_LONG/2))
               mul_half(v, a0, b0);
            else
               mul1(v, a0, b0);

            if (v[1]) {
               c.xrep.SetLength(2);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1];
            }
            else {
               c.xrep.SetLength(1);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0];
            }
         }
         return;

         case 2: {
            _ntl_ulong v[4];
            mul2(v, &a.xrep[0], &b.xrep[0]);
            if (v[3]) {
               c.xrep.SetLength(4);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
            }
            else {
               c.xrep.SetLength(3);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2];
            }
         }
         return;

         case 3: {
            _ntl_ulong v[6];
            mul3(v, &a.xrep[0], &b.xrep[0]);
            if (v[5]) {
               c.xrep.SetLength(6);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5];
            }
            else {
               c.xrep.SetLength(5);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4];
            }
         }
         return;

         case 4: {
            _ntl_ulong v[8];
            mul4(v, &a.xrep[0], &b.xrep[0]);
            if (v[7]) {
               c.xrep.SetLength(8);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
            }
            else {
               c.xrep.SetLength(7);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6];
            }
         }
         return;

         case 5: {
            _ntl_ulong v[10];
            mul5(v, &a.xrep[0], &b.xrep[0]);
            if (v[9]) {
               c.xrep.SetLength(10);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9];
            }
            else {
               c.xrep.SetLength(9);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8];
            }
         }
         return;

         case 6: {
            _ntl_ulong v[12];
            mul6(v, &a.xrep[0], &b.xrep[0]);
            if (v[11]) {
               c.xrep.SetLength(12);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9]; cp[10] = v[10]; cp[11] = v[11];
            }
            else {
               c.xrep.SetLength(11);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9]; cp[10] = v[10];
            }
         }
         return;

         case 7: {
            _ntl_ulong v[14];
            mul7(v, &a.xrep[0], &b.xrep[0]);
            if (v[13]) {
               c.xrep.SetLength(14);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9]; cp[10] = v[10]; cp[11] = v[11];
               cp[12] = v[12]; cp[13] = v[13];
            }
            else {
               c.xrep.SetLength(13);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9]; cp[10] = v[10]; cp[11] = v[11];
               cp[12] = v[12];
            }
         }
         return;

         case 8: {
            _ntl_ulong v[16];
            mul8(v, &a.xrep[0], &b.xrep[0]);
            if (v[15]) {
               c.xrep.SetLength(16);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9]; cp[10] = v[10]; cp[11] = v[11];
               cp[12] = v[12]; cp[13] = v[13]; cp[14] = v[14]; cp[15] = v[15];
            }
            else {
               c.xrep.SetLength(15);
               _ntl_ulong *cp = &c.xrep[0];
               cp[0] = v[0]; cp[1] = v[1]; cp[2] = v[2]; cp[3] = v[3];
               cp[4] = v[4]; cp[5] = v[5]; cp[6] = v[6]; cp[7] = v[7];
               cp[8] = v[8]; cp[9] = v[9]; cp[10] = v[10]; cp[11] = v[11];
               cp[12] = v[12]; cp[13] = v[13]; cp[14] = v[14];
            }
         }
         return;
      }
   }

   // another special case: one of the two inputs
   // has length 1 (or less).

   if (sa == 1) {
      c.xrep.SetLength(sb + 1);
      _ntl_ulong *cp = c.xrep.elts();
      const _ntl_ulong *bp = b.xrep.elts();

      if (a0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1))
         Mul1(cp, bp, sb, a0);
      else
         Mul1_short(cp, bp, sb, a0);

      c.normalize();
      return;
   }

   if (sb == 1) {
      c.xrep.SetLength(sa + 1);
      _ntl_ulong *cp = c.xrep.elts();
      const _ntl_ulong *ap = a.xrep.elts();

      if (b0 >> (NTL_BITS_PER_LONG-NTL_BB_MUL1_BITS+1))
         Mul1(cp, ap, sa, b0);
      else
         Mul1_short(cp, ap, sa, b0);

      c.normalize();
      return;
   }

   // finally: the general case

   static WordVector mem;
   static WordVector stk;
   static WordVector vec;

   const _ntl_ulong *ap, *bp;
   _ntl_ulong *cp;

   long sc = sa + sb;
   long in_mem = 0;

   if (&a == &c || &b == &c) {
      mem.SetLength(sc);
      cp = mem.elts();
      in_mem = 1;
   }
   else {
      c.xrep.SetLength(sc);
      cp = c.xrep.elts();
   }

   long n, hn, sp;

   // compute the scratch space needed by KarMul

   n = min(sa, sb);
   sp = 0;
   while (n > 8) {
      hn = (n+1) >> 1;
      sp += (hn << 2) + 3;
      n = hn;
   }

   stk.SetLength(sp);
   _ntl_ulong *stk_p = stk.elts();

   if (sa > sb) {
      { long t; t = sa; sa = sb; sb = t; }
      ap = b.xrep.elts();
      bp = a.xrep.elts();
   }
   else {
      ap = a.xrep.elts();
      bp = b.xrep.elts();
   }

   vec.SetLength(2*sa);

   _ntl_ulong *v = vec.elts();

   long i, j;

   for (i = 0; i < sc; i++)
      cp[i] = 0;

   do {
      if (sa == 0) break;

      if (sa == 1) {
         AddMul1(cp, bp, sb, ap[0]);
         break;
      }

      // general case: multiply the shorter operand into
      // full-size chunks of the longer one

      for (i = 0; i+sa <= sb; i += sa) {
         KarMul(v, ap, bp + i, sa, stk_p);
         for (j = 0; j < 2*sa; j++)
            cp[i+j] ^= v[j];
      }

      // swap roles and continue with the leftover chunk

      { const _ntl_ulong *t; t = ap; ap = bp + i; bp = t; }
      { long t; t = sa; sa = sb - i; sb = t; }
      cp = cp + i;
   } while (1);

   if (in_mem)
      c.xrep = mem;

   c.normalize();

   mem.release();
   stk.release();
   vec.release();
}

void mul(GF2X& c, const GF2X& a, long b)
{
   if (b & 1)
      c = a;
   else
      clear(c);
}

void mul(GF2X& c, const GF2X& a, GF2 b)
{
   if (b == 1)
      c = a;
   else
      clear(c);
}

void trunc(GF2X& x, const GF2X& a, long m)
// x = a % X^m
{
   if (m < 0) Error("trunc: bad args");

   long n = a.xrep.length();
   if (n == 0 || m == 0) {
      clear(x);
      return;
   }

   if (&x == &a) {
      if (n*NTL_BITS_PER_LONG > m) {
         long wm = (m-1)/NTL_BITS_PER_LONG;
         long bm = m - NTL_BITS_PER_LONG*wm;
         _ntl_ulong msk;
         if (bm == NTL_BITS_PER_LONG)
            msk = ~(0UL);
         else
            msk = ((1UL << bm) - 1UL);
         x.xrep[wm] &= msk;
         x.xrep.QuickSetLength(wm+1);
         x.normalize();
      }
   }
   else if (n*NTL_BITS_PER_LONG <= m)
      x = a;
   else {
      long wm = (m-1)/NTL_BITS_PER_LONG;
      long bm = m - NTL_BITS_PER_LONG*wm;
      x.xrep.SetLength(wm+1);

      _ntl_ulong *xp = &x.xrep[0];
      const _ntl_ulong *ap = &a.xrep[0];

      long i;
      for (i = 0; i < wm; i++)
         xp[i] = ap[i];

      _ntl_ulong msk;
      if (bm == NTL_BITS_PER_LONG)
         msk = ~(0UL);
      else
         msk = ((1UL << bm) - 1UL);

      xp[wm] = ap[wm] & msk;
      x.normalize();
   }
}
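/*
 * Example (added; not from the NTL sources).  trunc reduces modulo
 * X^m by masking the word containing bit m.  For m = 3 the mask is
 * (1UL << 3) - 1 = 0x7, so:
 *
 *    GF2X f;
 *    SetCoeff(f, 4); SetCoeff(f, 2); SetCoeff(f, 0);  // f = X^4 + X^2 + 1
 *    trunc(f, f, 3);                                  // f = X^2 + 1
 */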
/****** implementation of vec_GF2X ******/

NTL_vector_impl(GF2X,vec_GF2X)

NTL_eq_vector_impl(GF2X,vec_GF2X)

void MulByX(GF2X& x, const GF2X& a)
{
   long n = a.xrep.length();
   if (n == 0) {
      clear(x);
      return;
   }

   if (a.xrep[n-1] & (1UL << (NTL_BITS_PER_LONG-1))) {
      // the top bit would be shifted out: grow by one word
      x.xrep.SetLength(n+1);
      x.xrep[n] = 1;
   }
   else if (&x != &a)
      x.xrep.SetLength(n);

   _ntl_ulong *xp = &x.xrep[0];
   const _ntl_ulong *ap = &a.xrep[0];

   long i;
   for (i = n-1; i > 0; i--)
      xp[i] = (ap[i] << 1) | (ap[i-1] >> (NTL_BITS_PER_LONG-1));

   xp[0] = ap[0] << 1;

   // no need to normalize
}

// sqrtab[i] is the byte i with its bits "spread out":
// bit j of i becomes bit 2*j of sqrtab[i]

static _ntl_ulong sqrtab[256] = {
   0UL, 1UL, 4UL, 5UL, 16UL, 17UL, 20UL, 21UL,
   64UL, 65UL, 68UL, 69UL, 80UL, 81UL, 84UL, 85UL,
   256UL, 257UL, 260UL, 261UL, 272UL, 273UL, 276UL, 277UL,
   320UL, 321UL, 324UL, 325UL, 336UL, 337UL, 340UL, 341UL,
   1024UL, 1025UL, 1028UL, 1029UL, 1040UL, 1041UL, 1044UL, 1045UL,
   1088UL, 1089UL, 1092UL, 1093UL, 1104UL, 1105UL, 1108UL, 1109UL,
   1280UL, 1281UL, 1284UL, 1285UL, 1296UL, 1297UL, 1300UL, 1301UL,
   1344UL, 1345UL, 1348UL, 1349UL, 1360UL, 1361UL, 1364UL, 1365UL,
   4096UL, 4097UL, 4100UL, 4101UL, 4112UL, 4113UL, 4116UL, 4117UL,
   4160UL, 4161UL, 4164UL, 4165UL, 4176UL, 4177UL, 4180UL, 4181UL,
   4352UL, 4353UL, 4356UL, 4357UL, 4368UL, 4369UL, 4372UL, 4373UL,
   4416UL, 4417UL, 4420UL, 4421UL, 4432UL, 4433UL, 4436UL, 4437UL,
   5120UL, 5121UL, 5124UL, 5125UL, 5136UL, 5137UL, 5140UL, 5141UL,
   5184UL, 5185UL, 5188UL, 5189UL, 5200UL, 5201UL, 5204UL, 5205UL,
   5376UL, 5377UL, 5380UL, 5381UL, 5392UL, 5393UL, 5396UL, 5397UL,
   5440UL, 5441UL, 5444UL, 5445UL, 5456UL, 5457UL, 5460UL, 5461UL,
   16384UL, 16385UL, 16388UL, 16389UL, 16400UL, 16401UL, 16404UL, 16405UL,
   16448UL, 16449UL, 16452UL, 16453UL, 16464UL, 16465UL, 16468UL, 16469UL,
   16640UL, 16641UL, 16644UL, 16645UL, 16656UL, 16657UL, 16660UL, 16661UL,
   16704UL, 16705UL, 16708UL, 16709UL, 16720UL, 16721UL, 16724UL, 16725UL,
   17408UL, 17409UL, 17412UL, 17413UL, 17424UL, 17425UL, 17428UL, 17429UL,
   17472UL, 17473UL, 17476UL, 17477UL, 17488UL, 17489UL, 17492UL, 17493UL,
   17664UL, 17665UL, 17668UL, 17669UL, 17680UL, 17681UL, 17684UL, 17685UL,
   17728UL, 17729UL, 17732UL, 17733UL, 17744UL, 17745UL, 17748UL, 17749UL,
   20480UL, 20481UL, 20484UL, 20485UL, 20496UL, 20497UL, 20500UL, 20501UL,
   20544UL, 20545UL, 20548UL, 20549UL, 20560UL, 20561UL, 20564UL, 20565UL,
   20736UL, 20737UL, 20740UL, 20741UL, 20752UL, 20753UL, 20756UL, 20757UL,
   20800UL, 20801UL, 20804UL, 20805UL, 20816UL, 20817UL, 20820UL, 20821UL,
   21504UL, 21505UL, 21508UL, 21509UL, 21520UL, 21521UL, 21524UL, 21525UL,
   21568UL, 21569UL, 21572UL, 21573UL, 21584UL, 21585UL, 21588UL, 21589UL,
   21760UL, 21761UL, 21764UL, 21765UL, 21776UL, 21777UL, 21780UL, 21781UL,
   21824UL, 21825UL, 21828UL, 21829UL, 21840UL, 21841UL, 21844UL, 21845UL
};

static inline void sqr1(_ntl_ulong *c, _ntl_ulong a)
{
   _ntl_ulong hi, lo;

   NTL_BB_SQR_CODE

   c[0] = lo;
   c[1] = hi;
}

void sqr(GF2X& c, const GF2X& a)
{
   long sa = a.xrep.length();
   if (sa <= 0) {
      clear(c);
      return;
   }

   c.xrep.SetLength(sa << 1);
   _ntl_ulong *cp = c.xrep.elts();
   const _ntl_ulong *ap = a.xrep.elts();

   // process from the top down, so that c may alias a

   long i;
   for (i = sa-1; i >= 0; i--)
      sqr1(cp + (i << 1), ap[i]);

   c.normalize();
   return;
}
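/*
 * Example (added; not from the NTL sources).  Squaring over GF(2) is
 * linear: (sum a_i X^i)^2 = sum a_i X^(2i), so sqr just spreads the
 * bits of each word across two words via sqrtab.  For instance
 * sqrtab[3] == 5 reflects (X + 1)^2 == X^2 + 1:
 *
 *    GF2X f, g;
 *    SetCoeff(f, 1); SetCoeff(f, 0);   // f = X + 1
 *    sqr(g, f);                        // g = X^2 + 1 (word 5 = 0b101)
 */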
void LeftShift(GF2X& c, const GF2X& a, long n)
{
   if (IsZero(a)) {
      clear(c);
      return;
   }

   if (n == 1) {
      MulByX(c, a);
      return;
   }

   if (n < 0) {
      if (n < -NTL_MAX_LONG)
         clear(c);
      else
         RightShift(c, a, -n);
      return;
   }

   if (NTL_OVERFLOW(n, 1, 0))
      Error("overflow in LeftShift");

   if (n == 0) {
      c = a;
      return;
   }

   long sa = a.xrep.length();

   long wn = n/NTL_BITS_PER_LONG;
   long bn = n - wn*NTL_BITS_PER_LONG;

   long sc = sa + wn;
   if (bn) sc++;

   c.xrep.SetLength(sc);

   _ntl_ulong *cp = c.xrep.elts();
   const _ntl_ulong *ap = a.xrep.elts();

   long i;

   if (bn == 0) {
      for (i = sa+wn-1; i >= wn; i--)
         cp[i] = ap[i-wn];

      for (i = wn-1; i >= 0; i--)
         cp[i] = 0;
   }
   else {
      cp[sa+wn] = ap[sa-1] >> (NTL_BITS_PER_LONG-bn);

      for (i = sa+wn-1; i >= wn+1; i--)
         cp[i] = (ap[i-wn] << bn) | (ap[i-wn-1] >> (NTL_BITS_PER_LONG-bn));

      cp[wn] = ap[0] << bn;

      for (i = wn-1; i >= 0; i--)
         cp[i] = 0;
   }

   c.normalize();
}

void ShiftAdd(GF2X& c, const GF2X& a, long n)
// c = c + a*X^n
{
   if (n < 0) Error("ShiftAdd: negative argument");

   if (n == 0) {
      add(c, c, a);
      return;
   }

   if (NTL_OVERFLOW(n, 1, 0))
      Error("overflow in ShiftAdd");

   long sa = a.xrep.length();
   if (sa <= 0) {
      return;
   }

   long sc = c.xrep.length();

   long wn = n/NTL_BITS_PER_LONG;
   long bn = n - wn*NTL_BITS_PER_LONG;

   long ss = sa + wn;
   if (bn) ss++;

   if (ss > sc)
      c.xrep.SetLength(ss);

   _ntl_ulong *cp = c.xrep.elts();
   const _ntl_ulong *ap = a.xrep.elts();

   long i;

   for (i = sc; i < ss; i++)
      cp[i] = 0;

   if (bn == 0) {
      for (i = sa+wn-1; i >= wn; i--)
         cp[i] ^= ap[i-wn];
   }
   else {
      cp[sa+wn] ^= ap[sa-1] >> (NTL_BITS_PER_LONG-bn);

      for (i = sa+wn-1; i >= wn+1; i--)
         cp[i] ^= (ap[i-wn] << bn) | (ap[i-wn-1] >> (NTL_BITS_PER_LONG-bn));

      cp[wn] ^= ap[0] << bn;
   }

   c.normalize();
}

void RightShift(GF2X& c, const GF2X& a, long n)
{
   if (IsZero(a)) {
      clear(c);
      return;
   }

   if (n < 0) {
      if (n < -NTL_MAX_LONG) Error("overflow in RightShift");
      LeftShift(c, a, -n);
      return;
   }

   if (n == 0) {
      c = a;
      return;
   }

   long sa = a.xrep.length();
   long wn = n/NTL_BITS_PER_LONG;
   long bn = n - wn*NTL_BITS_PER_LONG;

   if (wn >= sa) {
      clear(c);
      return;
   }

   c.xrep.SetLength(sa-wn);
   _ntl_ulong *cp = c.xrep.elts();
   const _ntl_ulong *ap = a.xrep.elts();

   long i;

   if (bn == 0) {
      for (i = 0; i < sa-wn; i++)
         cp[i] = ap[i+wn];
   }
   else {
      for (i = 0; i < sa-wn-1; i++)
         cp[i] = (ap[i+wn] >> bn) | (ap[i+wn+1] << (NTL_BITS_PER_LONG - bn));

      cp[sa-wn-1] = ap[sa-1] >> bn;
   }

   c.normalize();
}

// revtab[i] is the byte i with its 8 bits reversed

static _ntl_ulong revtab[256] = {
   0UL, 128UL, 64UL, 192UL, 32UL, 160UL, 96UL, 224UL,
   16UL, 144UL, 80UL, 208UL, 48UL, 176UL, 112UL, 240UL,
   8UL, 136UL, 72UL, 200UL, 40UL, 168UL, 104UL, 232UL,
   24UL, 152UL, 88UL, 216UL, 56UL, 184UL, 120UL, 248UL,
   4UL, 132UL, 68UL, 196UL, 36UL, 164UL, 100UL, 228UL,
   20UL, 148UL, 84UL, 212UL, 52UL, 180UL, 116UL, 244UL,
   12UL, 140UL, 76UL, 204UL, 44UL, 172UL, 108UL, 236UL,
   28UL, 156UL, 92UL, 220UL, 60UL, 188UL, 124UL, 252UL,
   2UL, 130UL, 66UL, 194UL, 34UL, 162UL, 98UL, 226UL,
   18UL, 146UL, 82UL, 210UL, 50UL, 178UL, 114UL, 242UL,
   10UL, 138UL, 74UL, 202UL, 42UL, 170UL, 106UL, 234UL,
   26UL, 154UL, 90UL, 218UL, 58UL, 186UL, 122UL, 250UL,
   6UL, 134UL, 70UL, 198UL, 38UL, 166UL, 102UL, 230UL,
   22UL, 150UL, 86UL, 214UL, 54UL, 182UL, 118UL, 246UL,
   14UL, 142UL, 78UL, 206UL, 46UL, 174UL, 110UL, 238UL,
   30UL, 158UL, 94UL, 222UL, 62UL, 190UL, 126UL, 254UL,
   1UL, 129UL, 65UL, 193UL, 33UL, 161UL, 97UL, 225UL,
   17UL, 145UL, 81UL, 209UL, 49UL, 177UL, 113UL, 241UL,
   9UL, 137UL, 73UL, 201UL, 41UL, 169UL, 105UL, 233UL,
   25UL, 153UL, 89UL, 217UL, 57UL, 185UL, 121UL, 249UL,
   5UL, 133UL, 69UL, 197UL, 37UL, 165UL, 101UL, 229UL,
   21UL, 149UL, 85UL, 213UL, 53UL, 181UL, 117UL, 245UL,
   13UL, 141UL, 77UL, 205UL, 45UL, 173UL, 109UL, 237UL,
   29UL, 157UL, 93UL, 221UL, 61UL, 189UL, 125UL, 253UL,
   3UL, 131UL, 67UL, 195UL, 35UL, 163UL, 99UL, 227UL,
   19UL, 147UL, 83UL, 211UL, 51UL, 179UL, 115UL, 243UL,
   11UL, 139UL, 75UL, 203UL, 43UL, 171UL, 107UL, 235UL,
   27UL, 155UL, 91UL, 219UL, 59UL, 187UL, 123UL, 251UL,
   7UL, 135UL, 71UL, 199UL, 39UL, 167UL, 103UL, 231UL,
   23UL, 151UL, 87UL, 215UL, 55UL, 183UL, 119UL, 247UL,
   15UL, 143UL, 79UL, 207UL, 47UL, 175UL, 111UL, 239UL,
   31UL, 159UL, 95UL, 223UL, 63UL, 191UL, 127UL, 255UL
};

static inline _ntl_ulong rev1(_ntl_ulong a)
{
   return NTL_BB_REV_CODE;
}

void CopyReverse(GF2X& c, const GF2X& a, long hi)
// c[0..hi] = reverse(a[0..hi]), with zero fill as necessary
// input may alias output

{
   if (hi < 0) { clear(c); return; }

   if (NTL_OVERFLOW(hi, 1, 0))
      Error("overflow in CopyReverse");

   long n = hi+1;
   long sa = a.xrep.length();
   if (n <= 0 || sa <= 0) {
      clear(c);
      return;
   }

   long wn = n/NTL_BITS_PER_LONG;
   long bn = n - wn*NTL_BITS_PER_LONG;

   if (bn != 0) {
      wn++;
      bn = NTL_BITS_PER_LONG - bn;
   }

   c.xrep.SetLength(wn);

   _ntl_ulong *cp = c.xrep.elts();
   const _ntl_ulong *ap = a.xrep.elts();

   long mm = min(sa, wn);
   long i;

   for (i = 0; i < mm; i++)
      cp[i] = ap[i];

   for (i = mm; i < wn; i++)
      cp[i] = 0;

   if (bn != 0) {
      for (i = wn-1; i >= 1; i--)
         cp[i] = (cp[i] << bn) | (cp[i-1] >> (NTL_BITS_PER_LONG-bn));
      cp[0] = cp[0] << bn;
   }

   for (i = 0; i < wn/2; i++) {
      _ntl_ulong t;
      t = cp[i];
      cp[i] = cp[wn-1-i];
      cp[wn-1-i] = t;
   }

   for (i = 0; i < wn; i++)
      cp[i] = rev1(cp[i]);

   c.normalize();
}
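/*
 * Example (added; not from the NTL sources).  CopyReverse(c, a, hi)
 * reverses the coefficient vector a[0..hi] -- the polynomial reversal
 * used, e.g., when division is done via power series inversion.  With
 * hi = 3, coefficient i of a becomes coefficient 3-i of c:
 *
 *    GF2X f, r;
 *    SetCoeff(f, 3); SetCoeff(f, 1);   // f = X^3 + X
 *    CopyReverse(r, f, 3);             // r = X^2 + 1
 */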
void div(GF2X& q, const GF2X& a, GF2 b)
{
   if (b == 0)
      Error("div: division by zero");

   q = a;
}

void div(GF2X& q, const GF2X& a, long b)
{
   if ((b & 1) == 0)
      Error("div: division by zero");

   q = a;
}

void GF2XFromBytes(GF2X& x, const unsigned char *p, long n)
// bytes are read least significant first: bit j of p[i] is the
// coefficient of X^(8*i + j)
{
   if (n <= 0) {
      x = 0;
      return;
   }

   const long BytesPerLong = NTL_BITS_PER_LONG/8;

   long lw, r, i, j;

   lw = n/BytesPerLong;
   r = n - lw*BytesPerLong;

   if (r != 0)
      lw++;
   else
      r = BytesPerLong;

   x.xrep.SetLength(lw);
   unsigned long *xp = x.xrep.elts();

   for (i = 0; i < lw-1; i++) {
      unsigned long t = 0;
      for (j = 0; j < BytesPerLong; j++) {
         t >>= 8;
         t += (((unsigned long)(*p)) & 255UL) << ((BytesPerLong-1)*8);
         p++;
      }
      xp[i] = t;
   }

   unsigned long t = 0;
   for (j = 0; j < r; j++) {
      t >>= 8;
      t += (((unsigned long)(*p)) & 255UL) << ((BytesPerLong-1)*8);
      p++;
   }

   t >>= (BytesPerLong-r)*8;
   xp[lw-1] = t;

   x.normalize();
}

void BytesFromGF2X(unsigned char *p, const GF2X& a, long n)
// inverse of GF2XFromBytes: writes min(n, bytes needed) data bytes,
// then pads with zero bytes up to length n
{
   if (n < 0) n = 0;

   const long BytesPerLong = NTL_BITS_PER_LONG/8;

   long lbits = deg(a) + 1;
   long lbytes = (lbits+7)/8;

   long min_bytes = min(lbytes, n);

   long min_words = min_bytes/BytesPerLong;

   long r = min_bytes - min_words*BytesPerLong;
   if (r != 0)
      min_words++;
   else
      r = BytesPerLong;

   const unsigned long *ap = a.xrep.elts();

   long i, j;

   for (i = 0; i < min_words-1; i++) {
      unsigned long t = ap[i];
      for (j = 0; j < BytesPerLong; j++) {
         *p = t & 255UL;
         t >>= 8;
         p++;
      }
   }

   if (min_words > 0) {
      unsigned long t = ap[min_words-1];
      for (j = 0; j < r; j++) {
         *p = t & 255UL;
         t >>= 8;
         p++;
      }
   }

   for (j = min_bytes; j < n; j++) {
      *p = 0;
      p++;
   }
}

NTL_END_IMPL