Context Navigation

source: git/ntl/src/ZZX1.c @ 287cc8

spielwiese

Last change on this file since 287cc8 was 287cc8, checked in by Hans Schönemann <hannes@…>, 14 years ago
ntl 5.5.2 git-svn-id: file:///usr/local/Singular/svn/trunk@12402 2c84dea3-7e68-4137-9b89-c4e89433aadc
Property mode set to `100644`
File size: 40.8 KB

Line
1
2
3	#include <NTL/ZZX.h>
4
5	#include <NTL/new.h>
6
7	NTL_START_IMPL
8
9
10
11
12
13	void conv(zz_pX& x, const ZZX& a)
14	{
15	conv(x.rep, a.rep);
16	x.normalize();
17	}
18
19
20	void conv(ZZX& x, const zz_pX& a)
21	{
22	conv(x.rep, a.rep);
23	x.normalize();
24	}
25
26
27	long CRT(ZZX& gg, ZZ& a, const zz_pX& G)
28	{
29	long n = gg.rep.length();
30
31	long p = zz_p::modulus();
32
33	ZZ new_a;
34	mul(new_a, a, p);
35
36	long a_inv;
37	a_inv = rem(a, p);
38	a_inv = InvMod(a_inv, p);
39
40	long p1;
41	p1 = p >> 1;
42
43	ZZ a1;
44	RightShift(a1, a, 1);
45
46	long p_odd = (p & 1);
47
48	long modified = 0;
49
50	long h;
51
52	long m = G.rep.length();
53
54	long max_mn = max(m, n);
55
56	gg.rep.SetLength(max_mn);
57
58	ZZ g;
59	long i;
60
61	for (i = 0; i < n; i++) {
62	if (!CRTInRange(gg.rep[i], a)) {
63	modified = 1;
64	rem(g, gg.rep[i], a);
65	if (g > a1) sub(g, g, a);
66	}
67	else
68	g = gg.rep[i];
69
70	h = rem(g, p);
71
72	if (i < m)
73	h = SubMod(rep(G.rep[i]), h, p);
74	else
75	h = NegateMod(h, p);
76
77	h = MulMod(h, a_inv, p);
78	if (h > p1)
79	h = h - p;
80
81	if (h != 0) {
82	modified = 1;
83
84	if (!p_odd && g > 0 && (h == p1))
85	MulSubFrom(g, a, h);
86	else
87	MulAddTo(g, a, h);
88	}
89
90	gg.rep[i] = g;
91	}
92
93
94	for (; i < m; i++) {
95	h = rep(G.rep[i]);
96	h = MulMod(h, a_inv, p);
97	if (h > p1)
98	h = h - p;
99
100	modified = 1;
101	mul(g, a, h);
102	gg.rep[i] = g;
103	}
104
105	gg.normalize();
106	a = new_a;
107
108	return modified;
109	}
110
111	long CRT(ZZX& gg, ZZ& a, const ZZ_pX& G)
112	{
113	long n = gg.rep.length();
114
115	const ZZ& p = ZZ_p::modulus();
116
117	ZZ new_a;
118	mul(new_a, a, p);
119
120	ZZ a_inv;
121	rem(a_inv, a, p);
122	InvMod(a_inv, a_inv, p);
123
124	ZZ p1;
125	RightShift(p1, p, 1);
126
127	ZZ a1;
128	RightShift(a1, a, 1);
129
130	long p_odd = IsOdd(p);
131
132	long modified = 0;
133
134	ZZ h;
135	ZZ ah;
136
137	long m = G.rep.length();
138
139	long max_mn = max(m, n);
140
141	gg.rep.SetLength(max_mn);
142
143	ZZ g;
144	long i;
145
146	for (i = 0; i < n; i++) {
147	if (!CRTInRange(gg.rep[i], a)) {
148	modified = 1;
149	rem(g, gg.rep[i], a);
150	if (g > a1) sub(g, g, a);
151	}
152	else
153	g = gg.rep[i];
154
155	rem(h, g, p);
156
157	if (i < m)
158	SubMod(h, rep(G.rep[i]), h, p);
159	else
160	NegateMod(h, h, p);
161
162	MulMod(h, h, a_inv, p);
163	if (h > p1)
164	sub(h, h, p);
165
166	if (h != 0) {
167	modified = 1;
168	mul(ah, a, h);
169
170	if (!p_odd && g > 0 && (h == p1))
171	sub(g, g, ah);
172	else
173	add(g, g, ah);
174	}
175
176	gg.rep[i] = g;
177	}
178
179
180	for (; i < m; i++) {
181	h = rep(G.rep[i]);
182	MulMod(h, h, a_inv, p);
183	if (h > p1)
184	sub(h, h, p);
185
186	modified = 1;
187	mul(g, a, h);
188	gg.rep[i] = g;
189	}
190
191	gg.normalize();
192	a = new_a;
193
194	return modified;
195	}
196
197
198
199
200	/* Compute a = b * 2^l mod p, where p = 2^n+1. 0<=l<=n and 0<b<p are
201	assumed. */
202	static void LeftRotate(ZZ& a, const ZZ& b, long l, const ZZ& p, long n)
203	{
204	if (l == 0) {
205	if (&a != &b) {
206	a = b;
207	}
208	return;
209	}
210
211	/* tmp := upper l bits of b */
212	static ZZ tmp;
213	RightShift(tmp, b, n - l);
214	/* a := 2^l * lower n - l bits of b */
215	trunc(a, b, n - l);
216	LeftShift(a, a, l);
217	/* a -= tmp */
218	sub(a, a, tmp);
219	if (sign(a) < 0) {
220	add(a, a, p);
221	}
222	}
223
224
225	/* Compute a = b * 2^l mod p, where p = 2^n+1. 0<=p<b is assumed. */
226	static void Rotate(ZZ& a, const ZZ& b, long l, const ZZ& p, long n)
227	{
228	if (IsZero(b)) {
229	clear(a);
230	return;
231	}
232
233	/* l %= 2n */
234	if (l >= 0) {
235	l %= (n << 1);
236	} else {
237	l = (n << 1) - 1 - (-(l + 1) % (n << 1));
238	}
239
240	/* a = b * 2^l mod p */
241	if (l < n) {
242	LeftRotate(a, b, l, p, n);
243	} else {
244	LeftRotate(a, b, l - n, p, n);
245	SubPos(a, p, a);
246	}
247	}
248
249
250
251	/* Fast Fourier Transform. a is a vector of length 2^l, 2^l divides 2n,
252	p = 2^n+1, w = 2^r mod p is a primitive (2^l)th root of
253	unity. Returns a(1),a(w),...,a(w^{2^l-1}) mod p in bit-reverse
254	order. */
255	static void fft(vec_ZZ& a, long r, long l, const ZZ& p, long n)
256	{
257	long round;
258	long off, i, j, e;
259	long halfsize;
260	ZZ tmp, tmp1;
261
262	for (round = 0; round < l; round++, r <<= 1) {
263	halfsize = 1L << (l - 1 - round);
264	for (i = (1L << round) - 1, off = 0; i >= 0; i--, off += halfsize) {
265	for (j = 0, e = 0; j < halfsize; j++, off++, e+=r) {
266	/* One butterfly :
267	( a[off], a[off+halfsize] ) *= ( 1 w^{j2^round} )
268	( 1 -w^{j2^round} ) */
269	/* tmp = a[off] - a[off + halfsize] mod p */
270	sub(tmp, a[off], a[off + halfsize]);
271	if (sign(tmp) < 0) {
272	add(tmp, tmp, p);
273	}
274	/* a[off] += a[off + halfsize] mod p */
275	add(a[off], a[off], a[off + halfsize]);
276	sub(tmp1, a[off], p);
277	if (sign(tmp1) >= 0) {
278	a[off] = tmp1;
279	}
280	/* a[off + halfsize] = tmp * w^{j2^round} mod p */
281	Rotate(a[off + halfsize], tmp, e, p, n);
282	}
283	}
284	}
285	}
286
287	/* Inverse FFT. r must be the same as in the call to FFT. Result is
288	by 2^l too large. */
289	static void ifft(vec_ZZ& a, long r, long l, const ZZ& p, long n)
290	{
291	long round;
292	long off, i, j, e;
293	long halfsize;
294	ZZ tmp, tmp1;
295
296	for (round = l - 1, r <<= l - 1; round >= 0; round--, r >>= 1) {
297	halfsize = 1L << (l - 1 - round);
298	for (i = (1L << round) - 1, off = 0; i >= 0; i--, off += halfsize) {
299	for (j = 0, e = 0; j < halfsize; j++, off++, e+=r) {
300	/* One inverse butterfly :
301	( a[off], a[off+halfsize] ) *= ( 1 1 )
302	( w^{-j2^round} -w^{-j2^round} ) */
303	/* a[off + halfsize] = w^{-j2^round} mod p /
304	Rotate(a[off + halfsize], a[off + halfsize], -e, p, n);
305	/* tmp = a[off] - a[off + halfsize] */
306	sub(tmp, a[off], a[off + halfsize]);
307
308	/* a[off] += a[off + halfsize] mod p */
309	add(a[off], a[off], a[off + halfsize]);
310	sub(tmp1, a[off], p);
311	if (sign(tmp1) >= 0) {
312	a[off] = tmp1;
313	}
314	/* a[off+halfsize] = tmp mod p */
315	if (sign(tmp) < 0) {
316	add(a[off+halfsize], tmp, p);
317	} else {
318	a[off+halfsize] = tmp;
319	}
320	}
321	}
322	}
323	}
324
325
326
327	/* Multiplication a la Schoenhage & Strassen, modulo a "Fermat" number
328	p = 2^{mr}+1, where m is a power of two and r is odd. Then w = 2^r
329	is a primitive 2mth root of unity, i.e., polynomials whose product
330	has degree less than 2m can be multiplied, provided that the
331	coefficients of the product polynomial are at most 2^{mr-1} in
332	absolute value. The algorithm is not called recursively;
333	coefficient arithmetic is done directly.*/
334
335	void SSMul(ZZX& c, const ZZX& a, const ZZX& b)
336	{
337	long na = deg(a);
338	long nb = deg(b);
339
340	if (na <= 0 \|\| nb <= 0) {
341	PlainMul(c, a, b);
342	return;
343	}
344
345	long n = na + nb; /* degree of the product */
346
347
348	/* Choose m and r suitably */
349	long l = NextPowerOfTwo(n + 1) - 1; /* 2^l <= n < 2^{l+1} */
350	long m2 = 1L << (l + 1); /* m2 = 2m = 2^{l+1} */
351	/* Bitlength of the product: if the coefficients of a are absolutely less
352	than 2^ka and the coefficients of b are absolutely less than 2^kb, then
353	the coefficients of ab are absolutely less than
354	(min(na,nb)+1)2^{ka+kb} <= 2^bound. */
355	long bound = 2 + NumBits(min(na, nb)) + MaxBits(a) + MaxBits(b);
356	/* Let r be minimal so that mr > bound */
357	long r = (bound >> l) + 1;
358	long mr = r << l;
359
360	/* p := 2^{mr}+1 */
361	ZZ p;
362	set(p);
363	LeftShift(p, p, mr);
364	add(p, p, 1);
365
366	/* Make coefficients of a and b positive */
367	vec_ZZ aa, bb;
368	aa.SetLength(m2);
369	bb.SetLength(m2);
370
371	long i;
372	for (i = 0; i <= deg(a); i++) {
373	if (sign(a.rep[i]) >= 0) {
374	aa[i] = a.rep[i];
375	} else {
376	add(aa[i], a.rep[i], p);
377	}
378	}
379
380	for (i = 0; i <= deg(b); i++) {
381	if (sign(b.rep[i]) >= 0) {
382	bb[i] = b.rep[i];
383	} else {
384	add(bb[i], b.rep[i], p);
385	}
386	}
387
388	/* 2m-point FFT's mod p */
389	fft(aa, r, l + 1, p, mr);
390	fft(bb, r, l + 1, p, mr);
391
392	/* Pointwise multiplication aa := aa * bb mod p */
393	ZZ tmp, ai;
394	for (i = 0; i < m2; i++) {
395	mul(ai, aa[i], bb[i]);
396	if (NumBits(ai) > mr) {
397	RightShift(tmp, ai, mr);
398	trunc(ai, ai, mr);
399	sub(ai, ai, tmp);
400	if (sign(ai) < 0) {
401	add(ai, ai, p);
402	}
403	}
404	aa[i] = ai;
405	}
406
407	ifft(aa, r, l + 1, p, mr);
408
409	/* Retrieve c, dividing by 2m, and subtracting p where necessary */
410	c.rep.SetLength(n + 1);
411	for (i = 0; i <= n; i++) {
412	ai = aa[i];
413	ZZ& ci = c.rep[i];
414	if (!IsZero(ai)) {
415	/* ci = -ai * 2^{mr-l-1} = ai * 2^{-l-1} = ai / 2m mod p */
416	LeftRotate(ai, ai, mr - l - 1, p, mr);
417	sub(tmp, p, ai);
418	if (NumBits(tmp) >= mr) { /* ci >= (p-1)/2 */
419	negate(ci, ai); /* ci = -ai = ci - p */
420	}
421	else
422	ci = tmp;
423	}
424	else
425	clear(ci);
426	}
427	}
428
429
430	// SSRatio computes how much bigger the SS modulus must be
431	// to accomodate the necessary roots of unity.
432	// This is useful in determining algorithm crossover points.
433
434	double SSRatio(long na, long maxa, long nb, long maxb)
435	{
436	if (na <= 0 \|\| nb <= 0) return 0;
437
438	long n = na + nb; /* degree of the product */
439
440
441	long l = NextPowerOfTwo(n + 1) - 1; /* 2^l <= n < 2^{l+1} */
442	long bound = 2 + NumBits(min(na, nb)) + maxa + maxb;
443	long r = (bound >> l) + 1;
444	long mr = r << l;
445
446	return double(mr + 1)/double(bound);
447	}
448
449	void HomMul(ZZX& x, const ZZX& a, const ZZX& b)
450	{
451	if (&a == &b) {
452	HomSqr(x, a);
453	return;
454	}
455
456	long da = deg(a);
457	long db = deg(b);
458
459	if (da < 0 \|\| db < 0) {
460	clear(x);
461	return;
462	}
463
464	long bound = 2 + NumBits(min(da, db)+1) + MaxBits(a) + MaxBits(b);
465
466
467	ZZ prod;
468	set(prod);
469
470	long i, nprimes;
471
472	zz_pBak bak;
473	bak.save();
474
475	for (nprimes = 0; NumBits(prod) <= bound; nprimes++) {
476	if (nprimes >= NumFFTPrimes)
477	zz_p::FFTInit(nprimes);
478	mul(prod, prod, FFTPrime[nprimes]);
479	}
480
481
482	ZZ coeff;
483	ZZ t1;
484	long tt;
485
486	vec_ZZ c;
487
488	c.SetLength(da+db+1);
489
490	long j;
491
492	for (i = 0; i < nprimes; i++) {
493	zz_p::FFTInit(i);
494	long p = zz_p::modulus();
495
496	div(t1, prod, p);
497	tt = rem(t1, p);
498	tt = InvMod(tt, p);
499	mul(coeff, t1, tt);
500
501	zz_pX A, B, C;
502
503	conv(A, a);
504	conv(B, b);
505	mul(C, A, B);
506
507	long m = deg(C);
508
509	for (j = 0; j <= m; j++) {
510	/* c[j] += coeffrep(C.rep[j]) /
511	MulAddTo(c[j], coeff, rep(C.rep[j]));
512	// mul(t1, coeff, rep(C.rep[j]));
513	// add(c[j], c[j], t1);
514	}
515	}
516
517	x.rep.SetLength(da+db+1);
518
519	ZZ prod2;
520	RightShift(prod2, prod, 1);
521
522	for (j = 0; j <= da+db; j++) {
523	rem(t1, c[j], prod);
524
525	if (t1 > prod2)
526	sub(x.rep[j], t1, prod);
527	else
528	x.rep[j] = t1;
529	}
530
531	x.normalize();
532
533	bak.restore();
534	}
535
536	static
537	long MaxSize(const ZZX& a)
538	{
539	long res = 0;
540	long n = a.rep.length();
541
542	long i;
543	for (i = 0; i < n; i++) {
544	long t = a.rep[i].size();
545	if (t > res)
546	res = t;
547	}
548
549	return res;
550	}
551
552
553	void mul(ZZX& c, const ZZX& a, const ZZX& b)
554	{
555	if (IsZero(a) \|\| IsZero(b)) {
556	clear(c);
557	return;
558	}
559
560	if (&a == &b) {
561	sqr(c, a);
562	return;
563	}
564
565	long maxa = MaxSize(a);
566	long maxb = MaxSize(b);
567
568	long k = min(maxa, maxb);
569	long s = min(deg(a), deg(b)) + 1;
570
571	if (s == 1 \|\| (k == 1 && s < 40) \|\| (k == 2 && s < 20) \|\|
572	(k == 3 && s < 10)) {
573
574	PlainMul(c, a, b);
575	return;
576	}
577
578	if (s < 80 \|\| (k < 30 && s < 150)) {
579	KarMul(c, a, b);
580	return;
581	}
582
583
584	if (maxa + maxb >= 40 &&
585	SSRatio(deg(a), MaxBits(a), deg(b), MaxBits(b)) < 1.75)
586	SSMul(c, a, b);
587	else
588	HomMul(c, a, b);
589	}
590
591
592	void SSSqr(ZZX& c, const ZZX& a)
593	{
594	long na = deg(a);
595	if (na <= 0) {
596	PlainSqr(c, a);
597	return;
598	}
599
600	long n = na + na; /* degree of the product */
601
602
603	long l = NextPowerOfTwo(n + 1) - 1; /* 2^l <= n < 2^{l+1} */
604	long m2 = 1L << (l + 1); /* m2 = 2m = 2^{l+1} */
605	long bound = 2 + NumBits(na) + 2*MaxBits(a);
606	long r = (bound >> l) + 1;
607	long mr = r << l;
608
609	/* p := 2^{mr}+1 */
610	ZZ p;
611	set(p);
612	LeftShift(p, p, mr);
613	add(p, p, 1);
614
615	vec_ZZ aa;
616	aa.SetLength(m2);
617
618	long i;
619	for (i = 0; i <= deg(a); i++) {
620	if (sign(a.rep[i]) >= 0) {
621	aa[i] = a.rep[i];
622	} else {
623	add(aa[i], a.rep[i], p);
624	}
625	}
626
627
628	/* 2m-point FFT's mod p */
629	fft(aa, r, l + 1, p, mr);
630
631	/* Pointwise multiplication aa := aa * aa mod p */
632	ZZ tmp, ai;
633	for (i = 0; i < m2; i++) {
634	sqr(ai, aa[i]);
635	if (NumBits(ai) > mr) {
636	RightShift(tmp, ai, mr);
637	trunc(ai, ai, mr);
638	sub(ai, ai, tmp);
639	if (sign(ai) < 0) {
640	add(ai, ai, p);
641	}
642	}
643	aa[i] = ai;
644	}
645
646	ifft(aa, r, l + 1, p, mr);
647
648	ZZ ci;
649
650	/* Retrieve c, dividing by 2m, and subtracting p where necessary */
651	c.rep.SetLength(n + 1);
652
653	for (i = 0; i <= n; i++) {
654	ai = aa[i];
655	ZZ& ci = c.rep[i];
656	if (!IsZero(ai)) {
657	/* ci = -ai * 2^{mr-l-1} = ai * 2^{-l-1} = ai / 2m mod p */
658	LeftRotate(ai, ai, mr - l - 1, p, mr);
659	sub(tmp, p, ai);
660	if (NumBits(tmp) >= mr) { /* ci >= (p-1)/2 */
661	negate(ci, ai); /* ci = -ai = ci - p */
662	}
663	else
664	ci = tmp;
665	}
666	else
667	clear(ci);
668	}
669	}
670
671	void HomSqr(ZZX& x, const ZZX& a)
672	{
673
674	long da = deg(a);
675
676	if (da < 0) {
677	clear(x);
678	return;
679	}
680
681	long bound = 2 + NumBits(da+1) + 2*MaxBits(a);
682
683
684	ZZ prod;
685	set(prod);
686
687	long i, nprimes;
688
689	zz_pBak bak;
690	bak.save();
691
692	for (nprimes = 0; NumBits(prod) <= bound; nprimes++) {
693	if (nprimes >= NumFFTPrimes)
694	zz_p::FFTInit(nprimes);
695	mul(prod, prod, FFTPrime[nprimes]);
696	}
697
698
699	ZZ coeff;
700	ZZ t1;
701	long tt;
702
703	vec_ZZ c;
704
705	c.SetLength(da+da+1);
706
707	long j;
708
709	for (i = 0; i < nprimes; i++) {
710	zz_p::FFTInit(i);
711	long p = zz_p::modulus();
712
713	div(t1, prod, p);
714	tt = rem(t1, p);
715	tt = InvMod(tt, p);
716	mul(coeff, t1, tt);
717
718	zz_pX A, C;
719
720	conv(A, a);
721	sqr(C, A);
722
723	long m = deg(C);
724
725	for (j = 0; j <= m; j++) {
726	/* c[j] += coeffrep(C.rep[j]) /
727	MulAddTo(c[j], coeff, rep(C.rep[j]));
728	// mul(t1, coeff, rep(C.rep[j]));
729	// add(c[j], c[j], t1);
730	}
731	}
732
733	x.rep.SetLength(da+da+1);
734
735	ZZ prod2;
736	RightShift(prod2, prod, 1);
737
738	for (j = 0; j <= da+da; j++) {
739	rem(t1, c[j], prod);
740
741	if (t1 > prod2)
742	sub(x.rep[j], t1, prod);
743	else
744	x.rep[j] = t1;
745	}
746
747	x.normalize();
748
749	bak.restore();
750	}
751
752
753	void sqr(ZZX& c, const ZZX& a)
754	{
755	if (IsZero(a)) {
756	clear(c);
757	return;
758	}
759
760	long maxa = MaxSize(a);
761
762	long k = maxa;
763	long s = deg(a) + 1;
764
765	if (s == 1 \|\| (k == 1 && s < 50) \|\| (k == 2 && s < 25) \|\|
766	(k == 3 && s < 25) \|\| (k == 4 && s < 10)) {
767
768	PlainSqr(c, a);
769	return;
770	}
771
772	if (s < 80 \|\| (k < 30 && s < 150)) {
773	KarSqr(c, a);
774	return;
775	}
776
777	long mba = MaxBits(a);
778
779	if (2*maxa >= 40 &&
780	SSRatio(deg(a), mba, deg(a), mba) < 1.75)
781	SSSqr(c, a);
782	else
783	HomSqr(c, a);
784	}
785
786
787	void mul(ZZX& x, const ZZX& a, const ZZ& b)
788	{
789	ZZ t;
790	long i, da;
791
792	const ZZ *ap;
793	ZZ* xp;
794
795	if (IsZero(b)) {
796	clear(x);
797	return;
798	}
799
800	t = b;
801	da = deg(a);
802	x.rep.SetLength(da+1);
803	ap = a.rep.elts();
804	xp = x.rep.elts();
805
806	for (i = 0; i <= da; i++)
807	mul(xp[i], ap[i], t);
808	}
809
810	void mul(ZZX& x, const ZZX& a, long b)
811	{
812	long i, da;
813
814	const ZZ *ap;
815	ZZ* xp;
816
817	if (b == 0) {
818	clear(x);
819	return;
820	}
821
822	da = deg(a);
823	x.rep.SetLength(da+1);
824	ap = a.rep.elts();
825	xp = x.rep.elts();
826
827	for (i = 0; i <= da; i++)
828	mul(xp[i], ap[i], b);
829	}
830
831
832
833
834	void diff(ZZX& x, const ZZX& a)
835	{
836	long n = deg(a);
837	long i;
838
839	if (n <= 0) {
840	clear(x);
841	return;
842	}
843
844	if (&x != &a)
845	x.rep.SetLength(n);
846
847	for (i = 0; i <= n-1; i++) {
848	mul(x.rep[i], a.rep[i+1], i+1);
849	}
850
851	if (&x == &a)
852	x.rep.SetLength(n);
853
854	x.normalize();
855	}
856
857	void HomPseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b)
858	{
859	if (IsZero(b)) Error("division by zero");
860
861	long da = deg(a);
862	long db = deg(b);
863
864	if (da < db) {
865	r = b;
866	clear(q);
867	return;
868	}
869
870	ZZ LC;
871	LC = LeadCoeff(b);
872
873	ZZ LC1;
874
875	power(LC1, LC, da-db+1);
876
877	long a_bound = NumBits(LC1) + MaxBits(a);
878
879	LC1.kill();
880
881	long b_bound = MaxBits(b);
882
883	zz_pBak bak;
884	bak.save();
885
886	ZZX qq, rr;
887
888	ZZ prod, t;
889	set(prod);
890
891	clear(qq);
892	clear(rr);
893
894	long i;
895	long Qinstable, Rinstable;
896
897	Qinstable = 1;
898	Rinstable = 1;
899
900	for (i = 0; ; i++) {
901	zz_p::FFTInit(i);
902	long p = zz_p::modulus();
903
904
905	if (divide(LC, p)) continue;
906
907	zz_pX A, B, Q, R;
908
909	conv(A, a);
910	conv(B, b);
911
912	if (!IsOne(LC)) {
913	zz_p y;
914	conv(y, LC);
915	power(y, y, da-db+1);
916	mul(A, A, y);
917	}
918
919	if (!Qinstable) {
920	conv(Q, qq);
921	mul(R, B, Q);
922	sub(R, A, R);
923
924	if (deg(R) >= db)
925	Qinstable = 1;
926	else
927	Rinstable = CRT(rr, prod, R);
928	}
929
930	if (Qinstable) {
931	DivRem(Q, R, A, B);
932	t = prod;
933	Qinstable = CRT(qq, t, Q);
934	Rinstable = CRT(rr, prod, R);
935	}
936
937	if (!Qinstable && !Rinstable) {
938	// stabilized...check if prod is big enough
939
940	long bound1 = b_bound + MaxBits(qq) + NumBits(min(db, da-db)+1);
941	long bound2 = MaxBits(rr);
942	long bound = max(bound1, bound2);
943
944	if (a_bound > bound)
945	bound = a_bound;
946
947	bound += 4;
948
949	if (NumBits(prod) > bound)
950	break;
951	}
952	}
953
954	bak.restore();
955
956	q = qq;
957	r = rr;
958	}
959
960
961
962
963	void HomPseudoDiv(ZZX& q, const ZZX& a, const ZZX& b)
964	{
965	ZZX r;
966	HomPseudoDivRem(q, r, a, b);
967	}
968
969	void HomPseudoRem(ZZX& r, const ZZX& a, const ZZX& b)
970	{
971	ZZX q;
972	HomPseudoDivRem(q, r, a, b);
973	}
974
975	void PlainPseudoDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b)
976	{
977	long da, db, dq, i, j, LCIsOne;
978	const ZZ *bp;
979	ZZ *qp;
980	ZZ *xp;
981
982
983	ZZ s, t;
984
985	da = deg(a);
986	db = deg(b);
987
988	if (db < 0) Error("ZZX: division by zero");
989
990	if (da < db) {
991	r = a;
992	clear(q);
993	return;
994	}
995
996	ZZX lb;
997
998	if (&q == &b) {
999	lb = b;
1000	bp = lb.rep.elts();
1001	}
1002	else
1003	bp = b.rep.elts();
1004
1005	ZZ LC = bp[db];
1006	LCIsOne = IsOne(LC);
1007
1008
1009	vec_ZZ x;
1010
1011	x = a.rep;
1012	xp = x.elts();
1013
1014	dq = da - db;
1015	q.rep.SetLength(dq+1);
1016	qp = q.rep.elts();
1017
1018	if (!LCIsOne) {
1019	t = LC;
1020	for (i = dq-1; i >= 0; i--) {
1021	mul(xp[i], xp[i], t);
1022	if (i > 0) mul(t, t, LC);
1023	}
1024	}
1025
1026	for (i = dq; i >= 0; i--) {
1027	t = xp[i+db];
1028	qp[i] = t;
1029
1030	for (j = db-1; j >= 0; j--) {
1031	mul(s, t, bp[j]);
1032	if (!LCIsOne) mul(xp[i+j], xp[i+j], LC);
1033	sub(xp[i+j], xp[i+j], s);
1034	}
1035	}
1036
1037	if (!LCIsOne) {
1038	t = LC;
1039	for (i = 1; i <= dq; i++) {
1040	mul(qp[i], qp[i], t);
1041	if (i < dq) mul(t, t, LC);
1042	}
1043	}
1044
1045
1046	r.rep.SetLength(db);
1047	for (i = 0; i < db; i++)
1048	r.rep[i] = xp[i];
1049	r.normalize();
1050	}
1051
1052
1053	void PlainPseudoDiv(ZZX& q, const ZZX& a, const ZZX& b)
1054	{
1055	ZZX r;
1056	PlainPseudoDivRem(q, r, a, b);
1057	}
1058
1059	void PlainPseudoRem(ZZX& r, const ZZX& a, const ZZX& b)
1060	{
1061	ZZX q;
1062	PlainPseudoDivRem(q, r, a, b);
1063	}
1064
1065	void div(ZZX& q, const ZZX& a, long b)
1066	{
1067	if (b == 0) Error("div: division by zero");
1068
1069	if (!divide(q, a, b)) Error("DivRem: quotient undefined over ZZ");
1070	}
1071
1072	void div(ZZX& q, const ZZX& a, const ZZ& b)
1073	{
1074	if (b == 0) Error("div: division by zero");
1075
1076	if (!divide(q, a, b)) Error("DivRem: quotient undefined over ZZ");
1077	}
1078
1079	static
1080	void ConstDivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZ& b)
1081	{
1082	if (b == 0) Error("DivRem: division by zero");
1083
1084	if (!divide(q, a, b)) Error("DivRem: quotient undefined over ZZ");
1085
1086	r = 0;
1087	}
1088
1089	static
1090	void ConstRem(ZZX& r, const ZZX& a, const ZZ& b)
1091	{
1092	if (b == 0) Error("rem: division by zero");
1093
1094	r = 0;
1095	}
1096
1097
1098
1099	void DivRem(ZZX& q, ZZX& r, const ZZX& a, const ZZX& b)
1100	{
1101	long da = deg(a);
1102	long db = deg(b);
1103
1104	if (db < 0) Error("DivRem: division by zero");
1105
1106	if (da < db) {
1107	r = a;
1108	q = 0;
1109	}
1110	else if (db == 0) {
1111	ConstDivRem(q, r, a, ConstTerm(b));
1112	}
1113	else if (IsOne(LeadCoeff(b))) {
1114	PseudoDivRem(q, r, a, b);
1115	}
1116	else if (LeadCoeff(b) == -1) {
1117	ZZX b1;
1118	negate(b1, b);
1119	PseudoDivRem(q, r, a, b1);
1120	negate(q, q);
1121	}
1122	else if (divide(q, a, b)) {
1123	r = 0;
1124	}
1125	else {
1126	ZZX q1, r1;
1127	ZZ m;
1128	PseudoDivRem(q1, r1, a, b);
1129	power(m, LeadCoeff(b), da-db+1);
1130	if (!divide(q, q1, m)) Error("DivRem: quotient not defined over ZZ");
1131	if (!divide(r, r1, m)) Error("DivRem: remainder not defined over ZZ");
1132	}
1133	}
1134
1135	void div(ZZX& q, const ZZX& a, const ZZX& b)
1136	{
1137	long da = deg(a);
1138	long db = deg(b);
1139
1140	if (db < 0) Error("div: division by zero");
1141
1142	if (da < db) {
1143	q = 0;
1144	}
1145	else if (db == 0) {
1146	div(q, a, ConstTerm(b));
1147	}
1148	else if (IsOne(LeadCoeff(b))) {
1149	PseudoDiv(q, a, b);
1150	}
1151	else if (LeadCoeff(b) == -1) {
1152	ZZX b1;
1153	negate(b1, b);
1154	PseudoDiv(q, a, b1);
1155	negate(q, q);
1156	}
1157	else if (divide(q, a, b)) {
1158
1159	// nothing to do
1160
1161	}
1162	else {
1163	ZZX q1;
1164	ZZ m;
1165	PseudoDiv(q1, a, b);
1166	power(m, LeadCoeff(b), da-db+1);
1167	if (!divide(q, q1, m)) Error("div: quotient not defined over ZZ");
1168	}
1169	}
1170
1171	void rem(ZZX& r, const ZZX& a, const ZZX& b)
1172	{
1173	long da = deg(a);
1174	long db = deg(b);
1175
1176	if (db < 0) Error("rem: division by zero");
1177
1178	if (da < db) {
1179	r = a;
1180	}
1181	else if (db == 0) {
1182	ConstRem(r, a, ConstTerm(b));
1183	}
1184	else if (IsOne(LeadCoeff(b))) {
1185	PseudoRem(r, a, b);
1186	}
1187	else if (LeadCoeff(b) == -1) {
1188	ZZX b1;
1189	negate(b1, b);
1190	PseudoRem(r, a, b1);
1191	}
1192	else if (divide(a, b)) {
1193	r = 0;
1194	}
1195	else {
1196	ZZX r1;
1197	ZZ m;
1198	PseudoRem(r1, a, b);
1199	power(m, LeadCoeff(b), da-db+1);
1200	if (!divide(r, r1, m)) Error("rem: remainder not defined over ZZ");
1201	}
1202	}
1203
1204	long HomDivide(ZZX& q, const ZZX& a, const ZZX& b)
1205	{
1206	if (IsZero(b)) {
1207	if (IsZero(a)) {
1208	clear(q);
1209	return 1;
1210	}
1211	else
1212	return 0;
1213	}
1214
1215	if (IsZero(a)) {
1216	clear(q);
1217	return 1;
1218	}
1219
1220	if (deg(b) == 0) {
1221	return divide(q, a, ConstTerm(b));
1222	}
1223
1224	if (deg(a) < deg(b)) return 0;
1225
1226	ZZ ca, cb, cq;
1227
1228	content(ca, a);
1229	content(cb, b);
1230
1231	if (!divide(cq, ca, cb)) return 0;
1232
1233	ZZX aa, bb;
1234
1235	divide(aa, a, ca);
1236	divide(bb, b, cb);
1237
1238	if (!divide(LeadCoeff(aa), LeadCoeff(bb)))
1239	return 0;
1240
1241	if (!divide(ConstTerm(aa), ConstTerm(bb)))
1242	return 0;
1243
1244	zz_pBak bak;
1245	bak.save();
1246
1247	ZZX qq;
1248
1249	ZZ prod;
1250	set(prod);
1251
1252	clear(qq);
1253	long res = 1;
1254	long Qinstable = 1;
1255
1256
1257	long a_bound = MaxBits(aa);
1258	long b_bound = MaxBits(bb);
1259
1260
1261	long i;
1262	for (i = 0; ; i++) {
1263	zz_p::FFTInit(i);
1264	long p = zz_p::modulus();
1265
1266	if (divide(LeadCoeff(bb), p)) continue;
1267
1268	zz_pX A, B, Q, R;
1269
1270	conv(A, aa);
1271	conv(B, bb);
1272
1273	if (!Qinstable) {
1274	conv(Q, qq);
1275	mul(R, B, Q);
1276	sub(R, A, R);
1277
1278	if (deg(R) >= deg(B))
1279	Qinstable = 1;
1280	else if (!IsZero(R)) {
1281	res = 0;
1282	break;
1283	}
1284	else
1285	mul(prod, prod, p);
1286	}
1287
1288	if (Qinstable) {
1289	if (!divide(Q, A, B)) {
1290	res = 0;
1291	break;
1292	}
1293
1294	Qinstable = CRT(qq, prod, Q);
1295	}
1296
1297	if (!Qinstable) {
1298	// stabilized...check if prod is big enough
1299
1300	long bound = b_bound + MaxBits(qq) +
1301	NumBits(min(deg(bb), deg(qq)) + 1);
1302
1303	if (a_bound > bound)
1304	bound = a_bound;
1305
1306	bound += 3;
1307
1308	if (NumBits(prod) > bound)
1309	break;
1310	}
1311	}
1312
1313	bak.restore();
1314
1315	if (res) mul(q, qq, cq);
1316	return res;
1317
1318	}
1319
1320
1321	long HomDivide(const ZZX& a, const ZZX& b)
1322	{
1323	if (deg(b) == 0) {
1324	return divide(a, ConstTerm(b));
1325	}
1326	else {
1327	ZZX q;
1328	return HomDivide(q, a, b);
1329	}
1330	}
1331
1332	long PlainDivide(ZZX& qq, const ZZX& aa, const ZZX& bb)
1333	{
1334	if (IsZero(bb)) {
1335	if (IsZero(aa)) {
1336	clear(qq);
1337	return 1;
1338	}
1339	else
1340	return 0;
1341	}
1342
1343	if (deg(bb) == 0) {
1344	return divide(qq, aa, ConstTerm(bb));
1345	}
1346
1347	long da, db, dq, i, j, LCIsOne;
1348	const ZZ *bp;
1349	ZZ *qp;
1350	ZZ *xp;
1351
1352
1353	ZZ s, t;
1354
1355	da = deg(aa);
1356	db = deg(bb);
1357
1358	if (da < db) {
1359	return 0;
1360	}
1361
1362	ZZ ca, cb, cq;
1363
1364	content(ca, aa);
1365	content(cb, bb);
1366
1367	if (!divide(cq, ca, cb)) {
1368	return 0;
1369	}
1370
1371
1372	ZZX a, b, q;
1373
1374	divide(a, aa, ca);
1375	divide(b, bb, cb);
1376
1377	if (!divide(LeadCoeff(a), LeadCoeff(b)))
1378	return 0;
1379
1380	if (!divide(ConstTerm(a), ConstTerm(b)))
1381	return 0;
1382
1383	long coeff_bnd = MaxBits(a) + (NumBits(da+1)+1)/2 + (da-db);
1384
1385	bp = b.rep.elts();
1386
1387	ZZ LC;
1388	LC = bp[db];
1389
1390	LCIsOne = IsOne(LC);
1391
1392	xp = a.rep.elts();
1393
1394	dq = da - db;
1395	q.rep.SetLength(dq+1);
1396	qp = q.rep.elts();
1397
1398	for (i = dq; i >= 0; i--) {
1399	if (!LCIsOne) {
1400	if (!divide(t, xp[i+db], LC))
1401	return 0;
1402	}
1403	else
1404	t = xp[i+db];
1405
1406	if (NumBits(t) > coeff_bnd) return 0;
1407
1408	qp[i] = t;
1409
1410	for (j = db-1; j >= 0; j--) {
1411	mul(s, t, bp[j]);
1412	sub(xp[i+j], xp[i+j], s);
1413	}
1414	}
1415
1416	for (i = 0; i < db; i++)
1417	if (!IsZero(xp[i]))
1418	return 0;
1419
1420	mul(qq, q, cq);
1421	return 1;
1422	}
1423
1424	long PlainDivide(const ZZX& a, const ZZX& b)
1425	{
1426	if (deg(b) == 0)
1427	return divide(a, ConstTerm(b));
1428	else {
1429	ZZX q;
1430	return PlainDivide(q, a, b);
1431	}
1432	}
1433
1434
1435	long divide(ZZX& q, const ZZX& a, const ZZX& b)
1436	{
1437	long da = deg(a);
1438	long db = deg(b);
1439
1440	if (db <= 8 \|\| da-db <= 8)
1441	return PlainDivide(q, a, b);
1442	else
1443	return HomDivide(q, a, b);
1444	}
1445
1446	long divide(const ZZX& a, const ZZX& b)
1447	{
1448	long da = deg(a);
1449	long db = deg(b);
1450
1451	if (db <= 8 \|\| da-db <= 8)
1452	return PlainDivide(a, b);
1453	else
1454	return HomDivide(a, b);
1455	}
1456
1457
1458
1459
1460
1461
1462
1463	long divide(ZZX& q, const ZZX& a, const ZZ& b)
1464	{
1465	if (IsZero(b)) {
1466	if (IsZero(a)) {
1467	clear(q);
1468	return 1;
1469	}
1470	else
1471	return 0;
1472	}
1473
1474	if (IsOne(b)) {
1475	q = a;
1476	return 1;
1477	}
1478
1479	if (b == -1) {
1480	negate(q, a);
1481	return 1;
1482	}
1483
1484	long n = a.rep.length();
1485	vec_ZZ res(INIT_SIZE, n);
1486	long i;
1487
1488	for (i = 0; i < n; i++) {
1489	if (!divide(res[i], a.rep[i], b))
1490	return 0;
1491	}
1492
1493	q.rep = res;
1494	return 1;
1495	}
1496
1497	long divide(const ZZX& a, const ZZ& b)
1498	{
1499	if (IsZero(b)) return IsZero(a);
1500
1501	if (IsOne(b) \|\| b == -1) {
1502	return 1;
1503	}
1504
1505	long n = a.rep.length();
1506	long i;
1507
1508	for (i = 0; i < n; i++) {
1509	if (!divide(a.rep[i], b))
1510	return 0;
1511	}
1512
1513	return 1;
1514	}
1515
1516	long divide(ZZX& q, const ZZX& a, long b)
1517	{
1518	if (b == 0) {
1519	if (IsZero(a)) {
1520	clear(q);
1521	return 1;
1522	}
1523	else
1524	return 0;
1525	}
1526
1527	if (b == 1) {
1528	q = a;
1529	return 1;
1530	}
1531
1532	if (b == -1) {
1533	negate(q, a);
1534	return 1;
1535	}
1536
1537	long n = a.rep.length();
1538	vec_ZZ res(INIT_SIZE, n);
1539	long i;
1540
1541	for (i = 0; i < n; i++) {
1542	if (!divide(res[i], a.rep[i], b))
1543	return 0;
1544	}
1545
1546	q.rep = res;
1547	return 1;
1548	}
1549
1550	long divide(const ZZX& a, long b)
1551	{
1552	if (b == 0) return IsZero(a);
1553	if (b == 1 \|\| b == -1) {
1554	return 1;
1555	}
1556
1557	long n = a.rep.length();
1558	long i;
1559
1560	for (i = 0; i < n; i++) {
1561	if (!divide(a.rep[i], b))
1562	return 0;
1563	}
1564
1565	return 1;
1566	}
1567
1568
1569
1570	void content(ZZ& d, const ZZX& f)
1571	{
1572	ZZ res;
1573	long i;
1574
1575	clear(res);
1576	for (i = 0; i <= deg(f); i++) {
1577	GCD(res, res, f.rep[i]);
1578	if (IsOne(res)) break;
1579	}
1580
1581	if (sign(LeadCoeff(f)) < 0) negate(res, res);
1582	d = res;
1583	}
1584
1585	void PrimitivePart(ZZX& pp, const ZZX& f)
1586	{
1587	if (IsZero(f)) {
1588	clear(pp);
1589	return;
1590	}
1591
1592	ZZ d;
1593
1594	content(d, f);
1595	divide(pp, f, d);
1596	}
1597
1598
1599	static
1600	void BalCopy(ZZX& g, const zz_pX& G)
1601	{
1602	long p = zz_p::modulus();
1603	long p2 = p >> 1;
1604	long n = G.rep.length();
1605	long i;
1606	long t;
1607
1608	g.rep.SetLength(n);
1609	for (i = 0; i < n; i++) {
1610	t = rep(G.rep[i]);
1611	if (t > p2) t = t - p;
1612	conv(g.rep[i], t);
1613	}
1614	}
1615
1616
1617
1618	void GCD(ZZX& d, const ZZX& a, const ZZX& b)
1619	{
1620	if (IsZero(a)) {
1621	d = b;
1622	if (sign(LeadCoeff(d)) < 0) negate(d, d);
1623	return;
1624	}
1625
1626	if (IsZero(b)) {
1627	d = a;
1628	if (sign(LeadCoeff(d)) < 0) negate(d, d);
1629	return;
1630	}
1631
1632	ZZ c1, c2, c;
1633	ZZX f1, f2;
1634
1635	content(c1, a);
1636	divide(f1, a, c1);
1637
1638	content(c2, b);
1639	divide(f2, b, c2);
1640
1641	GCD(c, c1, c2);
1642
1643	ZZ ld;
1644	GCD(ld, LeadCoeff(f1), LeadCoeff(f2));
1645
1646	ZZX g, h, res;
1647
1648	ZZ prod;
1649	set(prod);
1650
1651	zz_pBak bak;
1652	bak.save();
1653
1654
1655	long FirstTime = 1;
1656
1657	long i;
1658	for (i = 0; ;i++) {
1659	zz_p::FFTInit(i);
1660	long p = zz_p::modulus();
1661
1662	if (divide(LeadCoeff(f1), p) \|\| divide(LeadCoeff(f2), p)) continue;
1663
1664	zz_pX G, F1, F2;
1665	zz_p LD;
1666
1667	conv(F1, f1);
1668	conv(F2, f2);
1669	conv(LD, ld);
1670
1671	GCD(G, F1, F2);
1672	mul(G, G, LD);
1673
1674
1675	if (deg(G) == 0) {
1676	set(res);
1677	break;
1678	}
1679
1680	if (FirstTime \|\| deg(G) < deg(g)) {
1681	FirstTime = 0;
1682	conv(prod, p);
1683	BalCopy(g, G);
1684	}
1685	else if (deg(G) > deg(g))
1686	continue;
1687	else if (!CRT(g, prod, G)) {
1688	PrimitivePart(res, g);
1689	if (divide(f1, res) && divide(f2, res))
1690	break;
1691	}
1692
1693	}
1694
1695	bak.restore();
1696
1697	mul(d, res, c);
1698	if (sign(LeadCoeff(d)) < 0) negate(d, d);
1699	}
1700
1701	void trunc(ZZX& x, const ZZX& a, long m)
1702
1703	// x = a % X^m, output may alias input
1704
1705	{
1706	if (m < 0) Error("trunc: bad args");
1707
1708	if (&x == &a) {
1709	if (x.rep.length() > m) {
1710	x.rep.SetLength(m);
1711	x.normalize();
1712	}
1713	}
1714	else {
1715	long n;
1716	long i;
1717	ZZ* xp;
1718	const ZZ* ap;
1719
1720	n = min(a.rep.length(), m);
1721	x.rep.SetLength(n);
1722
1723	xp = x.rep.elts();
1724	ap = a.rep.elts();
1725
1726	for (i = 0; i < n; i++) xp[i] = ap[i];
1727
1728	x.normalize();
1729	}
1730	}
1731
1732
1733
1734	void LeftShift(ZZX& x, const ZZX& a, long n)
1735	{
1736	if (IsZero(a)) {
1737	clear(x);
1738	return;
1739	}
1740
1741	if (n < 0) {
1742	if (n < -NTL_MAX_LONG)
1743	clear(x);
1744	else
1745	RightShift(x, a, -n);
1746	return;
1747	}
1748
1749	if (NTL_OVERFLOW(n, 1, 0))
1750	Error("overflow in LeftShift");
1751
1752	long m = a.rep.length();
1753
1754	x.rep.SetLength(m+n);
1755
1756	long i;
1757	for (i = m-1; i >= 0; i--)
1758	x.rep[i+n] = a.rep[i];
1759
1760	for (i = 0; i < n; i++)
1761	clear(x.rep[i]);
1762	}
1763
1764
1765	void RightShift(ZZX& x, const ZZX& a, long n)
1766	{
1767	if (IsZero(a)) {
1768	clear(x);
1769	return;
1770	}
1771
1772	if (n < 0) {
1773	if (n < -NTL_MAX_LONG) Error("overflow in RightShift");
1774	LeftShift(x, a, -n);
1775	return;
1776	}
1777
1778	long da = deg(a);
1779	long i;
1780
1781	if (da < n) {
1782	clear(x);
1783	return;
1784	}
1785
1786	if (&x != &a)
1787	x.rep.SetLength(da-n+1);
1788
1789	for (i = 0; i <= da-n; i++)
1790	x.rep[i] = a.rep[i+n];
1791
1792	if (&x == &a)
1793	x.rep.SetLength(da-n+1);
1794
1795	x.normalize();
1796	}
1797
1798
1799	void TraceVec(vec_ZZ& S, const ZZX& ff)
1800	{
1801	if (!IsOne(LeadCoeff(ff)))
1802	Error("TraceVec: bad args");
1803
1804	ZZX f;
1805	f = ff;
1806
1807	long n = deg(f);
1808
1809	S.SetLength(n);
1810
1811	if (n == 0)
1812	return;
1813
1814	long k, i;
1815	ZZ acc, t;
1816
1817	S[0] = n;
1818
1819	for (k = 1; k < n; k++) {
1820	mul(acc, f.rep[n-k], k);
1821
1822	for (i = 1; i < k; i++) {
1823	mul(t, f.rep[n-i], S[k-i]);
1824	add(acc, acc, t);
1825	}
1826
1827	negate(S[k], acc);
1828	}
1829
1830	}
1831
1832	static
1833	void EuclLength(ZZ& l, const ZZX& a)
1834	{
1835	long n = a.rep.length();
1836	long i;
1837
1838	ZZ sum, t;
1839
1840	clear(sum);
1841	for (i = 0; i < n; i++) {
1842	sqr(t, a.rep[i]);
1843	add(sum, sum, t);
1844	}
1845
1846	if (sum > 1) {
1847	SqrRoot(l, sum);
1848	add(l, l, 1);
1849	}
1850	else
1851	l = sum;
1852	}
1853
1854
1855
1856	static
1857	long ResBound(const ZZX& a, const ZZX& b)
1858	{
1859	if (IsZero(a) \|\| IsZero(b))
1860	return 0;
1861
1862	ZZ t1, t2, t;
1863	EuclLength(t1, a);
1864	EuclLength(t2, b);
1865	power(t1, t1, deg(b));
1866	power(t2, t2, deg(a));
1867	mul(t, t1, t2);
1868	return NumBits(t);
1869	}
1870
1871
1872
1873	void resultant(ZZ& rres, const ZZX& a, const ZZX& b, long deterministic)
1874	{
1875	if (IsZero(a) \|\| IsZero(b)) {
1876	clear(rres);
1877	return;
1878	}
1879
1880	zz_pBak zbak;
1881	zbak.save();
1882
1883	ZZ_pBak Zbak;
1884	Zbak.save();
1885
1886	long instable = 1;
1887
1888	long bound = 2+ResBound(a, b);
1889
1890	long gp_cnt = 0;
1891
1892	ZZ res, prod;
1893
1894	clear(res);
1895	set(prod);
1896
1897
1898	long i;
1899	for (i = 0; ; i++) {
1900	if (NumBits(prod) > bound)
1901	break;
1902
1903	if (!deterministic &&
1904	!instable && bound > 1000 && NumBits(prod) < 0.25*bound) {
1905
1906	ZZ P;
1907
1908
1909	long plen = 90 + NumBits(max(bound, NumBits(res)));
1910
1911	do {
1912	GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++));
1913	}
1914	while (divide(LeadCoeff(a), P) \|\| divide(LeadCoeff(b), P));
1915
1916	ZZ_p::init(P);
1917
1918	ZZ_pX A, B;
1919	conv(A, a);
1920	conv(B, b);
1921
1922	ZZ_p t;
1923	resultant(t, A, B);
1924
1925	if (CRT(res, prod, rep(t), P))
1926	instable = 1;
1927	else
1928	break;
1929	}
1930
1931
1932	zz_p::FFTInit(i);
1933	long p = zz_p::modulus();
1934	if (divide(LeadCoeff(a), p) \|\| divide(LeadCoeff(b), p))
1935	continue;
1936
1937	zz_pX A, B;
1938	conv(A, a);
1939	conv(B, b);
1940
1941	zz_p t;
1942	resultant(t, A, B);
1943
1944	instable = CRT(res, prod, rep(t), p);
1945	}
1946
1947	rres = res;
1948
1949	zbak.restore();
1950	Zbak.restore();
1951	}
1952
1953
1954
1955
1956	void MinPolyMod(ZZX& gg, const ZZX& a, const ZZX& f)
1957
1958	{
1959	if (!IsOne(LeadCoeff(f)) \|\| deg(f) < 1 \|\| deg(a) >= deg(f))
1960	Error("MinPolyMod: bad args");
1961
1962	if (IsZero(a)) {
1963	SetX(gg);
1964	return;
1965	}
1966
1967	ZZ_pBak Zbak;
1968	Zbak.save();
1969	zz_pBak zbak;
1970	zbak.save();
1971
1972	long n = deg(f);
1973
1974	long instable = 1;
1975
1976	long gp_cnt = 0;
1977
1978	ZZ prod;
1979	ZZX g;
1980
1981	clear(g);
1982	set(prod);
1983
1984	long bound = -1;
1985
1986	long i;
1987	for (i = 0; ; i++) {
1988	if (deg(g) == n) {
1989	if (bound < 0)
1990	bound = 2+CharPolyBound(a, f);
1991
1992	if (NumBits(prod) > bound)
1993	break;
1994	}
1995
1996	if (!instable &&
1997	(deg(g) < n \|\|
1998	(deg(g) == n && bound > 1000 && NumBits(prod) < 0.75*bound))) {
1999
2000	// guarantees 2^{-80} error probability
2001	long plen = 90 + max( 2*NumBits(n) + NumBits(MaxBits(f)),
2002	max( NumBits(n) + NumBits(MaxBits(a)),
2003	NumBits(MaxBits(g)) ));
2004
2005	ZZ P;
2006	GenPrime(P, plen, 90 + 2*NumBits(gp_cnt++));
2007	ZZ_p::init(P);
2008
2009
2010	ZZ_pX A, F, G;
2011	conv(A, a);
2012	conv(F, f);
2013	conv(G, g);
2014
2015	ZZ_pXModulus FF;
2016	build(FF, F);
2017
2018	ZZ_pX H;
2019	CompMod(H, G, A, FF);
2020
2021	if (IsZero(H))
2022	break;
2023
2024	instable = 1;
2025	}
2026
2027	zz_p::FFTInit(i);
2028
2029	zz_pX A, F;
2030	conv(A, a);
2031	conv(F, f);
2032
2033	zz_pXModulus FF;
2034	build(FF, F);
2035
2036	zz_pX G;
2037	MinPolyMod(G, A, FF);
2038
2039	if (deg(G) < deg(g))
2040	continue;
2041
2042	if (deg(G) > deg(g)) {
2043	clear(g);
2044	set(prod);
2045	}
2046
2047	instable = CRT(g, prod, G);
2048	}
2049
2050	gg = g;
2051
2052	Zbak.restore();
2053	zbak.restore();
2054	}
2055
2056
2057	void XGCD(ZZ& rr, ZZX& ss, ZZX& tt, const ZZX& a, const ZZX& b,
2058	long deterministic)
2059	{
2060	ZZ r;
2061
2062	resultant(r, a, b, deterministic);
2063
2064	if (IsZero(r)) {
2065	clear(rr);
2066	return;
2067	}
2068
2069	zz_pBak bak;
2070	bak.save();
2071
2072	long i;
2073	long instable = 1;
2074
2075	ZZ tmp;
2076	ZZ prod;
2077	ZZX s, t;
2078
2079	set(prod);
2080	clear(s);
2081	clear(t);
2082
2083	for (i = 0; ; i++) {
2084	zz_p::FFTInit(i);
2085	long p = zz_p::modulus();
2086
2087	if (divide(LeadCoeff(a), p) \|\| divide(LeadCoeff(b), p) \|\| divide(r, p))
2088	continue;
2089
2090	zz_p R;
2091	conv(R, r);
2092
2093	zz_pX D, S, T, A, B;
2094	conv(A, a);
2095	conv(B, b);
2096
2097	if (!instable) {
2098	conv(S, s);
2099	conv(T, t);
2100	zz_pX t1, t2;
2101	mul(t1, A, S);
2102	mul(t2, B, T);
2103	add(t1, t1, t2);
2104
2105	if (deg(t1) == 0 && ConstTerm(t1) == R)
2106	mul(prod, prod, p);
2107	else
2108	instable = 1;
2109	}
2110
2111	if (instable) {
2112	XGCD(D, S, T, A, B);
2113
2114	mul(S, S, R);
2115	mul(T, T, R);
2116
2117	tmp = prod;
2118	long Sinstable = CRT(s, tmp, S);
2119	long Tinstable = CRT(t, prod, T);
2120
2121	instable = Sinstable \|\| Tinstable;
2122	}
2123
2124	if (!instable) {
2125	long bound1 = NumBits(min(deg(a), deg(s)) + 1)
2126	+ MaxBits(a) + MaxBits(s);
2127	long bound2 = NumBits(min(deg(b), deg(t)) + 1)
2128	+ MaxBits(b) + MaxBits(t);
2129
2130	long bound = 4 + max(NumBits(r), max(bound1, bound2));
2131
2132	if (NumBits(prod) > bound)
2133	break;
2134	}
2135	}
2136
2137	rr = r;
2138	ss = s;
2139	tt = t;
2140
2141	bak.restore();
2142	}
2143
2144	void NormMod(ZZ& x, const ZZX& a, const ZZX& f, long deterministic)
2145	{
2146	if (!IsOne(LeadCoeff(f)) \|\| deg(a) >= deg(f) \|\| deg(f) <= 0)
2147	Error("norm: bad args");
2148
2149	if (IsZero(a)) {
2150	clear(x);
2151	return;
2152	}
2153
2154	resultant(x, f, a, deterministic);
2155	}
2156
2157	void TraceMod(ZZ& res, const ZZX& a, const ZZX& f)
2158	{
2159	if (!IsOne(LeadCoeff(f)) \|\| deg(a) >= deg(f) \|\| deg(f) <= 0)
2160	Error("trace: bad args");
2161
2162	vec_ZZ S;
2163
2164	TraceVec(S, f);
2165
2166	InnerProduct(res, S, a.rep);
2167	}
2168
2169
2170	void discriminant(ZZ& d, const ZZX& a, long deterministic)
2171	{
2172	long m = deg(a);
2173
2174	if (m < 0) {
2175	clear(d);
2176	return;
2177	}
2178
2179	ZZX a1;
2180	ZZ res;
2181
2182	diff(a1, a);
2183	resultant(res, a, a1, deterministic);
2184	if (!divide(res, res, LeadCoeff(a)))
2185	Error("discriminant: inexact division");
2186
2187	m = m & 3;
2188	if (m >= 2)
2189	negate(res, res);
2190
2191	d = res;
2192	}
2193
2194
2195	void MulMod(ZZX& x, const ZZX& a, const ZZX& b, const ZZX& f)
2196	{
2197	if (deg(a) >= deg(f) \|\| deg(b) >= deg(f) \|\| deg(f) == 0 \|\|
2198	!IsOne(LeadCoeff(f)))
2199	Error("MulMod: bad args");
2200
2201	ZZX t;
2202	mul(t, a, b);
2203	rem(x, t, f);
2204	}
2205
2206	void SqrMod(ZZX& x, const ZZX& a, const ZZX& f)
2207	{
2208	if (deg(a) >= deg(f) \|\| deg(f) == 0 \|\| !IsOne(LeadCoeff(f)))
2209	Error("MulMod: bad args");
2210
2211	ZZX t;
2212	sqr(t, a);
2213	rem(x, t, f);
2214	}
2215
2216
2217
2218	static
2219	void MulByXModAux(ZZX& h, const ZZX& a, const ZZX& f)
2220	{
2221	long i, n, m;
2222	ZZ* hh;
2223	const ZZ aa, ff;
2224
2225	ZZ t, z;
2226
2227
2228	n = deg(f);
2229	m = deg(a);
2230
2231	if (m >= n \|\| n == 0 \|\| !IsOne(LeadCoeff(f)))
2232	Error("MulByXMod: bad args");
2233
2234	if (m < 0) {
2235	clear(h);
2236	return;
2237	}
2238
2239	if (m < n-1) {
2240	h.rep.SetLength(m+2);
2241	hh = h.rep.elts();
2242	aa = a.rep.elts();
2243	for (i = m+1; i >= 1; i--)
2244	hh[i] = aa[i-1];
2245	clear(hh[0]);
2246	}
2247	else {
2248	h.rep.SetLength(n);
2249	hh = h.rep.elts();
2250	aa = a.rep.elts();
2251	ff = f.rep.elts();
2252	negate(z, aa[n-1]);
2253	for (i = n-1; i >= 1; i--) {
2254	mul(t, z, ff[i]);
2255	add(hh[i], aa[i-1], t);
2256	}
2257	mul(hh[0], z, ff[0]);
2258	h.normalize();
2259	}
2260	}
2261
2262	void MulByXMod(ZZX& h, const ZZX& a, const ZZX& f)
2263	{
2264	if (&h == &f) {
2265	ZZX hh;
2266	MulByXModAux(hh, a, f);
2267	h = hh;
2268	}
2269	else
2270	MulByXModAux(h, a, f);
2271	}
2272
2273	static
2274	void EuclLength1(ZZ& l, const ZZX& a)
2275	{
2276	long n = a.rep.length();
2277	long i;
2278
2279	ZZ sum, t;
2280
2281	clear(sum);
2282	for (i = 0; i < n; i++) {
2283	sqr(t, a.rep[i]);
2284	add(sum, sum, t);
2285	}
2286
2287	abs(t, ConstTerm(a));
2288	mul(t, t, 2);
2289	add(t, t, 1);
2290	add(sum, sum, t);
2291
2292	if (sum > 1) {
2293	SqrRoot(l, sum);
2294	add(l, l, 1);
2295	}
2296	else
2297	l = sum;
2298	}
2299
2300
2301	long CharPolyBound(const ZZX& a, const ZZX& f)
2302	// This computes a bound on the size of the
2303	// coefficients of the characterstic polynomial.
2304	// It uses the characterization of the char poly as
2305	// resultant_y(f(y), x-a(y)), and then interpolates this
2306	// through complex primimitive (deg(f)+1)-roots of unity.
2307
2308	{
2309	if (IsZero(a) \|\| IsZero(f))
2310	Error("CharPolyBound: bad args");
2311
2312	ZZ t1, t2, t;
2313	EuclLength1(t1, a);
2314	EuclLength(t2, f);
2315	power(t1, t1, deg(f));
2316	power(t2, t2, deg(a));
2317	mul(t, t1, t2);
2318	return NumBits(t);
2319	}
2320
2321
2322	void SetCoeff(ZZX& x, long i, long a)
2323	{
2324	if (a == 1)
2325	SetCoeff(x, i);
2326	else {
2327	static ZZ aa;
2328	conv(aa, a);
2329	SetCoeff(x, i, aa);
2330	}
2331	}
2332
2333	// vectors
2334
2335	NTL_vector_impl(ZZX,vec_ZZX)
2336
2337	NTL_eq_vector_impl(ZZX,vec_ZZX)
2338
2339
2340
2341	void CopyReverse(ZZX& x, const ZZX& a, long hi)
2342
2343	// x[0..hi] = reverse(a[0..hi]), with zero fill
2344	// input may not alias output
2345
2346	{
2347	long i, j, n, m;
2348
2349	n = hi+1;
2350	m = a.rep.length();
2351
2352	x.rep.SetLength(n);
2353
2354	const ZZ* ap = a.rep.elts();
2355	ZZ* xp = x.rep.elts();
2356
2357	for (i = 0; i < n; i++) {
2358	j = hi-i;
2359	if (j < 0 \|\| j >= m)
2360	clear(xp[i]);
2361	else
2362	xp[i] = ap[j];
2363	}
2364
2365	x.normalize();
2366	}
2367
2368	void reverse(ZZX& x, const ZZX& a, long hi)
2369	{
2370	if (hi < 0) { clear(x); return; }
2371	if (NTL_OVERFLOW(hi, 1, 0))
2372	Error("overflow in reverse");
2373
2374	if (&x == &a) {
2375	ZZX tmp;
2376	CopyReverse(tmp, a, hi);
2377	x = tmp;
2378	}
2379	else
2380	CopyReverse(x, a, hi);
2381	}
2382
2383	void MulTrunc(ZZX& x, const ZZX& a, const ZZX& b, long n)
2384	{
2385	ZZX t;
2386	mul(t, a, b);
2387	trunc(x, t, n);
2388	}
2389
2390	void SqrTrunc(ZZX& x, const ZZX& a, long n)
2391	{
2392	ZZX t;
2393	sqr(t, a);
2394	trunc(x, t, n);
2395	}
2396
2397
2398	void NewtonInvTrunc(ZZX& c, const ZZX& a, long e)
2399	{
2400	ZZ x;
2401
2402	if (ConstTerm(a) == 1)
2403	x = 1;
2404	else if (ConstTerm(a) == -1)
2405	x = -1;
2406	else
2407	Error("InvTrunc: non-invertible constant term");
2408
2409	if (e == 1) {
2410	conv(c, x);
2411	return;
2412	}
2413
2414	static vec_long E;
2415	E.SetLength(0);
2416	append(E, e);
2417	while (e > 1) {
2418	e = (e+1)/2;
2419	append(E, e);
2420	}
2421
2422	long L = E.length();
2423
2424	ZZX g, g0, g1, g2;
2425
2426
2427	g.rep.SetMaxLength(E[0]);
2428	g0.rep.SetMaxLength(E[0]);
2429	g1.rep.SetMaxLength((3*E[0]+1)/2);
2430	g2.rep.SetMaxLength(E[0]);
2431
2432	conv(g, x);
2433
2434	long i;
2435
2436	for (i = L-1; i > 0; i--) {
2437	// lift from E[i] to E[i-1]
2438
2439	long k = E[i];
2440	long l = E[i-1]-E[i];
2441
2442	trunc(g0, a, k+l);
2443
2444	mul(g1, g0, g);
2445	RightShift(g1, g1, k);
2446	trunc(g1, g1, l);
2447
2448	mul(g2, g1, g);
2449	trunc(g2, g2, l);
2450	LeftShift(g2, g2, k);
2451
2452	sub(g, g, g2);
2453	}
2454
2455	c = g;
2456	}
2457
2458
2459	void InvTrunc(ZZX& c, const ZZX& a, long e)
2460	{
2461	if (e < 0) Error("InvTrunc: bad args");
2462
2463	if (e == 0) {
2464	clear(c);
2465	return;
2466	}
2467
2468	if (NTL_OVERFLOW(e, 1, 0))
2469	Error("overflow in InvTrunc");
2470
2471	NewtonInvTrunc(c, a, e);
2472	}
2473
2474	NTL_END_IMPL

Note: See TracBrowser for help on using the repository browser.

Download in other formats: