1 | |
---|
2 | |
---|
3 | #include <stdio.h> |
---|
4 | |
---|
5 | #include <NTL/version.h> |
---|
6 | |
---|
7 | |
---|
8 | #if (defined(__GNUC__) && (defined(__i386__) || defined(__i486__) || defined(__i586__))) |
---|
9 | |
---|
10 | |
---|
11 | #define AutoFix (1) |
---|
12 | |
---|
13 | #else |
---|
14 | |
---|
15 | #define AutoFix (0) |
---|
16 | |
---|
17 | #endif |
---|
18 | |
---|
19 | |
---|
20 | long f1(void); |
---|
21 | long f2(long bpl); |
---|
22 | double f3(void); |
---|
23 | void f4(double *p); |
---|
24 | void f5(int *p); |
---|
25 | long f6(void); |
---|
26 | long f7(void); |
---|
27 | long f8(void); |
---|
28 | |
---|
29 | |
---|
30 | |
---|
31 | double power2(long k) |
---|
32 | { |
---|
33 | long i; |
---|
34 | double res; |
---|
35 | |
---|
36 | res = 1; |
---|
37 | |
---|
38 | for (i = 1; i <= k; i++) |
---|
39 | res = res * 2; |
---|
40 | |
---|
41 | return res; |
---|
42 | } |
---|
43 | |
---|
44 | long DoubleRounding(long dp) |
---|
45 | { |
---|
46 | double a = power2(dp-1) + 1; |
---|
47 | double b = (power2(dp-6)-1)/power2(dp-5); |
---|
48 | double x; |
---|
49 | x = a + b; |
---|
50 | f4(&x); |
---|
51 | |
---|
52 | if (x != power2(dp-1) + 1) |
---|
53 | return 1; |
---|
54 | else |
---|
55 | return 0; |
---|
56 | } |
---|
57 | |
---|
58 | |
---|
59 | long DoublePrecision() |
---|
60 | { |
---|
61 | long k; |
---|
62 | double l1 = (double)1; |
---|
63 | double lh = 1/(double)2; |
---|
64 | double epsilon; |
---|
65 | double fudge, oldfudge; |
---|
66 | |
---|
67 | epsilon = l1; |
---|
68 | fudge = l1+l1; |
---|
69 | |
---|
70 | k = 0; |
---|
71 | |
---|
72 | do { |
---|
73 | k++; |
---|
74 | epsilon = epsilon * lh; |
---|
75 | oldfudge = fudge; |
---|
76 | fudge = l1 + epsilon; |
---|
77 | f4(&fudge); |
---|
78 | f4(&oldfudge); |
---|
79 | } while (fudge > l1 && fudge < oldfudge); |
---|
80 | |
---|
81 | return k; |
---|
82 | } |
---|
83 | |
---|
84 | long DoublePrecision1() |
---|
85 | { |
---|
86 | register double fudge, oldfudge; |
---|
87 | |
---|
88 | long k; |
---|
89 | double l1 = (double)1; |
---|
90 | double lh = 1/(double)2; |
---|
91 | double epsilon; |
---|
92 | |
---|
93 | |
---|
94 | epsilon = l1; |
---|
95 | fudge = l1+l1; |
---|
96 | |
---|
97 | k = 0; |
---|
98 | |
---|
99 | do { |
---|
100 | k++; |
---|
101 | epsilon = epsilon * lh; |
---|
102 | oldfudge = fudge; |
---|
103 | fudge = l1 + epsilon; |
---|
104 | } while (fudge > l1 && fudge < oldfudge); |
---|
105 | |
---|
106 | return k; |
---|
107 | } |
---|
108 | |
---|
109 | union d_or_rep { |
---|
110 | double d; |
---|
111 | unsigned long rep[2]; |
---|
112 | }; |
---|
113 | |
---|
114 | long RepTest(void) |
---|
115 | { |
---|
116 | union d_or_rep v; |
---|
117 | |
---|
118 | v.rep[0] = v.rep[1] = 0; |
---|
119 | |
---|
120 | v.d = 565656565656.0; |
---|
121 | |
---|
122 | if (v.rep[0] == 0x42607678 && v.rep[1] == 0x46f30000) |
---|
123 | return 1; |
---|
124 | else if (v.rep[1] == 0x42607678 && v.rep[0] == 0x46f30000) |
---|
125 | return -1; |
---|
126 | else |
---|
127 | return 0; |
---|
128 | } |
---|
129 | |
---|
130 | void print2k(FILE *f, long k, long bpl) |
---|
131 | { |
---|
132 | long m, l; |
---|
133 | long first; |
---|
134 | |
---|
135 | if (k <= 0) { |
---|
136 | fprintf(f, "((double) 1.0)"); |
---|
137 | return; |
---|
138 | } |
---|
139 | |
---|
140 | m = bpl - 2; |
---|
141 | first = 1; |
---|
142 | |
---|
143 | fprintf(f, "("); |
---|
144 | |
---|
145 | while (k > 0) { |
---|
146 | if (k > m) |
---|
147 | l = m; |
---|
148 | else |
---|
149 | l = k; |
---|
150 | |
---|
151 | k = k - l; |
---|
152 | |
---|
153 | if (first) |
---|
154 | first = 0; |
---|
155 | else |
---|
156 | fprintf(f, "*"); |
---|
157 | |
---|
158 | fprintf(f, "((double)(1L<<%ld))", l); |
---|
159 | } |
---|
160 | |
---|
161 | fprintf(f, ")"); |
---|
162 | } |
---|
163 | |
---|
164 | |
---|
165 | void print_BB_mul_code(FILE *f, long n) |
---|
166 | { |
---|
167 | long i; |
---|
168 | |
---|
169 | fprintf(f, "\n\n"); |
---|
170 | |
---|
171 | fprintf(f, "#define NTL_BB_MUL_CODE \\\n"); |
---|
172 | |
---|
173 | for (i = n-6; i >= 2; i -= 2) { |
---|
174 | fprintf(f, "hi=(hi<<2)|(lo>>%ld); ", n-2); |
---|
175 | fprintf(f, "lo=(lo<<2)^A[(b>>%ld)&3];\\\n", i); |
---|
176 | } |
---|
177 | |
---|
178 | fprintf(f, "\n\n"); |
---|
179 | } |
---|
180 | |
---|
181 | void print_BB_half_mul_code(FILE *f, long n) |
---|
182 | { |
---|
183 | long i; |
---|
184 | |
---|
185 | fprintf(f, "\n\n"); |
---|
186 | |
---|
187 | fprintf(f, "#define NTL_BB_HALF_MUL_CODE \\\n"); |
---|
188 | |
---|
189 | for (i = n/2-6; i >= 2; i -= 2) { |
---|
190 | fprintf(f, "hi=(hi<<2)|(lo>>%ld); ", n-2); |
---|
191 | fprintf(f, "lo=(lo<<2)^A[(b>>%ld)&3];\\\n", i); |
---|
192 | } |
---|
193 | |
---|
194 | fprintf(f, "\n\n"); |
---|
195 | } |
---|
196 | |
---|
197 | |
---|
198 | void print_BB_sqr_code(FILE *f, long n) |
---|
199 | { |
---|
200 | long i, pos; |
---|
201 | |
---|
202 | fprintf(f, "\n\n"); |
---|
203 | fprintf(f, "#define NTL_BB_SQR_CODE \\\n"); |
---|
204 | fprintf(f, "lo=sqrtab[a&255];\\\n"); |
---|
205 | pos = 16; |
---|
206 | |
---|
207 | for (i = 8; i < n; i += 8) { |
---|
208 | if (2*(i+8) <= n) { |
---|
209 | fprintf(f, "lo=lo|(sqrtab[(a>>%ld)&255]<<%ld);\\\n", i, pos); |
---|
210 | pos += 16; |
---|
211 | } |
---|
212 | else if (2*i == n) { |
---|
213 | fprintf(f, "hi=sqrtab[(a>>%ld)&255];\\\n", i); |
---|
214 | pos = 16; |
---|
215 | } |
---|
216 | else if (2*i > n) { |
---|
217 | fprintf(f, "hi=hi|(sqrtab[(a>>%ld)&255]<<%ld);\\\n", i, pos); |
---|
218 | pos += 16; |
---|
219 | } |
---|
220 | else { /* only applies if word size is not a multiple of 16 */ |
---|
221 | fprintf(f, "_ntl_ulong t=sqrtab[(a>>%ld)&255];\\\n", i); |
---|
222 | fprintf(f, "lo=lo|(t<<%ld);\\\n", pos); |
---|
223 | fprintf(f, "hi=t>>%ld;\\\n", n-8); |
---|
224 | pos = 8; |
---|
225 | } |
---|
226 | } |
---|
227 | |
---|
228 | fprintf(f, "\n\n"); |
---|
229 | } |
---|
230 | |
---|
231 | |
---|
232 | void print_BB_rev_code(FILE *f, long n) |
---|
233 | { |
---|
234 | long i; |
---|
235 | |
---|
236 | fprintf(f, "\n\n"); |
---|
237 | fprintf(f, "#define NTL_BB_REV_CODE "); |
---|
238 | |
---|
239 | for (i = 0; i < n; i += 8) { |
---|
240 | if (i != 0) fprintf(f, "\\\n|"); |
---|
241 | fprintf(f, "(revtab[(a>>%ld)&255]<<%ld)", i, n-8-i); |
---|
242 | } |
---|
243 | |
---|
244 | fprintf(f, "\n\n"); |
---|
245 | } |
---|
246 | |
---|
247 | |
---|
248 | |
---|
249 | |
---|
250 | char *yn_vec[2] = { "no", "yes" }; |
---|
251 | |
---|
252 | |
---|
253 | |
---|
254 | int main() |
---|
255 | { |
---|
256 | long bpl, bpi, rs_arith, nbits, single_mul_ok; |
---|
257 | long a; |
---|
258 | long x; |
---|
259 | long dp, dp1, dr; |
---|
260 | FILE *f; |
---|
261 | int xx; |
---|
262 | long warnings = 0; |
---|
263 | int c; |
---|
264 | |
---|
265 | fprintf(stderr, "This is NTL version %s\n\n", NTL_VERSION); |
---|
266 | |
---|
267 | |
---|
268 | x = f1(); |
---|
269 | if (~x != f8()) { |
---|
270 | fprintf(stderr, "BAD NEWS: machine must be 2's compliment.\n"); |
---|
271 | return 1; |
---|
272 | } |
---|
273 | |
---|
274 | if ((x >> 1) == x) |
---|
275 | rs_arith = 1; |
---|
276 | else |
---|
277 | rs_arith = 0; |
---|
278 | |
---|
279 | bpl = 0; |
---|
280 | while (x != 0) { |
---|
281 | x = x << 1; |
---|
282 | bpl++; |
---|
283 | } |
---|
284 | |
---|
285 | bpi = 0; |
---|
286 | xx = 1; |
---|
287 | while (xx) { |
---|
288 | xx = xx << 1; |
---|
289 | bpi++; |
---|
290 | f5(&xx); |
---|
291 | } |
---|
292 | |
---|
293 | a = f2(bpl); |
---|
294 | if (2*a != f7() || a*a != f6()) { |
---|
295 | fprintf(stderr, "BAD NEWS: machine must work modulo 2^wordsize.\n"); |
---|
296 | return 1; |
---|
297 | } |
---|
298 | |
---|
299 | if (((long)f3()) != 1.0) { |
---|
300 | fprintf(stderr, "BAD NEWS: machine must truncate floating point.\n"); |
---|
301 | return 1; |
---|
302 | } |
---|
303 | |
---|
304 | if (bpl & 7) { |
---|
305 | fprintf(stderr, "BAD NEWS: word size must be multiple of 8 bits.\n"); |
---|
306 | return 1; |
---|
307 | } |
---|
308 | |
---|
309 | if (bpl < 32) { |
---|
310 | fprintf(stderr, "BAD NEWS: word size must be at least 32 bits.\n"); |
---|
311 | return 1; |
---|
312 | } |
---|
313 | |
---|
314 | |
---|
315 | dp = DoublePrecision(); |
---|
316 | dp1 = DoublePrecision1(); |
---|
317 | dr = DoubleRounding(dp); |
---|
318 | |
---|
319 | if (bpl-2 < dp-3) |
---|
320 | nbits = bpl-2; |
---|
321 | else |
---|
322 | nbits = dp-3; |
---|
323 | |
---|
324 | if (nbits & 1) nbits--; |
---|
325 | |
---|
326 | if (nbits < 30) { |
---|
327 | fprintf(stderr, "BAD NEWS: NBITS too small.\n"); |
---|
328 | return 1; |
---|
329 | } |
---|
330 | |
---|
331 | single_mul_ok = RepTest(); |
---|
332 | |
---|
333 | fprintf(stderr, "GOOD NEWS: compatible machine.\n"); |
---|
334 | fprintf(stderr, "summary of machine characteristics:\n"); |
---|
335 | fprintf(stderr, "bits per long = %ld\n", bpl); |
---|
336 | fprintf(stderr, "bits per int = %ld\n", bpi); |
---|
337 | fprintf(stderr, "arith right shift = %s\n", yn_vec[rs_arith]); |
---|
338 | fprintf(stderr, "double precision = %ld\n", dp); |
---|
339 | fprintf(stderr, "NBITS (maximum) = %ld\n", nbits); |
---|
340 | fprintf(stderr, "single mul ok = %s\n", yn_vec[single_mul_ok != 0]); |
---|
341 | fprintf(stderr, "extended doubles = %s\n", yn_vec[dp1 > dp]); |
---|
342 | fprintf(stderr, "double rounding = %s\n", yn_vec[dr]); |
---|
343 | |
---|
344 | if (dp1 > dp && AutoFix) |
---|
345 | fprintf(stderr, "-- auto x86 fix\n"); |
---|
346 | |
---|
347 | if (dp != 53) { |
---|
348 | warnings = 1; |
---|
349 | |
---|
350 | fprintf(stderr, "\n\nWARNING:\n\n"); |
---|
351 | fprintf(stderr, "Nonstandard floating point precision.\n"); |
---|
352 | fprintf(stderr, "IEEE standard is 53 bits.\n"); |
---|
353 | } |
---|
354 | |
---|
355 | #if (defined(__sparc__) && !defined(__sparc_v8__)) |
---|
356 | |
---|
357 | warnings = 1; |
---|
358 | |
---|
359 | fprintf(stderr, "\n\nWARNING:\n\n"); |
---|
360 | fprintf(stderr, "If this Sparc is a Sparc-10 or later (so it has\n"); |
---|
361 | fprintf(stderr, "a hardware integer multiply instruction) you\n"); |
---|
362 | fprintf(stderr, "should specify the -mv8 option in the makefile\n"); |
---|
363 | fprintf(stderr, "to obtain more efficient code.\n"); |
---|
364 | |
---|
365 | #endif |
---|
366 | |
---|
367 | if (dp1 > dp && !AutoFix) { |
---|
368 | warnings = 1; |
---|
369 | fprintf(stderr, "\n\nWARNING:\n\n"); |
---|
370 | fprintf(stderr, "This platform has extended double precision registers.\n"); |
---|
371 | fprintf(stderr, "While that may sound like a good thing, it actually is not.\n"); |
---|
372 | fprintf(stderr, "If this is a Pentium or other x86 and your compiler\n"); |
---|
373 | fprintf(stderr, "is g++ or supports GNU 'asm' constructs, it is recommended\n"); |
---|
374 | fprintf(stderr, "to compile NTL with the NTL_X86_FIX flag to get true IEEE floating point.\n"); |
---|
375 | fprintf(stderr, "Set this flag by editing the file config.h.\n"); |
---|
376 | fprintf(stderr, "The code should still work even if you don't set\n"); |
---|
377 | fprintf(stderr, "this flag. See quad_float.txt for details.\n\n"); |
---|
378 | } |
---|
379 | |
---|
380 | if (dp1 <= dp && dr) { |
---|
381 | warnings = 1; |
---|
382 | fprintf(stderr, "\n\nWARNING:\n\n"); |
---|
383 | fprintf(stderr, "Hmm....your machine double rounds, but the measured precision\n"); |
---|
384 | fprintf(stderr, "does not reflect this.\n"); |
---|
385 | fprintf(stderr, "Make sure you have optimizations turned on.\n\n"); |
---|
386 | } |
---|
387 | |
---|
388 | #if 0 |
---|
389 | |
---|
390 | /* better not to be interactive */ |
---|
391 | |
---|
392 | if (warnings) { |
---|
393 | fprintf(stderr, "Do you want to continue anyway[y/n]? "); |
---|
394 | c = getchar(); |
---|
395 | if (c == 'n' || c == 'N') { |
---|
396 | fprintf(stderr, "Make the necessary changes to the makefile and/or config.h,\n"); |
---|
397 | fprintf(stderr, "then type 'make clobber' and then 'make'.\n\n\n"); |
---|
398 | return 1; |
---|
399 | } |
---|
400 | } |
---|
401 | |
---|
402 | #endif |
---|
403 | |
---|
404 | f = fopen("mach_desc.h", "w"); |
---|
405 | if (!f) { |
---|
406 | fprintf(stderr, "can't open mach_desc.h for writing\n"); |
---|
407 | return 1; |
---|
408 | } |
---|
409 | |
---|
410 | fprintf(f, "#ifndef NTL_mach_desc__H\n"); |
---|
411 | fprintf(f, "#define NTL_mach_desc__H\n\n\n"); |
---|
412 | fprintf(f, "#define NTL_BITS_PER_LONG (%ld)\n", bpl); |
---|
413 | fprintf(f, "#define NTL_MAX_LONG (%ldL)\n", ((long) ((1UL<<(bpl-1))-1UL))); |
---|
414 | fprintf(f, "#define NTL_MAX_INT (%ld)\n", ((long) ((1UL<<(bpi-1))-1UL))); |
---|
415 | fprintf(f, "#define NTL_BITS_PER_INT (%ld)\n", bpi); |
---|
416 | fprintf(f, "#define NTL_ARITH_RIGHT_SHIFT (%ld)\n", rs_arith); |
---|
417 | fprintf(f, "#define NTL_NBITS_MAX (%ld)\n", nbits); |
---|
418 | fprintf(f, "#define NTL_DOUBLE_PRECISION (%ld)\n", dp); |
---|
419 | fprintf(f, "#define NTL_FDOUBLE_PRECISION "); |
---|
420 | print2k(f, dp-1, bpl); |
---|
421 | fprintf(f, "\n"); |
---|
422 | fprintf(f, "#define NTL_QUAD_FLOAT_SPLIT ("); |
---|
423 | print2k(f, dp - (dp/2), bpl); |
---|
424 | fprintf(f, "+1.0)\n"); |
---|
425 | fprintf(f, "#define NTL_EXT_DOUBLE (%d)\n", dp1 > dp); |
---|
426 | fprintf(f, "#define NTL_SINGLE_MUL_OK (%d)\n", single_mul_ok != 0); |
---|
427 | fprintf(f, "#define NTL_DOUBLES_LOW_HIGH (%d)\n\n\n", single_mul_ok < 0); |
---|
428 | print_BB_mul_code(f, bpl); |
---|
429 | print_BB_half_mul_code(f, bpl); |
---|
430 | print_BB_sqr_code(f, bpl); |
---|
431 | print_BB_rev_code(f, bpl); |
---|
432 | |
---|
433 | fprintf(f, "#define NTL_MIN_LONG (-NTL_MAX_LONG - 1L)\n"); |
---|
434 | fprintf(f, "#define NTL_MIN_INT (-NTL_MAX_INT - 1)\n"); |
---|
435 | |
---|
436 | fprintf(f, "#endif\n\n"); |
---|
437 | |
---|
438 | fclose(f); |
---|
439 | |
---|
440 | fprintf(stderr, "\n\n"); |
---|
441 | |
---|
442 | return 0; |
---|
443 | } |
---|