1 | /* |
---|
2 | * gfanlib_circuittabletypeint.h |
---|
3 | * |
---|
4 | * Created on: Apr 10, 2016 |
---|
5 | * Author: anders |
---|
6 | */ |
---|
7 | |
---|
8 | #ifndef GFANLIB_CIRCUITTABLEINT_H_ |
---|
9 | #define GFANLIB_CIRCUITTABLEINT_H_ |
---|
10 | |
---|
11 | #include <cstdint> |
---|
12 | #include <exception> |
---|
13 | #include <sstream> |
---|
14 | #include <assert.h> |
---|
15 | |
---|
16 | namespace gfan{ |
---|
17 | |
---|
18 | class MVMachineIntegerOverflow: public std::exception |
---|
19 | { |
---|
20 | virtual const char* what() const throw() |
---|
21 | { |
---|
22 | return "Exception: Overflow (or possible future overflow) in 32/64 bit integers in tropical homotopy."; |
---|
23 | } |
---|
24 | }; |
---|
25 | |
---|
26 | extern MVMachineIntegerOverflow MVMachineIntegerOverflow; |
---|
27 | |
---|
28 | /* |
---|
29 | * The philosophy here is that if this class overflows, then the computation needs to be restarted. Therefore |
---|
30 | * all overflows must be caught. |
---|
31 | */ |
---|
32 | class CircuitTableInt32{ |
---|
33 | public: |
---|
34 | class Divisor{ |
---|
35 | public: |
---|
36 | int32_t v; |
---|
37 | int shift; |
---|
38 | int32_t multiplicativeInverse; |
---|
39 | Divisor(CircuitTableInt32 const &a)// for exact division |
---|
40 | { // A good place to read about these tricks seems to be the book "Hacker's Delight" by Warren. |
---|
41 | v=a.v; |
---|
42 | shift=0; |
---|
43 | int32_t t=v; |
---|
44 | // uint32_t t=v; |
---|
45 | assert(t); |
---|
46 | while(!(t&1)){t>>= 1;shift++;} |
---|
47 | uint32_t inverse=t; |
---|
48 | while(t*inverse!=1)inverse*=2-t*inverse; |
---|
49 | multiplicativeInverse=inverse; |
---|
50 | } |
---|
51 | }; |
---|
52 | class Double{ |
---|
53 | public: |
---|
54 | int64_t v; |
---|
55 | Double():v(0){}; |
---|
56 | Double(int64_t a):v(a){}; |
---|
57 | Double &operator+=(Double a){v+=a.v;return *this;} |
---|
58 | Double &operator-=(Double a){v-=a.v;return *this;} |
---|
59 | CircuitTableInt32 castToSingle()const; |
---|
60 | bool isZero()const{return v==0;} |
---|
61 | bool isNegative()const{return v<0;} |
---|
62 | Double operator-()const{Double ret;ret.v=-v;return ret;} |
---|
63 | friend Double operator-(Double const &a, Double const &b){return Double(a.v-b.v);} |
---|
64 | friend Double operator+(Double const &a, Double const &b){return Double(a.v+b.v);} |
---|
65 | Double &addWithOverflowCheck(Double const &a) |
---|
66 | { |
---|
67 | if(a.v<0 || v<0 || a.v>1000000000000000000 || v>1000000000000000000) throw MVMachineIntegerOverflow; |
---|
68 | v+=a.v; |
---|
69 | return *this; |
---|
70 | } |
---|
71 | std::string toString()const{std::stringstream s;s<<v;return s.str();} |
---|
72 | }; |
---|
73 | private: |
---|
74 | public: |
---|
75 | int32_t v; |
---|
76 | friend CircuitTableInt32 operator+(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return CircuitTableInt32(a.v+b.v);} |
---|
77 | friend CircuitTableInt32 operator-(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return CircuitTableInt32(a.v-b.v);} |
---|
78 | friend CircuitTableInt32 operator*(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return CircuitTableInt32(a.v*b.v);} |
---|
79 | public: |
---|
80 | // In the code products of CircuitTableInt32 objects are summed. To avoid overflows one of the factors must "fit in half" and the "number of summands" may not exceed a certain number. The bounds are specified in the following functions: |
---|
81 | bool fitsInHalf()const{return v>-30000 && v<30000;}// Is it better to allow only non-negative numbers? |
---|
82 | static bool isSuitableSummationNumber(int numberOfSummands){return numberOfSummands<30000;} |
---|
83 | CircuitTableInt32(){v=0;}; |
---|
84 | CircuitTableInt32(CircuitTableInt32 const &m):v(m.v){} |
---|
85 | CircuitTableInt32(int32_t val):v(val){}; |
---|
86 | class CircuitTableInt32 &operator=(int32_t a){v=a;return *this;} |
---|
87 | CircuitTableInt32 operator-()const{CircuitTableInt32 ret;ret.v=-v;return ret;} |
---|
88 | CircuitTableInt32 &operator-=(CircuitTableInt32 a){v-=a.v;return *this;} |
---|
89 | CircuitTableInt32 &operator+=(CircuitTableInt32 a){v+=a.v;return *this;} |
---|
90 | CircuitTableInt32 &operator*=(CircuitTableInt32 a){v*=a.v;return *this;} |
---|
91 | friend bool operator<=(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return a.v<=b.v;} |
---|
92 | friend bool operator==(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return a.v==b.v;} |
---|
93 | bool isZero()const{return v==0;} |
---|
94 | bool isOne()const{return v==1;} |
---|
95 | bool isNonZero()const{return v!=0;} |
---|
96 | bool isNegative()const{return v<0;} |
---|
97 | bool isPositive()const{return v>0;} |
---|
98 | friend int determinantSign1(CircuitTableInt32 const &a, CircuitTableInt32 const &c, CircuitTableInt32 const &b, CircuitTableInt32 const &d)//NOTICE MIXED ORDER. MUST WORK BY EXTENDING |
---|
99 | { |
---|
100 | int64_t r=((int64_t)a.v)*((int64_t)c.v)-((int64_t)b.v)*((int64_t)d.v); |
---|
101 | if(r>0)return 1; |
---|
102 | if(r<0)return -1; |
---|
103 | return 0; |
---|
104 | } |
---|
105 | friend int determinantSign2(CircuitTableInt32::Double const &a, CircuitTableInt32 const &c, CircuitTableInt32::Double const &b, CircuitTableInt32 const &d)//NOTICE MIXED ORDER. MUST WORK BY EXTENDING |
---|
106 | { |
---|
107 | int64_t r1=(a.v)*((int64_t)c.v); |
---|
108 | int64_t r2=(b.v)*((int64_t)d.v); |
---|
109 | if(r1>r2)return 1; |
---|
110 | if(r1<r2)return -1; |
---|
111 | return 0; |
---|
112 | } |
---|
113 | std::string toString()const{std::stringstream s;s<<v;return s.str();} |
---|
114 | friend std::ostream &operator<<(std::ostream &f, CircuitTableInt32 const &a){f<<(int)a.v;return f;} |
---|
115 | Double extend()const{Double ret;ret.v=v;return ret;} |
---|
116 | CircuitTableInt32 &maddWithOverflowChecking(CircuitTableInt32 const &a, CircuitTableInt32 const&b){Double t=this->extend();t+=extendedMultiplication(a,b);*this=t.castToSingle();return *this;} |
---|
117 | CircuitTableInt32 &msubWithOverflowChecking(CircuitTableInt32 const &a, CircuitTableInt32 const&b){Double s=this->extend();s-=extendedMultiplication(a,b);*this=s.castToSingle();return *this;} |
---|
118 | CircuitTableInt32 &subWithOverflowChecking(CircuitTableInt32 const &a){Double t=this->extend();t-=a.extend();*this=t.castToSingle();return *this;} |
---|
119 | CircuitTableInt32 &addWithOverflowChecking(CircuitTableInt32 const &a){Double t=this->extend();t+=a.extend();*this=t.castToSingle();return *this;} |
---|
120 | CircuitTableInt32 negatedWithOverflowChecking()const{return (-extend()).castToSingle();} |
---|
121 | friend Double extendedMultiplication(CircuitTableInt32 const &a, CircuitTableInt32 const &b){Double ret;ret.v=((int64_t)a.v)*((int64_t)b.v);return ret;} |
---|
122 | friend Double extendedMultiplication(CircuitTableInt32 const &a, int32_t b){Double ret;ret.v=((int64_t)a.v)*((int64_t)b);return ret;}//to be removed? |
---|
123 | friend CircuitTableInt32 min(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return (a.v>=b.v)?b:a;} |
---|
124 | friend CircuitTableInt32 max(CircuitTableInt32 const &a, CircuitTableInt32 const &b){return (a.v<=b.v)?b:a;} |
---|
125 | friend CircuitTableInt32 negabs(CircuitTableInt32 const &a){return min(a,-a);} |
---|
126 | friend CircuitTableInt32 dotDiv(CircuitTableInt32 const &s, CircuitTableInt32 const &a, CircuitTableInt32 const &t, CircuitTableInt32 const &b, CircuitTableInt32::Divisor const &q) |
---|
127 | { |
---|
128 | return CircuitTableInt32(((((int32_t)(((uint64_t)(((int64_t)s.v)*((int64_t)a.v)+((int64_t)t.v)*((int64_t)b.v)))>>q.shift)))*q.multiplicativeInverse)); |
---|
129 | } |
---|
130 | friend void dotDivAssign(CircuitTableInt32 &s, CircuitTableInt32 const &a, CircuitTableInt32 const &t, CircuitTableInt32 const &b, CircuitTableInt32::Divisor const &q)//as above but assigning to first argument. This helps the vectorizer. |
---|
131 | { |
---|
132 | s.v=((((int32_t)(((uint64_t)(((int64_t)s.v)*((int64_t)a.v)+((int64_t)t.v)*((int64_t)b.v)))>>q.shift)))*q.multiplicativeInverse); |
---|
133 | } |
---|
134 | static int MIN(int32_t a, int32_t b) |
---|
135 | { |
---|
136 | return (a<b)?a:b; |
---|
137 | } |
---|
138 | static int MAX(int32_t a, int32_t b) |
---|
139 | { |
---|
140 | return (a>b)?a:b; |
---|
141 | } |
---|
142 | static CircuitTableInt32 computeNegativeBound(CircuitTableInt32 * __restrict__ Ai, int w) |
---|
143 | { |
---|
144 | CircuitTableInt32 M=0; |
---|
145 | CircuitTableInt32 m=0; |
---|
146 | for(int j=0;j<w;j++) |
---|
147 | { |
---|
148 | m.v=MIN(m.v,Ai[j].v); |
---|
149 | M.v=MAX(M.v,Ai[j].v); |
---|
150 | } |
---|
151 | return min(m,-M); |
---|
152 | } |
---|
153 | static CircuitTableInt32 quickNoShiftBounded(CircuitTableInt32 * __restrict__ a, CircuitTableInt32 * __restrict__ b, CircuitTableInt32 s, CircuitTableInt32 t, CircuitTableInt32::Divisor denominatorDivisor,int c) |
---|
154 | { |
---|
155 | CircuitTableInt32 *aa=a; |
---|
156 | CircuitTableInt32 *bb=b; |
---|
157 | CircuitTableInt32 max=0; |
---|
158 | CircuitTableInt32 min=0; |
---|
159 | CircuitTableInt32 ret=0; |
---|
160 | for(int i=0;i<c;i++) |
---|
161 | { |
---|
162 | aa[i].v=((s.v*denominatorDivisor.multiplicativeInverse)*aa[i].v+ |
---|
163 | (t.v*denominatorDivisor.multiplicativeInverse)*bb[i].v); |
---|
164 | min.v=MIN(min.v,aa[i].v); |
---|
165 | max.v=MAX(max.v,aa[i].v); |
---|
166 | } |
---|
167 | if(-max<=min)min=-max; |
---|
168 | ret=min; |
---|
169 | return ret; |
---|
170 | } |
---|
171 | static CircuitTableInt32 dotDivVector(CircuitTableInt32 * __restrict__ aa, CircuitTableInt32 * __restrict__ bb, CircuitTableInt32 s, CircuitTableInt32 t, CircuitTableInt32::Divisor denominatorDivisor,int c, CircuitTableInt32 boundA, CircuitTableInt32 boundB)__attribute__ ((always_inline))//assigning to first argument. The return negative of the return value is an upper bound on the absolute values of the produced entries |
---|
172 | { |
---|
173 | while(((s.v|t.v)&1)==0 && denominatorDivisor.shift>0){s.v>>=1;t.v>>=1;denominatorDivisor.shift--;denominatorDivisor.v>>=1;} |
---|
174 | |
---|
175 | CircuitTableInt32 max=0; |
---|
176 | CircuitTableInt32 min=0; |
---|
177 | |
---|
178 | // This is a bound on the absolute value of the sums of products before dividing by the denominator |
---|
179 | // The calculation can overflow, but since we are casting to unsigned long, this is not a problem. |
---|
180 | uint64_t positiveResultBoundTimesD=(negabs(t).v*((int64_t)boundA.v)+negabs(s).v*((int64_t)boundB.v)); |
---|
181 | |
---|
182 | /* The first case is the case where we know that the intermediate results fit in 32bit before shifting down. |
---|
183 | * In other words, the shifted-in bits of the results are equal. |
---|
184 | * In that case the intermediate result can be computed with 32bits. |
---|
185 | */ |
---|
186 | if(positiveResultBoundTimesD<((((int64_t)0x40000000)*denominatorDivisor.v)>>denominatorDivisor.shift))//check this carefully |
---|
187 | {//FAST VERSION |
---|
188 | // debug<<s.v<<" "<<t.v<<" "<<denominatorDivisor.v<<" "<<denominatorDivisor.shift<<"\n"; |
---|
189 | if(denominatorDivisor.shift==0)return quickNoShiftBounded(aa,bb,s,t,denominatorDivisor,c); |
---|
190 | for(int i=0;i<c;i++) |
---|
191 | { |
---|
192 | aa[i].v=((s.v*denominatorDivisor.multiplicativeInverse)*aa[i].v+ |
---|
193 | (t.v*denominatorDivisor.multiplicativeInverse)*bb[i].v)>>denominatorDivisor.shift; |
---|
194 | min.v=MIN(min.v,aa[i].v); |
---|
195 | max.v=MAX(max.v,aa[i].v); |
---|
196 | } |
---|
197 | if(-max<=min)min=-max; |
---|
198 | return min; |
---|
199 | } |
---|
200 | else |
---|
201 | { |
---|
202 | /* The next case would be to check if the result is known to fit in 32bit. |
---|
203 | * In that case intermediate results would be handled in 64 bit, |
---|
204 | * but the final result would not have to be checked for 32 bit overflows. |
---|
205 | */ |
---|
206 | if(positiveResultBoundTimesD<((((int64_t)0x40000000)*denominatorDivisor.v))) |
---|
207 | { |
---|
208 | for(int i=0;i<c;i++) |
---|
209 | { |
---|
210 | aa[i].v=((((int32_t)(((uint64_t)(((int64_t)s.v)*((int64_t)aa[i].v)+((int64_t)t.v)*((int64_t)bb[i].v)))>>denominatorDivisor.shift)))*denominatorDivisor.multiplicativeInverse); |
---|
211 | if(max<=aa[i])max=aa[i]; |
---|
212 | if(aa[i]<=min)min=aa[i]; |
---|
213 | } |
---|
214 | if(-max<=min)min=-max; |
---|
215 | } |
---|
216 | /* The last case would be to where we don't know that the results will fit in 32 bit. |
---|
217 | * Since we need to compute max and min anyway, we can compute these quantities in 64bit |
---|
218 | * and just check if they fit in 32bit at the end. |
---|
219 | */ |
---|
220 | else |
---|
221 | { |
---|
222 | bool doesOverflow=(((uint32_t)t.v)==0x80000000); |
---|
223 | int64_t min64=0; |
---|
224 | int64_t max64=0; |
---|
225 | for(int i=0;i<c;i++) |
---|
226 | { |
---|
227 | // In the unlikely event that all the numbers to be multiplied are -2^31, the following code will overflow. Therefore the overflow flag was set as above. |
---|
228 | int64_t temp=(((int64_t)s.v)*((int64_t)aa[i].v)+((int64_t)t.v)*((int64_t)bb[i].v))/denominatorDivisor.v; |
---|
229 | if(max64<=temp)max64=temp; |
---|
230 | if(temp<=min64)min64=temp; |
---|
231 | aa[i].v=temp; |
---|
232 | } |
---|
233 | if(-max64<=min64)min64=-max64; |
---|
234 | if(min64<=-0x7fffffff)doesOverflow=true; |
---|
235 | if(doesOverflow)throw MVMachineIntegerOverflow; |
---|
236 | min=min64; |
---|
237 | } |
---|
238 | } |
---|
239 | return min; |
---|
240 | } |
---|
241 | }; |
---|
242 | |
---|
243 | inline CircuitTableInt32 CircuitTableInt32::Double::castToSingle()const//casts and checks precission |
---|
244 | { |
---|
245 | if(v>=0x7fffffff || -v>=0x7fffffff) throw MVMachineIntegerOverflow; |
---|
246 | return CircuitTableInt32(v); |
---|
247 | } |
---|
248 | } |
---|
249 | |
---|
250 | |
---|
251 | #endif /* GFANLIB_CIRCUITTABLETYPEINT_H_ */ |
---|