1 | /************************************************************************* |
---|
2 | Copyright (c) 1992-2007 The University of Tennessee. All rights reserved. |
---|
3 | |
---|
4 | Contributors: |
---|
5 | * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to |
---|
6 | pseudocode. |
---|
7 | |
---|
8 | See subroutines comments for additional copyrights. |
---|
9 | |
---|
10 | Redistribution and use in source and binary forms, with or without |
---|
11 | modification, are permitted provided that the following conditions are |
---|
12 | met: |
---|
13 | |
---|
14 | - Redistributions of source code must retain the above copyright |
---|
15 | notice, this list of conditions and the following disclaimer. |
---|
16 | |
---|
17 | - Redistributions in binary form must reproduce the above copyright |
---|
18 | notice, this list of conditions and the following disclaimer listed |
---|
19 | in this license in the documentation and/or other materials |
---|
20 | provided with the distribution. |
---|
21 | |
---|
22 | - Neither the name of the copyright holders nor the names of its |
---|
23 | contributors may be used to endorse or promote products derived from |
---|
24 | this software without specific prior written permission. |
---|
25 | |
---|
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
---|
27 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
---|
28 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
---|
29 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
---|
30 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
31 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
---|
32 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
---|
33 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
---|
34 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
---|
35 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
---|
36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
37 | *************************************************************************/ |
---|
38 | |
---|
39 | #ifndef _bdsvd_h |
---|
40 | #define _bdsvd_h |
---|
41 | |
---|
42 | #include "ap.h" |
---|
43 | #include "amp.h" |
---|
44 | #include "rotations.h" |
---|
45 | namespace bdsvd |
---|
46 | { |
---|
47 | template<unsigned int Precision> |
---|
48 | bool rmatrixbdsvd(ap::template_1d_array< amp::ampf<Precision> >& d, |
---|
49 | ap::template_1d_array< amp::ampf<Precision> > e, |
---|
50 | int n, |
---|
51 | bool isupper, |
---|
52 | bool isfractionalaccuracyrequired, |
---|
53 | ap::template_2d_array< amp::ampf<Precision> >& u, |
---|
54 | int nru, |
---|
55 | ap::template_2d_array< amp::ampf<Precision> >& c, |
---|
56 | int ncc, |
---|
57 | ap::template_2d_array< amp::ampf<Precision> >& vt, |
---|
58 | int ncvt); |
---|
59 | template<unsigned int Precision> |
---|
60 | bool bidiagonalsvddecomposition(ap::template_1d_array< amp::ampf<Precision> >& d, |
---|
61 | ap::template_1d_array< amp::ampf<Precision> > e, |
---|
62 | int n, |
---|
63 | bool isupper, |
---|
64 | bool isfractionalaccuracyrequired, |
---|
65 | ap::template_2d_array< amp::ampf<Precision> >& u, |
---|
66 | int nru, |
---|
67 | ap::template_2d_array< amp::ampf<Precision> >& c, |
---|
68 | int ncc, |
---|
69 | ap::template_2d_array< amp::ampf<Precision> >& vt, |
---|
70 | int ncvt); |
---|
71 | template<unsigned int Precision> |
---|
72 | bool bidiagonalsvddecompositioninternal(ap::template_1d_array< amp::ampf<Precision> >& d, |
---|
73 | ap::template_1d_array< amp::ampf<Precision> > e, |
---|
74 | int n, |
---|
75 | bool isupper, |
---|
76 | bool isfractionalaccuracyrequired, |
---|
77 | ap::template_2d_array< amp::ampf<Precision> >& u, |
---|
78 | int ustart, |
---|
79 | int nru, |
---|
80 | ap::template_2d_array< amp::ampf<Precision> >& c, |
---|
81 | int cstart, |
---|
82 | int ncc, |
---|
83 | ap::template_2d_array< amp::ampf<Precision> >& vt, |
---|
84 | int vstart, |
---|
85 | int ncvt); |
---|
86 | template<unsigned int Precision> |
---|
87 | amp::ampf<Precision> extsignbdsqr(amp::ampf<Precision> a, |
---|
88 | amp::ampf<Precision> b); |
---|
89 | template<unsigned int Precision> |
---|
90 | void svd2x2(amp::ampf<Precision> f, |
---|
91 | amp::ampf<Precision> g, |
---|
92 | amp::ampf<Precision> h, |
---|
93 | amp::ampf<Precision>& ssmin, |
---|
94 | amp::ampf<Precision>& ssmax); |
---|
95 | template<unsigned int Precision> |
---|
96 | void svdv2x2(amp::ampf<Precision> f, |
---|
97 | amp::ampf<Precision> g, |
---|
98 | amp::ampf<Precision> h, |
---|
99 | amp::ampf<Precision>& ssmin, |
---|
100 | amp::ampf<Precision>& ssmax, |
---|
101 | amp::ampf<Precision>& snr, |
---|
102 | amp::ampf<Precision>& csr, |
---|
103 | amp::ampf<Precision>& snl, |
---|
104 | amp::ampf<Precision>& csl); |
---|
105 | |
---|
106 | |
---|
107 | /************************************************************************* |
---|
108 | Singular value decomposition of a bidiagonal matrix (extended algorithm) |
---|
109 | |
---|
110 | The algorithm performs the singular value decomposition of a bidiagonal |
---|
111 | matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and P - |
---|
112 | orthogonal matrices, S - diagonal matrix with non-negative elements on the |
---|
113 | main diagonal, in descending order. |
---|
114 | |
---|
115 | The algorithm finds singular values. In addition, the algorithm can |
---|
116 | calculate matrices Q and P (more precisely, not the matrices, but their |
---|
117 | product with given matrices U and VT - U*Q and (P^T)*VT)). Of course, |
---|
118 | matrices U and VT can be of any type, including identity. Furthermore, the |
---|
119 | algorithm can calculate Q'*C (this product is calculated more effectively |
---|
120 | than U*Q, because this calculation operates with rows instead of matrix |
---|
121 | columns). |
---|
122 | |
---|
123 | The feature of the algorithm is its ability to find all singular values |
---|
124 | including those which are arbitrarily close to 0 with relative accuracy |
---|
125 | close to machine precision. If the parameter IsFractionalAccuracyRequired |
---|
126 | is set to True, all singular values will have high relative accuracy close |
---|
127 | to machine precision. If the parameter is set to False, only the biggest |
---|
128 | singular value will have relative accuracy close to machine precision. |
---|
129 | The absolute error of other singular values is equal to the absolute error |
---|
130 | of the biggest singular value. |
---|
131 | |
---|
132 | Input parameters: |
---|
133 | D - main diagonal of matrix B. |
---|
134 | Array whose index ranges within [0..N-1]. |
---|
135 | E - superdiagonal (or subdiagonal) of matrix B. |
---|
136 | Array whose index ranges within [0..N-2]. |
---|
137 | N - size of matrix B. |
---|
138 | IsUpper - True, if the matrix is upper bidiagonal. |
---|
139 | IsFractionalAccuracyRequired - |
---|
140 | accuracy to search singular values with. |
---|
141 | U - matrix to be multiplied by Q. |
---|
142 | Array whose indexes range within [0..NRU-1, 0..N-1]. |
---|
143 | The matrix can be bigger, in that case only the submatrix |
---|
144 | [0..NRU-1, 0..N-1] will be multiplied by Q. |
---|
145 | NRU - number of rows in matrix U. |
---|
146 | C - matrix to be multiplied by Q'. |
---|
147 | Array whose indexes range within [0..N-1, 0..NCC-1]. |
---|
148 | The matrix can be bigger, in that case only the submatrix |
---|
149 | [0..N-1, 0..NCC-1] will be multiplied by Q'. |
---|
150 | NCC - number of columns in matrix C. |
---|
151 | VT - matrix to be multiplied by P^T. |
---|
152 | Array whose indexes range within [0..N-1, 0..NCVT-1]. |
---|
153 | The matrix can be bigger, in that case only the submatrix |
---|
154 | [0..N-1, 0..NCVT-1] will be multiplied by P^T. |
---|
155 | NCVT - number of columns in matrix VT. |
---|
156 | |
---|
157 | Output parameters: |
---|
158 | D - singular values of matrix B in descending order. |
---|
159 | U - if NRU>0, contains matrix U*Q. |
---|
160 | VT - if NCVT>0, contains matrix (P^T)*VT. |
---|
161 | C - if NCC>0, contains matrix Q'*C. |
---|
162 | |
---|
163 | Result: |
---|
164 | True, if the algorithm has converged. |
---|
165 | False, if the algorithm hasn't converged (rare case). |
---|
166 | |
---|
167 | Additional information: |
---|
168 | The type of convergence is controlled by the internal parameter TOL. |
---|
169 | If the parameter is greater than 0, the singular values will have |
---|
170 | relative accuracy TOL. If TOL<0, the singular values will have |
---|
171 | absolute accuracy ABS(TOL)*norm(B). |
---|
172 | By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon, |
---|
173 | where Epsilon is the machine precision. It is not recommended to use |
---|
174 | TOL less than 10*Epsilon since this will considerably slow down the |
---|
175 | algorithm and may not lead to error decreasing. |
---|
176 | History: |
---|
177 | * 31 March, 2007. |
---|
178 | changed MAXITR from 6 to 12. |
---|
179 | |
---|
180 | -- LAPACK routine (version 3.0) -- |
---|
181 | Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., |
---|
182 | Courant Institute, Argonne National Lab, and Rice University |
---|
183 | October 31, 1999. |
---|
184 | *************************************************************************/ |
---|
185 | template<unsigned int Precision> |
---|
186 | bool rmatrixbdsvd(ap::template_1d_array< amp::ampf<Precision> >& d, |
---|
187 | ap::template_1d_array< amp::ampf<Precision> > e, |
---|
188 | int n, |
---|
189 | bool isupper, |
---|
190 | bool isfractionalaccuracyrequired, |
---|
191 | ap::template_2d_array< amp::ampf<Precision> >& u, |
---|
192 | int nru, |
---|
193 | ap::template_2d_array< amp::ampf<Precision> >& c, |
---|
194 | int ncc, |
---|
195 | ap::template_2d_array< amp::ampf<Precision> >& vt, |
---|
196 | int ncvt) |
---|
197 | { |
---|
198 | bool result; |
---|
199 | ap::template_1d_array< amp::ampf<Precision> > d1; |
---|
200 | ap::template_1d_array< amp::ampf<Precision> > e1; |
---|
201 | |
---|
202 | |
---|
203 | d1.setbounds(1, n); |
---|
204 | ap::vmove(d1.getvector(1, n), d.getvector(0, n-1)); |
---|
205 | if( n>1 ) |
---|
206 | { |
---|
207 | e1.setbounds(1, n-1); |
---|
208 | ap::vmove(e1.getvector(1, n-1), e.getvector(0, n-2)); |
---|
209 | } |
---|
210 | result = bidiagonalsvddecompositioninternal<Precision>(d1, e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt); |
---|
211 | ap::vmove(d.getvector(0, n-1), d1.getvector(1, n)); |
---|
212 | return result; |
---|
213 | } |
---|
214 | |
---|
215 | |
---|
216 | /************************************************************************* |
---|
217 | Obsolete 1-based subroutine. See RMatrixBDSVD for 0-based replacement. |
---|
218 | |
---|
219 | History: |
---|
220 | * 31 March, 2007. |
---|
221 | changed MAXITR from 6 to 12. |
---|
222 | |
---|
223 | -- LAPACK routine (version 3.0) -- |
---|
224 | Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., |
---|
225 | Courant Institute, Argonne National Lab, and Rice University |
---|
226 | October 31, 1999. |
---|
227 | *************************************************************************/ |
---|
228 | template<unsigned int Precision> |
---|
229 | bool bidiagonalsvddecomposition(ap::template_1d_array< amp::ampf<Precision> >& d, |
---|
230 | ap::template_1d_array< amp::ampf<Precision> > e, |
---|
231 | int n, |
---|
232 | bool isupper, |
---|
233 | bool isfractionalaccuracyrequired, |
---|
234 | ap::template_2d_array< amp::ampf<Precision> >& u, |
---|
235 | int nru, |
---|
236 | ap::template_2d_array< amp::ampf<Precision> >& c, |
---|
237 | int ncc, |
---|
238 | ap::template_2d_array< amp::ampf<Precision> >& vt, |
---|
239 | int ncvt) |
---|
240 | { |
---|
241 | bool result; |
---|
242 | |
---|
243 | |
---|
244 | result = bidiagonalsvddecompositioninternal<Precision>(d, e, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt); |
---|
245 | return result; |
---|
246 | } |
---|
247 | |
---|
248 | |
---|
249 | /************************************************************************* |
---|
250 | Internal working subroutine for bidiagonal decomposition |
---|
251 | *************************************************************************/ |
---|
252 | template<unsigned int Precision> |
---|
253 | bool bidiagonalsvddecompositioninternal(ap::template_1d_array< amp::ampf<Precision> >& d, |
---|
254 | ap::template_1d_array< amp::ampf<Precision> > e, |
---|
255 | int n, |
---|
256 | bool isupper, |
---|
257 | bool isfractionalaccuracyrequired, |
---|
258 | ap::template_2d_array< amp::ampf<Precision> >& u, |
---|
259 | int ustart, |
---|
260 | int nru, |
---|
261 | ap::template_2d_array< amp::ampf<Precision> >& c, |
---|
262 | int cstart, |
---|
263 | int ncc, |
---|
264 | ap::template_2d_array< amp::ampf<Precision> >& vt, |
---|
265 | int vstart, |
---|
266 | int ncvt) |
---|
267 | { |
---|
268 | bool result; |
---|
269 | int i; |
---|
270 | int idir; |
---|
271 | int isub; |
---|
272 | int iter; |
---|
273 | int j; |
---|
274 | int ll; |
---|
275 | int lll; |
---|
276 | int m; |
---|
277 | int maxit; |
---|
278 | int oldll; |
---|
279 | int oldm; |
---|
280 | amp::ampf<Precision> abse; |
---|
281 | amp::ampf<Precision> abss; |
---|
282 | amp::ampf<Precision> cosl; |
---|
283 | amp::ampf<Precision> cosr; |
---|
284 | amp::ampf<Precision> cs; |
---|
285 | amp::ampf<Precision> eps; |
---|
286 | amp::ampf<Precision> f; |
---|
287 | amp::ampf<Precision> g; |
---|
288 | amp::ampf<Precision> h; |
---|
289 | amp::ampf<Precision> mu; |
---|
290 | amp::ampf<Precision> oldcs; |
---|
291 | amp::ampf<Precision> oldsn; |
---|
292 | amp::ampf<Precision> r; |
---|
293 | amp::ampf<Precision> shift; |
---|
294 | amp::ampf<Precision> sigmn; |
---|
295 | amp::ampf<Precision> sigmx; |
---|
296 | amp::ampf<Precision> sinl; |
---|
297 | amp::ampf<Precision> sinr; |
---|
298 | amp::ampf<Precision> sll; |
---|
299 | amp::ampf<Precision> smax; |
---|
300 | amp::ampf<Precision> smin; |
---|
301 | amp::ampf<Precision> sminl; |
---|
302 | amp::ampf<Precision> sminlo; |
---|
303 | amp::ampf<Precision> sminoa; |
---|
304 | amp::ampf<Precision> sn; |
---|
305 | amp::ampf<Precision> thresh; |
---|
306 | amp::ampf<Precision> tol; |
---|
307 | amp::ampf<Precision> tolmul; |
---|
308 | amp::ampf<Precision> unfl; |
---|
309 | ap::template_1d_array< amp::ampf<Precision> > work0; |
---|
310 | ap::template_1d_array< amp::ampf<Precision> > work1; |
---|
311 | ap::template_1d_array< amp::ampf<Precision> > work2; |
---|
312 | ap::template_1d_array< amp::ampf<Precision> > work3; |
---|
313 | int maxitr; |
---|
314 | bool matrixsplitflag; |
---|
315 | bool iterflag; |
---|
316 | ap::template_1d_array< amp::ampf<Precision> > utemp; |
---|
317 | ap::template_1d_array< amp::ampf<Precision> > vttemp; |
---|
318 | ap::template_1d_array< amp::ampf<Precision> > ctemp; |
---|
319 | ap::template_1d_array< amp::ampf<Precision> > etemp; |
---|
320 | bool rightside; |
---|
321 | bool fwddir; |
---|
322 | amp::ampf<Precision> tmp; |
---|
323 | int mm1; |
---|
324 | int mm0; |
---|
325 | bool bchangedir; |
---|
326 | int uend; |
---|
327 | int cend; |
---|
328 | int vend; |
---|
329 | |
---|
330 | |
---|
331 | result = true; |
---|
332 | if( n==0 ) |
---|
333 | { |
---|
334 | return result; |
---|
335 | } |
---|
336 | if( n==1 ) |
---|
337 | { |
---|
338 | if( d(1)<0 ) |
---|
339 | { |
---|
340 | d(1) = -d(1); |
---|
341 | if( ncvt>0 ) |
---|
342 | { |
---|
343 | ap::vmul(vt.getrow(vstart, vstart, vstart+ncvt-1), -1); |
---|
344 | } |
---|
345 | } |
---|
346 | return result; |
---|
347 | } |
---|
348 | |
---|
349 | // |
---|
350 | // init |
---|
351 | // |
---|
352 | work0.setbounds(1, n-1); |
---|
353 | work1.setbounds(1, n-1); |
---|
354 | work2.setbounds(1, n-1); |
---|
355 | work3.setbounds(1, n-1); |
---|
356 | uend = ustart+ap::maxint(nru-1, 0); |
---|
357 | vend = vstart+ap::maxint(ncvt-1, 0); |
---|
358 | cend = cstart+ap::maxint(ncc-1, 0); |
---|
359 | utemp.setbounds(ustart, uend); |
---|
360 | vttemp.setbounds(vstart, vend); |
---|
361 | ctemp.setbounds(cstart, cend); |
---|
362 | maxitr = 12; |
---|
363 | rightside = true; |
---|
364 | fwddir = true; |
---|
365 | |
---|
366 | // |
---|
367 | // resize E from N-1 to N |
---|
368 | // |
---|
369 | etemp.setbounds(1, n); |
---|
370 | for(i=1; i<=n-1; i++) |
---|
371 | { |
---|
372 | etemp(i) = e(i); |
---|
373 | } |
---|
374 | e.setbounds(1, n); |
---|
375 | for(i=1; i<=n-1; i++) |
---|
376 | { |
---|
377 | e(i) = etemp(i); |
---|
378 | } |
---|
379 | e(n) = 0; |
---|
380 | idir = 0; |
---|
381 | |
---|
382 | // |
---|
383 | // Get machine constants |
---|
384 | // |
---|
385 | eps = amp::ampf<Precision>::getAlgoPascalEpsilon(); |
---|
386 | unfl = amp::ampf<Precision>::getAlgoPascalMinNumber(); |
---|
387 | |
---|
388 | // |
---|
389 | // If matrix lower bidiagonal, rotate to be upper bidiagonal |
---|
390 | // by applying Givens rotations on the left |
---|
391 | // |
---|
392 | if( !isupper ) |
---|
393 | { |
---|
394 | for(i=1; i<=n-1; i++) |
---|
395 | { |
---|
396 | rotations::generaterotation<Precision>(d(i), e(i), cs, sn, r); |
---|
397 | d(i) = r; |
---|
398 | e(i) = sn*d(i+1); |
---|
399 | d(i+1) = cs*d(i+1); |
---|
400 | work0(i) = cs; |
---|
401 | work1(i) = sn; |
---|
402 | } |
---|
403 | |
---|
404 | // |
---|
405 | // Update singular vectors if desired |
---|
406 | // |
---|
407 | if( nru>0 ) |
---|
408 | { |
---|
409 | rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp); |
---|
410 | } |
---|
411 | if( ncc>0 ) |
---|
412 | { |
---|
413 | rotations::applyrotationsfromtheleft<Precision>(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp); |
---|
414 | } |
---|
415 | } |
---|
416 | |
---|
417 | // |
---|
418 | // Compute singular values to relative accuracy TOL |
---|
419 | // (By setting TOL to be negative, algorithm will compute |
---|
420 | // singular values to absolute accuracy ABS(TOL)*norm(input matrix)) |
---|
421 | // |
---|
422 | tolmul = amp::maximum<Precision>(10, amp::minimum<Precision>(100, amp::pow<Precision>(eps, -amp::ampf<Precision>("0.125")))); |
---|
423 | tol = tolmul*eps; |
---|
424 | if( !isfractionalaccuracyrequired ) |
---|
425 | { |
---|
426 | tol = -tol; |
---|
427 | } |
---|
428 | |
---|
429 | // |
---|
430 | // Compute approximate maximum, minimum singular values |
---|
431 | // |
---|
432 | smax = 0; |
---|
433 | for(i=1; i<=n; i++) |
---|
434 | { |
---|
435 | smax = amp::maximum<Precision>(smax, amp::abs<Precision>(d(i))); |
---|
436 | } |
---|
437 | for(i=1; i<=n-1; i++) |
---|
438 | { |
---|
439 | smax = amp::maximum<Precision>(smax, amp::abs<Precision>(e(i))); |
---|
440 | } |
---|
441 | sminl = 0; |
---|
442 | if( tol>=0 ) |
---|
443 | { |
---|
444 | |
---|
445 | // |
---|
446 | // Relative accuracy desired |
---|
447 | // |
---|
448 | sminoa = amp::abs<Precision>(d(1)); |
---|
449 | if( sminoa!=0 ) |
---|
450 | { |
---|
451 | mu = sminoa; |
---|
452 | for(i=2; i<=n; i++) |
---|
453 | { |
---|
454 | mu = amp::abs<Precision>(d(i))*(mu/(mu+amp::abs<Precision>(e(i-1)))); |
---|
455 | sminoa = amp::minimum<Precision>(sminoa, mu); |
---|
456 | if( sminoa==0 ) |
---|
457 | { |
---|
458 | break; |
---|
459 | } |
---|
460 | } |
---|
461 | } |
---|
462 | sminoa = sminoa/amp::sqrt<Precision>(n); |
---|
463 | thresh = amp::maximum<Precision>(tol*sminoa, maxitr*n*n*unfl); |
---|
464 | } |
---|
465 | else |
---|
466 | { |
---|
467 | |
---|
468 | // |
---|
469 | // Absolute accuracy desired |
---|
470 | // |
---|
471 | thresh = amp::maximum<Precision>(amp::abs<Precision>(tol)*smax, maxitr*n*n*unfl); |
---|
472 | } |
---|
473 | |
---|
474 | // |
---|
475 | // Prepare for main iteration loop for the singular values |
---|
476 | // (MAXIT is the maximum number of passes through the inner |
---|
477 | // loop permitted before nonconvergence signalled.) |
---|
478 | // |
---|
479 | maxit = maxitr*n*n; |
---|
480 | iter = 0; |
---|
481 | oldll = -1; |
---|
482 | oldm = -1; |
---|
483 | |
---|
484 | // |
---|
485 | // M points to last element of unconverged part of matrix |
---|
486 | // |
---|
487 | m = n; |
---|
488 | |
---|
489 | // |
---|
490 | // Begin main iteration loop |
---|
491 | // |
---|
492 | while( true ) |
---|
493 | { |
---|
494 | |
---|
495 | // |
---|
496 | // Check for convergence or exceeding iteration count |
---|
497 | // |
---|
498 | if( m<=1 ) |
---|
499 | { |
---|
500 | break; |
---|
501 | } |
---|
502 | if( iter>maxit ) |
---|
503 | { |
---|
504 | result = false; |
---|
505 | return result; |
---|
506 | } |
---|
507 | |
---|
508 | // |
---|
509 | // Find diagonal block of matrix to work on |
---|
510 | // |
---|
511 | if( tol<0 && amp::abs<Precision>(d(m))<=thresh ) |
---|
512 | { |
---|
513 | d(m) = 0; |
---|
514 | } |
---|
515 | smax = amp::abs<Precision>(d(m)); |
---|
516 | smin = smax; |
---|
517 | matrixsplitflag = false; |
---|
518 | for(lll=1; lll<=m-1; lll++) |
---|
519 | { |
---|
520 | ll = m-lll; |
---|
521 | abss = amp::abs<Precision>(d(ll)); |
---|
522 | abse = amp::abs<Precision>(e(ll)); |
---|
523 | if( tol<0 && abss<=thresh ) |
---|
524 | { |
---|
525 | d(ll) = 0; |
---|
526 | } |
---|
527 | if( abse<=thresh ) |
---|
528 | { |
---|
529 | matrixsplitflag = true; |
---|
530 | break; |
---|
531 | } |
---|
532 | smin = amp::minimum<Precision>(smin, abss); |
---|
533 | smax = amp::maximum<Precision>(smax, amp::maximum<Precision>(abss, abse)); |
---|
534 | } |
---|
535 | if( !matrixsplitflag ) |
---|
536 | { |
---|
537 | ll = 0; |
---|
538 | } |
---|
539 | else |
---|
540 | { |
---|
541 | |
---|
542 | // |
---|
543 | // Matrix splits since E(LL) = 0 |
---|
544 | // |
---|
545 | e(ll) = 0; |
---|
546 | if( ll==m-1 ) |
---|
547 | { |
---|
548 | |
---|
549 | // |
---|
550 | // Convergence of bottom singular value, return to top of loop |
---|
551 | // |
---|
552 | m = m-1; |
---|
553 | continue; |
---|
554 | } |
---|
555 | } |
---|
556 | ll = ll+1; |
---|
557 | |
---|
558 | // |
---|
559 | // E(LL) through E(M-1) are nonzero, E(LL-1) is zero |
---|
560 | // |
---|
561 | if( ll==m-1 ) |
---|
562 | { |
---|
563 | |
---|
564 | // |
---|
565 | // 2 by 2 block, handle separately |
---|
566 | // |
---|
567 | svdv2x2<Precision>(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl); |
---|
568 | d(m-1) = sigmx; |
---|
569 | e(m-1) = 0; |
---|
570 | d(m) = sigmn; |
---|
571 | |
---|
572 | // |
---|
573 | // Compute singular vectors, if desired |
---|
574 | // |
---|
575 | if( ncvt>0 ) |
---|
576 | { |
---|
577 | mm0 = m+(vstart-1); |
---|
578 | mm1 = m-1+(vstart-1); |
---|
579 | ap::vmove(vttemp.getvector(vstart, vend), vt.getrow(mm1, vstart, vend), cosr); |
---|
580 | ap::vadd(vttemp.getvector(vstart, vend), vt.getrow(mm0, vstart, vend), sinr); |
---|
581 | ap::vmul(vt.getrow(mm0, vstart, vend), cosr); |
---|
582 | ap::vsub(vt.getrow(mm0, vstart, vend), vt.getrow(mm1, vstart, vend), sinr); |
---|
583 | ap::vmove(vt.getrow(mm1, vstart, vend), vttemp.getvector(vstart, vend)); |
---|
584 | } |
---|
585 | if( nru>0 ) |
---|
586 | { |
---|
587 | mm0 = m+ustart-1; |
---|
588 | mm1 = m-1+ustart-1; |
---|
589 | ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl); |
---|
590 | ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl); |
---|
591 | ap::vmul(u.getcolumn(mm0, ustart, uend), cosl); |
---|
592 | ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl); |
---|
593 | ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend)); |
---|
594 | } |
---|
595 | if( ncc>0 ) |
---|
596 | { |
---|
597 | mm0 = m+cstart-1; |
---|
598 | mm1 = m-1+cstart-1; |
---|
599 | ap::vmove(ctemp.getvector(cstart, cend), c.getrow(mm1, cstart, cend), cosl); |
---|
600 | ap::vadd(ctemp.getvector(cstart, cend), c.getrow(mm0, cstart, cend), sinl); |
---|
601 | ap::vmul(c.getrow(mm0, cstart, cend), cosl); |
---|
602 | ap::vsub(c.getrow(mm0, cstart, cend), c.getrow(mm1, cstart, cend), sinl); |
---|
603 | ap::vmove(c.getrow(mm1, cstart, cend), ctemp.getvector(cstart, cend)); |
---|
604 | } |
---|
605 | m = m-2; |
---|
606 | continue; |
---|
607 | } |
---|
608 | |
---|
609 | // |
---|
610 | // If working on new submatrix, choose shift direction |
---|
611 | // (from larger end diagonal element towards smaller) |
---|
612 | // |
---|
613 | // Previously was |
---|
614 | // "if (LL>OLDM) or (M<OLDLL) then" |
---|
615 | // fixed thanks to Michael Rolle < m@rolle.name > |
---|
616 | // Very strange that LAPACK still contains it. |
---|
617 | // |
---|
618 | bchangedir = false; |
---|
619 | if( idir==1 && amp::abs<Precision>(d(ll))<amp::ampf<Precision>("1.0E-3")*amp::abs<Precision>(d(m)) ) |
---|
620 | { |
---|
621 | bchangedir = true; |
---|
622 | } |
---|
623 | if( idir==2 && amp::abs<Precision>(d(m))<amp::ampf<Precision>("1.0E-3")*amp::abs<Precision>(d(ll)) ) |
---|
624 | { |
---|
625 | bchangedir = true; |
---|
626 | } |
---|
627 | if( ll!=oldll || m!=oldm || bchangedir ) |
---|
628 | { |
---|
629 | if( amp::abs<Precision>(d(ll))>=amp::abs<Precision>(d(m)) ) |
---|
630 | { |
---|
631 | |
---|
632 | // |
---|
633 | // Chase bulge from top (big end) to bottom (small end) |
---|
634 | // |
---|
635 | idir = 1; |
---|
636 | } |
---|
637 | else |
---|
638 | { |
---|
639 | |
---|
640 | // |
---|
641 | // Chase bulge from bottom (big end) to top (small end) |
---|
642 | // |
---|
643 | idir = 2; |
---|
644 | } |
---|
645 | } |
---|
646 | |
---|
647 | // |
---|
648 | // Apply convergence tests |
---|
649 | // |
---|
650 | if( idir==1 ) |
---|
651 | { |
---|
652 | |
---|
653 | // |
---|
654 | // Run convergence test in forward direction |
---|
655 | // First apply standard test to bottom of matrix |
---|
656 | // |
---|
657 | if( amp::abs<Precision>(e(m-1))<=amp::abs<Precision>(tol)*amp::abs<Precision>(d(m)) || tol<0 && amp::abs<Precision>(e(m-1))<=thresh ) |
---|
658 | { |
---|
659 | e(m-1) = 0; |
---|
660 | continue; |
---|
661 | } |
---|
662 | if( tol>=0 ) |
---|
663 | { |
---|
664 | |
---|
665 | // |
---|
666 | // If relative accuracy desired, |
---|
667 | // apply convergence criterion forward |
---|
668 | // |
---|
669 | mu = amp::abs<Precision>(d(ll)); |
---|
670 | sminl = mu; |
---|
671 | iterflag = false; |
---|
672 | for(lll=ll; lll<=m-1; lll++) |
---|
673 | { |
---|
674 | if( amp::abs<Precision>(e(lll))<=tol*mu ) |
---|
675 | { |
---|
676 | e(lll) = 0; |
---|
677 | iterflag = true; |
---|
678 | break; |
---|
679 | } |
---|
680 | sminlo = sminl; |
---|
681 | mu = amp::abs<Precision>(d(lll+1))*(mu/(mu+amp::abs<Precision>(e(lll)))); |
---|
682 | sminl = amp::minimum<Precision>(sminl, mu); |
---|
683 | } |
---|
684 | if( iterflag ) |
---|
685 | { |
---|
686 | continue; |
---|
687 | } |
---|
688 | } |
---|
689 | } |
---|
690 | else |
---|
691 | { |
---|
692 | |
---|
693 | // |
---|
694 | // Run convergence test in backward direction |
---|
695 | // First apply standard test to top of matrix |
---|
696 | // |
---|
697 | if( amp::abs<Precision>(e(ll))<=amp::abs<Precision>(tol)*amp::abs<Precision>(d(ll)) || tol<0 && amp::abs<Precision>(e(ll))<=thresh ) |
---|
698 | { |
---|
699 | e(ll) = 0; |
---|
700 | continue; |
---|
701 | } |
---|
702 | if( tol>=0 ) |
---|
703 | { |
---|
704 | |
---|
705 | // |
---|
706 | // If relative accuracy desired, |
---|
707 | // apply convergence criterion backward |
---|
708 | // |
---|
709 | mu = amp::abs<Precision>(d(m)); |
---|
710 | sminl = mu; |
---|
711 | iterflag = false; |
---|
712 | for(lll=m-1; lll>=ll; lll--) |
---|
713 | { |
---|
714 | if( amp::abs<Precision>(e(lll))<=tol*mu ) |
---|
715 | { |
---|
716 | e(lll) = 0; |
---|
717 | iterflag = true; |
---|
718 | break; |
---|
719 | } |
---|
720 | sminlo = sminl; |
---|
721 | mu = amp::abs<Precision>(d(lll))*(mu/(mu+amp::abs<Precision>(e(lll)))); |
---|
722 | sminl = amp::minimum<Precision>(sminl, mu); |
---|
723 | } |
---|
724 | if( iterflag ) |
---|
725 | { |
---|
726 | continue; |
---|
727 | } |
---|
728 | } |
---|
729 | } |
---|
730 | oldll = ll; |
---|
731 | oldm = m; |
---|
732 | |
---|
733 | // |
---|
734 | // Compute shift. First, test if shifting would ruin relative |
---|
735 | // accuracy, and if so set the shift to zero. |
---|
736 | // |
---|
737 | if( tol>=0 && n*tol*(sminl/smax)<=amp::maximum<Precision>(eps, amp::ampf<Precision>("0.01")*tol) ) |
---|
738 | { |
---|
739 | |
---|
740 | // |
---|
741 | // Use a zero shift to avoid loss of relative accuracy |
---|
742 | // |
---|
743 | shift = 0; |
---|
744 | } |
---|
745 | else |
---|
746 | { |
---|
747 | |
---|
748 | // |
---|
749 | // Compute the shift from 2-by-2 block at end of matrix |
---|
750 | // |
---|
751 | if( idir==1 ) |
---|
752 | { |
---|
753 | sll = amp::abs<Precision>(d(ll)); |
---|
754 | svd2x2<Precision>(d(m-1), e(m-1), d(m), shift, r); |
---|
755 | } |
---|
756 | else |
---|
757 | { |
---|
758 | sll = amp::abs<Precision>(d(m)); |
---|
759 | svd2x2<Precision>(d(ll), e(ll), d(ll+1), shift, r); |
---|
760 | } |
---|
761 | |
---|
762 | // |
---|
763 | // Test if shift negligible, and if so set to zero |
---|
764 | // |
---|
765 | if( sll>0 ) |
---|
766 | { |
---|
767 | if( amp::sqr<Precision>(shift/sll)<eps ) |
---|
768 | { |
---|
769 | shift = 0; |
---|
770 | } |
---|
771 | } |
---|
772 | } |
---|
773 | |
---|
774 | // |
---|
775 | // Increment iteration count |
---|
776 | // |
---|
777 | iter = iter+m-ll; |
---|
778 | |
---|
779 | // |
---|
780 | // If SHIFT = 0, do simplified QR iteration |
---|
781 | // |
---|
782 | if( shift==0 ) |
---|
783 | { |
---|
784 | if( idir==1 ) |
---|
785 | { |
---|
786 | |
---|
787 | // |
---|
788 | // Chase bulge from top to bottom |
---|
789 | // Save cosines and sines for later singular vector updates |
---|
790 | // |
---|
791 | cs = 1; |
---|
792 | oldcs = 1; |
---|
793 | for(i=ll; i<=m-1; i++) |
---|
794 | { |
---|
795 | rotations::generaterotation<Precision>(d(i)*cs, e(i), cs, sn, r); |
---|
796 | if( i>ll ) |
---|
797 | { |
---|
798 | e(i-1) = oldsn*r; |
---|
799 | } |
---|
800 | rotations::generaterotation<Precision>(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp); |
---|
801 | d(i) = tmp; |
---|
802 | work0(i-ll+1) = cs; |
---|
803 | work1(i-ll+1) = sn; |
---|
804 | work2(i-ll+1) = oldcs; |
---|
805 | work3(i-ll+1) = oldsn; |
---|
806 | } |
---|
807 | h = d(m)*cs; |
---|
808 | d(m) = h*oldcs; |
---|
809 | e(m-1) = h*oldsn; |
---|
810 | |
---|
811 | // |
---|
812 | // Update singular vectors |
---|
813 | // |
---|
814 | if( ncvt>0 ) |
---|
815 | { |
---|
816 | rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); |
---|
817 | } |
---|
818 | if( nru>0 ) |
---|
819 | { |
---|
820 | rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp); |
---|
821 | } |
---|
822 | if( ncc>0 ) |
---|
823 | { |
---|
824 | rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); |
---|
825 | } |
---|
826 | |
---|
827 | // |
---|
828 | // Test convergence |
---|
829 | // |
---|
830 | if( amp::abs<Precision>(e(m-1))<=thresh ) |
---|
831 | { |
---|
832 | e(m-1) = 0; |
---|
833 | } |
---|
834 | } |
---|
835 | else |
---|
836 | { |
---|
837 | |
---|
838 | // |
---|
839 | // Chase bulge from bottom to top |
---|
840 | // Save cosines and sines for later singular vector updates |
---|
841 | // |
---|
842 | cs = 1; |
---|
843 | oldcs = 1; |
---|
844 | for(i=m; i>=ll+1; i--) |
---|
845 | { |
---|
846 | rotations::generaterotation<Precision>(d(i)*cs, e(i-1), cs, sn, r); |
---|
847 | if( i<m ) |
---|
848 | { |
---|
849 | e(i) = oldsn*r; |
---|
850 | } |
---|
851 | rotations::generaterotation<Precision>(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp); |
---|
852 | d(i) = tmp; |
---|
853 | work0(i-ll) = cs; |
---|
854 | work1(i-ll) = -sn; |
---|
855 | work2(i-ll) = oldcs; |
---|
856 | work3(i-ll) = -oldsn; |
---|
857 | } |
---|
858 | h = d(ll)*cs; |
---|
859 | d(ll) = h*oldcs; |
---|
860 | e(ll) = h*oldsn; |
---|
861 | |
---|
862 | // |
---|
863 | // Update singular vectors |
---|
864 | // |
---|
865 | if( ncvt>0 ) |
---|
866 | { |
---|
867 | rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); |
---|
868 | } |
---|
869 | if( nru>0 ) |
---|
870 | { |
---|
871 | rotations::applyrotationsfromtheright<Precision>(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp); |
---|
872 | } |
---|
873 | if( ncc>0 ) |
---|
874 | { |
---|
875 | rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); |
---|
876 | } |
---|
877 | |
---|
878 | // |
---|
879 | // Test convergence |
---|
880 | // |
---|
881 | if( amp::abs<Precision>(e(ll))<=thresh ) |
---|
882 | { |
---|
883 | e(ll) = 0; |
---|
884 | } |
---|
885 | } |
---|
886 | } |
---|
887 | else |
---|
888 | { |
---|
889 | |
---|
890 | // |
---|
891 | // Use nonzero shift |
---|
892 | // |
---|
893 | if( idir==1 ) |
---|
894 | { |
---|
895 | |
---|
896 | // |
---|
897 | // Chase bulge from top to bottom |
---|
898 | // Save cosines and sines for later singular vector updates |
---|
899 | // |
---|
900 | f = (amp::abs<Precision>(d(ll))-shift)*(extsignbdsqr<Precision>(1, d(ll))+shift/d(ll)); |
---|
901 | g = e(ll); |
---|
902 | for(i=ll; i<=m-1; i++) |
---|
903 | { |
---|
904 | rotations::generaterotation<Precision>(f, g, cosr, sinr, r); |
---|
905 | if( i>ll ) |
---|
906 | { |
---|
907 | e(i-1) = r; |
---|
908 | } |
---|
909 | f = cosr*d(i)+sinr*e(i); |
---|
910 | e(i) = cosr*e(i)-sinr*d(i); |
---|
911 | g = sinr*d(i+1); |
---|
912 | d(i+1) = cosr*d(i+1); |
---|
913 | rotations::generaterotation<Precision>(f, g, cosl, sinl, r); |
---|
914 | d(i) = r; |
---|
915 | f = cosl*e(i)+sinl*d(i+1); |
---|
916 | d(i+1) = cosl*d(i+1)-sinl*e(i); |
---|
917 | if( i<m-1 ) |
---|
918 | { |
---|
919 | g = sinl*e(i+1); |
---|
920 | e(i+1) = cosl*e(i+1); |
---|
921 | } |
---|
922 | work0(i-ll+1) = cosr; |
---|
923 | work1(i-ll+1) = sinr; |
---|
924 | work2(i-ll+1) = cosl; |
---|
925 | work3(i-ll+1) = sinl; |
---|
926 | } |
---|
927 | e(m-1) = f; |
---|
928 | |
---|
929 | // |
---|
930 | // Update singular vectors |
---|
931 | // |
---|
932 | if( ncvt>0 ) |
---|
933 | { |
---|
934 | rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); |
---|
935 | } |
---|
936 | if( nru>0 ) |
---|
937 | { |
---|
938 | rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp); |
---|
939 | } |
---|
940 | if( ncc>0 ) |
---|
941 | { |
---|
942 | rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); |
---|
943 | } |
---|
944 | |
---|
945 | // |
---|
946 | // Test convergence |
---|
947 | // |
---|
948 | if( amp::abs<Precision>(e(m-1))<=thresh ) |
---|
949 | { |
---|
950 | e(m-1) = 0; |
---|
951 | } |
---|
952 | } |
---|
953 | else |
---|
954 | { |
---|
955 | |
---|
956 | // |
---|
957 | // Chase bulge from bottom to top |
---|
958 | // Save cosines and sines for later singular vector updates |
---|
959 | // |
---|
960 | f = (amp::abs<Precision>(d(m))-shift)*(extsignbdsqr<Precision>(1, d(m))+shift/d(m)); |
---|
961 | g = e(m-1); |
---|
962 | for(i=m; i>=ll+1; i--) |
---|
963 | { |
---|
964 | rotations::generaterotation<Precision>(f, g, cosr, sinr, r); |
---|
965 | if( i<m ) |
---|
966 | { |
---|
967 | e(i) = r; |
---|
968 | } |
---|
969 | f = cosr*d(i)+sinr*e(i-1); |
---|
970 | e(i-1) = cosr*e(i-1)-sinr*d(i); |
---|
971 | g = sinr*d(i-1); |
---|
972 | d(i-1) = cosr*d(i-1); |
---|
973 | rotations::generaterotation<Precision>(f, g, cosl, sinl, r); |
---|
974 | d(i) = r; |
---|
975 | f = cosl*e(i-1)+sinl*d(i-1); |
---|
976 | d(i-1) = cosl*d(i-1)-sinl*e(i-1); |
---|
977 | if( i>ll+1 ) |
---|
978 | { |
---|
979 | g = sinl*e(i-2); |
---|
980 | e(i-2) = cosl*e(i-2); |
---|
981 | } |
---|
982 | work0(i-ll) = cosr; |
---|
983 | work1(i-ll) = -sinr; |
---|
984 | work2(i-ll) = cosl; |
---|
985 | work3(i-ll) = -sinl; |
---|
986 | } |
---|
987 | e(ll) = f; |
---|
988 | |
---|
989 | // |
---|
990 | // Test convergence |
---|
991 | // |
---|
992 | if( amp::abs<Precision>(e(ll))<=thresh ) |
---|
993 | { |
---|
994 | e(ll) = 0; |
---|
995 | } |
---|
996 | |
---|
997 | // |
---|
998 | // Update singular vectors if desired |
---|
999 | // |
---|
1000 | if( ncvt>0 ) |
---|
1001 | { |
---|
1002 | rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); |
---|
1003 | } |
---|
1004 | if( nru>0 ) |
---|
1005 | { |
---|
1006 | rotations::applyrotationsfromtheright<Precision>(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp); |
---|
1007 | } |
---|
1008 | if( ncc>0 ) |
---|
1009 | { |
---|
1010 | rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); |
---|
1011 | } |
---|
1012 | } |
---|
1013 | } |
---|
1014 | |
---|
1015 | // |
---|
1016 | // QR iteration finished, go back and check convergence |
---|
1017 | // |
---|
1018 | continue; |
---|
1019 | } |
---|
1020 | |
---|
1021 | // |
---|
1022 | // All singular values converged, so make them positive |
---|
1023 | // |
---|
1024 | for(i=1; i<=n; i++) |
---|
1025 | { |
---|
1026 | if( d(i)<0 ) |
---|
1027 | { |
---|
1028 | d(i) = -d(i); |
---|
1029 | |
---|
1030 | // |
---|
1031 | // Change sign of singular vectors, if desired |
---|
1032 | // |
---|
1033 | if( ncvt>0 ) |
---|
1034 | { |
---|
1035 | ap::vmul(vt.getrow(i+vstart-1, vstart, vend), -1); |
---|
1036 | } |
---|
1037 | } |
---|
1038 | } |
---|
1039 | |
---|
1040 | // |
---|
1041 | // Sort the singular values into decreasing order (insertion sort on |
---|
1042 | // singular values, but only one transposition per singular vector) |
---|
1043 | // |
---|
1044 | for(i=1; i<=n-1; i++) |
---|
1045 | { |
---|
1046 | |
---|
1047 | // |
---|
1048 | // Scan for smallest D(I) |
---|
1049 | // |
---|
1050 | isub = 1; |
---|
1051 | smin = d(1); |
---|
1052 | for(j=2; j<=n+1-i; j++) |
---|
1053 | { |
---|
1054 | if( d(j)<=smin ) |
---|
1055 | { |
---|
1056 | isub = j; |
---|
1057 | smin = d(j); |
---|
1058 | } |
---|
1059 | } |
---|
1060 | if( isub!=n+1-i ) |
---|
1061 | { |
---|
1062 | |
---|
1063 | // |
---|
1064 | // Swap singular values and vectors |
---|
1065 | // |
---|
1066 | d(isub) = d(n+1-i); |
---|
1067 | d(n+1-i) = smin; |
---|
1068 | if( ncvt>0 ) |
---|
1069 | { |
---|
1070 | j = n+1-i; |
---|
1071 | ap::vmove(vttemp.getvector(vstart, vend), vt.getrow(isub+vstart-1, vstart, vend)); |
---|
1072 | ap::vmove(vt.getrow(isub+vstart-1, vstart, vend), vt.getrow(j+vstart-1, vstart, vend)); |
---|
1073 | ap::vmove(vt.getrow(j+vstart-1, vstart, vend), vttemp.getvector(vstart, vend)); |
---|
1074 | } |
---|
1075 | if( nru>0 ) |
---|
1076 | { |
---|
1077 | j = n+1-i; |
---|
1078 | ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend)); |
---|
1079 | ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend)); |
---|
1080 | ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend)); |
---|
1081 | } |
---|
1082 | if( ncc>0 ) |
---|
1083 | { |
---|
1084 | j = n+1-i; |
---|
1085 | ap::vmove(ctemp.getvector(cstart, cend), c.getrow(isub+cstart-1, cstart, cend)); |
---|
1086 | ap::vmove(c.getrow(isub+cstart-1, cstart, cend), c.getrow(j+cstart-1, cstart, cend)); |
---|
1087 | ap::vmove(c.getrow(j+cstart-1, cstart, cend), ctemp.getvector(cstart, cend)); |
---|
1088 | } |
---|
1089 | } |
---|
1090 | } |
---|
1091 | return result; |
---|
1092 | } |
---|
1093 | |
---|
1094 | |
---|
1095 | template<unsigned int Precision> |
---|
1096 | amp::ampf<Precision> extsignbdsqr(amp::ampf<Precision> a, |
---|
1097 | amp::ampf<Precision> b) |
---|
1098 | { |
---|
1099 | amp::ampf<Precision> result; |
---|
1100 | |
---|
1101 | |
---|
1102 | if( b>=0 ) |
---|
1103 | { |
---|
1104 | result = amp::abs<Precision>(a); |
---|
1105 | } |
---|
1106 | else |
---|
1107 | { |
---|
1108 | result = -amp::abs<Precision>(a); |
---|
1109 | } |
---|
1110 | return result; |
---|
1111 | } |
---|
1112 | |
---|
1113 | |
---|
1114 | template<unsigned int Precision> |
---|
1115 | void svd2x2(amp::ampf<Precision> f, |
---|
1116 | amp::ampf<Precision> g, |
---|
1117 | amp::ampf<Precision> h, |
---|
1118 | amp::ampf<Precision>& ssmin, |
---|
1119 | amp::ampf<Precision>& ssmax) |
---|
1120 | { |
---|
1121 | amp::ampf<Precision> aas; |
---|
1122 | amp::ampf<Precision> at; |
---|
1123 | amp::ampf<Precision> au; |
---|
1124 | amp::ampf<Precision> c; |
---|
1125 | amp::ampf<Precision> fa; |
---|
1126 | amp::ampf<Precision> fhmn; |
---|
1127 | amp::ampf<Precision> fhmx; |
---|
1128 | amp::ampf<Precision> ga; |
---|
1129 | amp::ampf<Precision> ha; |
---|
1130 | |
---|
1131 | |
---|
1132 | fa = amp::abs<Precision>(f); |
---|
1133 | ga = amp::abs<Precision>(g); |
---|
1134 | ha = amp::abs<Precision>(h); |
---|
1135 | fhmn = amp::minimum<Precision>(fa, ha); |
---|
1136 | fhmx = amp::maximum<Precision>(fa, ha); |
---|
1137 | if( fhmn==0 ) |
---|
1138 | { |
---|
1139 | ssmin = 0; |
---|
1140 | if( fhmx==0 ) |
---|
1141 | { |
---|
1142 | ssmax = ga; |
---|
1143 | } |
---|
1144 | else |
---|
1145 | { |
---|
1146 | ssmax = amp::maximum<Precision>(fhmx, ga)*amp::sqrt<Precision>(1+amp::sqr<Precision>(amp::minimum<Precision>(fhmx, ga)/amp::maximum<Precision>(fhmx, ga))); |
---|
1147 | } |
---|
1148 | } |
---|
1149 | else |
---|
1150 | { |
---|
1151 | if( ga<fhmx ) |
---|
1152 | { |
---|
1153 | aas = 1+fhmn/fhmx; |
---|
1154 | at = (fhmx-fhmn)/fhmx; |
---|
1155 | au = amp::sqr<Precision>(ga/fhmx); |
---|
1156 | c = 2/(amp::sqrt<Precision>(aas*aas+au)+amp::sqrt<Precision>(at*at+au)); |
---|
1157 | ssmin = fhmn*c; |
---|
1158 | ssmax = fhmx/c; |
---|
1159 | } |
---|
1160 | else |
---|
1161 | { |
---|
1162 | au = fhmx/ga; |
---|
1163 | if( au==0 ) |
---|
1164 | { |
---|
1165 | |
---|
1166 | // |
---|
1167 | // Avoid possible harmful underflow if exponent range |
---|
1168 | // asymmetric (true SSMIN may not underflow even if |
---|
1169 | // AU underflows) |
---|
1170 | // |
---|
1171 | ssmin = fhmn*fhmx/ga; |
---|
1172 | ssmax = ga; |
---|
1173 | } |
---|
1174 | else |
---|
1175 | { |
---|
1176 | aas = 1+fhmn/fhmx; |
---|
1177 | at = (fhmx-fhmn)/fhmx; |
---|
1178 | c = 1/(amp::sqrt<Precision>(1+amp::sqr<Precision>(aas*au))+amp::sqrt<Precision>(1+amp::sqr<Precision>(at*au))); |
---|
1179 | ssmin = fhmn*c*au; |
---|
1180 | ssmin = ssmin+ssmin; |
---|
1181 | ssmax = ga/(c+c); |
---|
1182 | } |
---|
1183 | } |
---|
1184 | } |
---|
1185 | } |
---|
1186 | |
---|
1187 | |
---|
1188 | template<unsigned int Precision> |
---|
1189 | void svdv2x2(amp::ampf<Precision> f, |
---|
1190 | amp::ampf<Precision> g, |
---|
1191 | amp::ampf<Precision> h, |
---|
1192 | amp::ampf<Precision>& ssmin, |
---|
1193 | amp::ampf<Precision>& ssmax, |
---|
1194 | amp::ampf<Precision>& snr, |
---|
1195 | amp::ampf<Precision>& csr, |
---|
1196 | amp::ampf<Precision>& snl, |
---|
1197 | amp::ampf<Precision>& csl) |
---|
1198 | { |
---|
1199 | bool gasmal; |
---|
1200 | bool swp; |
---|
1201 | int pmax; |
---|
1202 | amp::ampf<Precision> a; |
---|
1203 | amp::ampf<Precision> clt; |
---|
1204 | amp::ampf<Precision> crt; |
---|
1205 | amp::ampf<Precision> d; |
---|
1206 | amp::ampf<Precision> fa; |
---|
1207 | amp::ampf<Precision> ft; |
---|
1208 | amp::ampf<Precision> ga; |
---|
1209 | amp::ampf<Precision> gt; |
---|
1210 | amp::ampf<Precision> ha; |
---|
1211 | amp::ampf<Precision> ht; |
---|
1212 | amp::ampf<Precision> l; |
---|
1213 | amp::ampf<Precision> m; |
---|
1214 | amp::ampf<Precision> mm; |
---|
1215 | amp::ampf<Precision> r; |
---|
1216 | amp::ampf<Precision> s; |
---|
1217 | amp::ampf<Precision> slt; |
---|
1218 | amp::ampf<Precision> srt; |
---|
1219 | amp::ampf<Precision> t; |
---|
1220 | amp::ampf<Precision> temp; |
---|
1221 | amp::ampf<Precision> tsign; |
---|
1222 | amp::ampf<Precision> tt; |
---|
1223 | amp::ampf<Precision> v; |
---|
1224 | |
---|
1225 | |
---|
1226 | ft = f; |
---|
1227 | fa = amp::abs<Precision>(ft); |
---|
1228 | ht = h; |
---|
1229 | ha = amp::abs<Precision>(h); |
---|
1230 | |
---|
1231 | // |
---|
1232 | // PMAX points to the maximum absolute element of matrix |
---|
1233 | // PMAX = 1 if F largest in absolute values |
---|
1234 | // PMAX = 2 if G largest in absolute values |
---|
1235 | // PMAX = 3 if H largest in absolute values |
---|
1236 | // |
---|
1237 | pmax = 1; |
---|
1238 | swp = ha>fa; |
---|
1239 | if( swp ) |
---|
1240 | { |
---|
1241 | |
---|
1242 | // |
---|
1243 | // Now FA .ge. HA |
---|
1244 | // |
---|
1245 | pmax = 3; |
---|
1246 | temp = ft; |
---|
1247 | ft = ht; |
---|
1248 | ht = temp; |
---|
1249 | temp = fa; |
---|
1250 | fa = ha; |
---|
1251 | ha = temp; |
---|
1252 | } |
---|
1253 | gt = g; |
---|
1254 | ga = amp::abs<Precision>(gt); |
---|
1255 | if( ga==0 ) |
---|
1256 | { |
---|
1257 | |
---|
1258 | // |
---|
1259 | // Diagonal matrix |
---|
1260 | // |
---|
1261 | ssmin = ha; |
---|
1262 | ssmax = fa; |
---|
1263 | clt = 1; |
---|
1264 | crt = 1; |
---|
1265 | slt = 0; |
---|
1266 | srt = 0; |
---|
1267 | } |
---|
1268 | else |
---|
1269 | { |
---|
1270 | gasmal = true; |
---|
1271 | if( ga>fa ) |
---|
1272 | { |
---|
1273 | pmax = 2; |
---|
1274 | if( fa/ga<amp::ampf<Precision>::getAlgoPascalEpsilon() ) |
---|
1275 | { |
---|
1276 | |
---|
1277 | // |
---|
1278 | // Case of very large GA |
---|
1279 | // |
---|
1280 | gasmal = false; |
---|
1281 | ssmax = ga; |
---|
1282 | if( ha>1 ) |
---|
1283 | { |
---|
1284 | v = ga/ha; |
---|
1285 | ssmin = fa/v; |
---|
1286 | } |
---|
1287 | else |
---|
1288 | { |
---|
1289 | v = fa/ga; |
---|
1290 | ssmin = v*ha; |
---|
1291 | } |
---|
1292 | clt = 1; |
---|
1293 | slt = ht/gt; |
---|
1294 | srt = 1; |
---|
1295 | crt = ft/gt; |
---|
1296 | } |
---|
1297 | } |
---|
1298 | if( gasmal ) |
---|
1299 | { |
---|
1300 | |
---|
1301 | // |
---|
1302 | // Normal case |
---|
1303 | // |
---|
1304 | d = fa-ha; |
---|
1305 | if( d==fa ) |
---|
1306 | { |
---|
1307 | l = 1; |
---|
1308 | } |
---|
1309 | else |
---|
1310 | { |
---|
1311 | l = d/fa; |
---|
1312 | } |
---|
1313 | m = gt/ft; |
---|
1314 | t = 2-l; |
---|
1315 | mm = m*m; |
---|
1316 | tt = t*t; |
---|
1317 | s = amp::sqrt<Precision>(tt+mm); |
---|
1318 | if( l==0 ) |
---|
1319 | { |
---|
1320 | r = amp::abs<Precision>(m); |
---|
1321 | } |
---|
1322 | else |
---|
1323 | { |
---|
1324 | r = amp::sqrt<Precision>(l*l+mm); |
---|
1325 | } |
---|
1326 | a = amp::ampf<Precision>("0.5")*(s+r); |
---|
1327 | ssmin = ha/a; |
---|
1328 | ssmax = fa*a; |
---|
1329 | if( mm==0 ) |
---|
1330 | { |
---|
1331 | |
---|
1332 | // |
---|
1333 | // Note that M is very tiny |
---|
1334 | // |
---|
1335 | if( l==0 ) |
---|
1336 | { |
---|
1337 | t = extsignbdsqr<Precision>(2, ft)*extsignbdsqr<Precision>(1, gt); |
---|
1338 | } |
---|
1339 | else |
---|
1340 | { |
---|
1341 | t = gt/extsignbdsqr<Precision>(d, ft)+m/t; |
---|
1342 | } |
---|
1343 | } |
---|
1344 | else |
---|
1345 | { |
---|
1346 | t = (m/(s+t)+m/(r+l))*(1+a); |
---|
1347 | } |
---|
1348 | l = amp::sqrt<Precision>(t*t+4); |
---|
1349 | crt = 2/l; |
---|
1350 | srt = t/l; |
---|
1351 | clt = (crt+srt*m)/a; |
---|
1352 | v = ht/ft; |
---|
1353 | slt = v*srt/a; |
---|
1354 | } |
---|
1355 | } |
---|
1356 | if( swp ) |
---|
1357 | { |
---|
1358 | csl = srt; |
---|
1359 | snl = crt; |
---|
1360 | csr = slt; |
---|
1361 | snr = clt; |
---|
1362 | } |
---|
1363 | else |
---|
1364 | { |
---|
1365 | csl = clt; |
---|
1366 | snl = slt; |
---|
1367 | csr = crt; |
---|
1368 | snr = srt; |
---|
1369 | } |
---|
1370 | |
---|
1371 | // |
---|
1372 | // Correct signs of SSMAX and SSMIN |
---|
1373 | // |
---|
1374 | if( pmax==1 ) |
---|
1375 | { |
---|
1376 | tsign = extsignbdsqr<Precision>(1, csr)*extsignbdsqr<Precision>(1, csl)*extsignbdsqr<Precision>(1, f); |
---|
1377 | } |
---|
1378 | if( pmax==2 ) |
---|
1379 | { |
---|
1380 | tsign = extsignbdsqr<Precision>(1, snr)*extsignbdsqr<Precision>(1, csl)*extsignbdsqr<Precision>(1, g); |
---|
1381 | } |
---|
1382 | if( pmax==3 ) |
---|
1383 | { |
---|
1384 | tsign = extsignbdsqr<Precision>(1, snr)*extsignbdsqr<Precision>(1, snl)*extsignbdsqr<Precision>(1, h); |
---|
1385 | } |
---|
1386 | ssmax = extsignbdsqr<Precision>(ssmax, tsign); |
---|
1387 | ssmin = extsignbdsqr<Precision>(ssmin, tsign*extsignbdsqr<Precision>(1, f)*extsignbdsqr<Precision>(1, h)); |
---|
1388 | } |
---|
1389 | } // namespace |
---|
1390 | |
---|
1391 | #endif |
---|