Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_kernel.hpp
1/* ========================================================================== */
2/* === KLU_kernel =========================================================== */
3/* ========================================================================== */
4// @HEADER
5// ***********************************************************************
6//
7// KLU2: A Direct Linear Solver package
8// Copyright 2011 Sandia Corporation
9//
10// Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11// U.S. Government retains certain rights in this software.
12//
13// This library is free software; you can redistribute it and/or modify
14// it under the terms of the GNU Lesser General Public License as
15// published by the Free Software Foundation; either version 2.1 of the
16// License, or (at your option) any later version.
17//
18// This library is distributed in the hope that it will be useful, but
19// WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21// Lesser General Public License for more details.
22//
23// You should have received a copy of the GNU Lesser General Public
24// License along with this library; if not, write to the Free Software
25// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26// USA
27// Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28//
29// KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30// University of Florida. The Authors of KLU are Timothy A. Davis and
31// Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32// information for KLU.
33//
34// ***********************************************************************
35// @HEADER
36
37/* Sparse left-looking LU factorization, with partial pivoting. Based on
38 * Gilbert & Peierl's method, with a non-recursive DFS and with Eisenstat &
39 * Liu's symmetric pruning. No user-callable routines are in this file.
40 */
41
42#ifndef KLU2_KERNEL_HPP
43#define KLU2_KERNEL_HPP
44
45#include "klu2_internal.h"
46#include "klu2_memory.hpp"
47
48/* ========================================================================== */
49/* === dfs ================================================================== */
50/* ========================================================================== */
51
52/* Does a depth-first-search, starting at node j. */
53
54template <typename Int>
55static Int dfs
56(
57 /* input, not modified on output: */
58 Int j, /* node at which to start the DFS */
59 Int k, /* mark value, for the Flag array */
60 Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or AMESOS2_KLU2_EMPTY if
61 * row i is not yet pivotal. */
62 Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
63 Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
64
65 /* workspace, not defined on input or output */
66 Int Stack [ ], /* size n */
67
68 /* input/output: */
69 Int Flag [ ], /* Flag [i] == k means i is marked */
70 Int Lpend [ ], /* for symmetric pruning */
71 Int top, /* top of stack on input*/
72 Unit LU [],
73 Int *Lik, /* Li row index array of the kth column */
74 Int *plength,
75
76 /* other, not defined on input or output */
77 Int Ap_pos [ ] /* keeps track of position in adj list during DFS */
78)
79{
80 Int i, pos, jnew, head, l_length ;
81 Int *Li ;
82
83 l_length = *plength ;
84
85 head = 0 ;
86 Stack [0] = j ;
87 ASSERT (Flag [j] != k) ;
88
89 while (head >= 0)
90 {
91 j = Stack [head] ;
92 jnew = Pinv [j] ;
93 ASSERT (jnew >= 0 && jnew < k) ; /* j is pivotal */
94
95 if (Flag [j] != k) /* a node is not yet visited */
96 {
97 /* first time that j has been visited */
98 Flag [j] = k ;
99 PRINTF (("[ start dfs at %d : new %d\n", j, jnew)) ;
100 /* set Ap_pos [head] to one past the last entry in col j to scan */
101 Ap_pos [head] =
102 (Lpend [jnew] == AMESOS2_KLU2_EMPTY) ? Llen [jnew] : Lpend [jnew] ;
103 }
104
105 /* add the adjacent nodes to the recursive stack by iterating through
106 * until finding another non-visited pivotal node */
107 Li = (Int *) (LU + Lip [jnew]) ;
108 for (pos = --Ap_pos [head] ; pos >= 0 ; --pos)
109 {
110 i = Li [pos] ;
111 if (Flag [i] != k)
112 {
113 /* node i is not yet visited */
114 if (Pinv [i] >= 0)
115 {
116 /* keep track of where we left off in the scan of the
117 * adjacency list of node j so we can restart j where we
118 * left off. */
119 Ap_pos [head] = pos ;
120
121 /* node i is pivotal; push it onto the recursive stack
122 * and immediately break so we can recurse on node i. */
123 Stack [++head] = i ;
124 break ;
125 }
126 else
127 {
128 /* node i is not pivotal (no outgoing edges). */
129 /* Flag as visited and store directly into L,
130 * and continue with current node j. */
131 Flag [i] = k ;
132 Lik [l_length] = i ;
133 l_length++ ;
134 }
135 }
136 }
137
138 if (pos == -1)
139 {
140 /* if all adjacent nodes of j are already visited, pop j from
141 * recursive stack and push j onto output stack */
142 head-- ;
143 Stack[--top] = j ;
144 PRINTF ((" end dfs at %d ] head : %d\n", j, head)) ;
145 }
146 }
147
148 *plength = l_length ;
149 return (top) ;
150}
151
152
153/* ========================================================================== */
154/* === lsolve_symbolic ====================================================== */
155/* ========================================================================== */
156
157/* Finds the pattern of x, for the solution of Lx=b */
158
159template <typename Int>
160static Int lsolve_symbolic
161(
162 /* input, not modified on output: */
163 Int n, /* L is n-by-n, where n >= 0 */
164 Int k, /* also used as the mark value, for the Flag array */
165 Int Ap [ ],
166 Int Ai [ ],
167 Int Q [ ],
168 Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or AMESOS2_KLU2_EMPTY if row i
169 * is not yet pivotal. */
170
171 /* workspace, not defined on input or output */
172 Int Stack [ ], /* size n */
173
174 /* workspace, defined on input and output */
175 Int Flag [ ], /* size n. Initially, all of Flag [0..n-1] < k. After
176 * lsolve_symbolic is done, Flag [i] == k if i is in
177 * the pattern of the output, and Flag [0..n-1] <= k. */
178
179 /* other */
180 Int Lpend [ ], /* for symmetric pruning */
181 Int Ap_pos [ ], /* workspace used in dfs */
182
183 Unit LU [ ], /* LU factors (pattern and values) */
184 Int lup, /* pointer to free space in LU */
185 Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
186 Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
187
188 /* ---- the following are only used in the BTF case --- */
189
190 Int k1, /* the block of A is from k1 to k2-1 */
191 Int PSinv [ ] /* inverse of P from symbolic factorization */
192)
193{
194 Int *Lik ;
195 Int i, p, pend, oldcol, kglobal, top, l_length ;
196
197 top = n ;
198 l_length = 0 ;
199 Lik = (Int *) (LU + lup);
200
201 /* ---------------------------------------------------------------------- */
202 /* BTF factorization of A (k1:k2-1, k1:k2-1) */
203 /* ---------------------------------------------------------------------- */
204
205 kglobal = k + k1 ; /* column k of the block is col kglobal of A */
206 oldcol = Q [kglobal] ; /* Q must be present for BTF case */
207 pend = Ap [oldcol+1] ;
208 for (p = Ap [oldcol] ; p < pend ; p++)
209 {
210 i = PSinv [Ai [p]] - k1 ;
211 if (i < 0) continue ; /* skip entry outside the block */
212
213 /* (i,k) is an entry in the block. start a DFS at node i */
214 PRINTF (("\n ===== DFS at node %d in b, inew: %d\n", i, Pinv [i])) ;
215 if (Flag [i] != k)
216 {
217 if (Pinv [i] >= 0)
218 {
219 top = dfs (i, k, Pinv, Llen, Lip, Stack, Flag,
220 Lpend, top, LU, Lik, &l_length, Ap_pos) ;
221 }
222 else
223 {
224 /* i is not pivotal, and not flagged. Flag and put in L */
225 Flag [i] = k ;
226 Lik [l_length] = i ;
227 l_length++;
228 }
229 }
230 }
231
232 /* If Llen [k] is zero, the matrix is structurally singular */
233 Llen [k] = l_length ;
234 return (top) ;
235}
236
237
238/* ========================================================================== */
239/* === construct_column ===================================================== */
240/* ========================================================================== */
241
242/* Construct the kth column of A, and the off-diagonal part, if requested.
243 * Scatter the numerical values into the workspace X, and construct the
244 * corresponding column of the off-diagonal matrix. */
245
246template <typename Entry, typename Int>
247static void construct_column
248(
249 /* inputs, not modified on output */
250 Int k, /* the column of A (or the column of the block) to get */
251 Int Ap [ ],
252 Int Ai [ ],
253 Entry Ax [ ],
254 Int Q [ ], /* column pre-ordering */
255
256 /* zero on input, modified on output */
257 Entry X [ ],
258
259 /* ---- the following are only used in the BTF case --- */
260
261 /* inputs, not modified on output */
262 Int k1, /* the block of A is from k1 to k2-1 */
263 Int PSinv [ ], /* inverse of P from symbolic factorization */
264 double Rs [ ], /* scale factors for A */
265 Int scale, /* 0: no scaling, nonzero: scale the rows with Rs */
266
267 /* inputs, modified on output */
268 Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
269 Int Offi [ ],
270 Entry Offx [ ]
271)
272{
273 Entry aik ;
274 Int i, p, pend, oldcol, kglobal, poff, oldrow ;
275
276 /* ---------------------------------------------------------------------- */
277 /* Scale and scatter the column into X. */
278 /* ---------------------------------------------------------------------- */
279
280 kglobal = k + k1 ; /* column k of the block is col kglobal of A */
281 poff = Offp [kglobal] ; /* start of off-diagonal column */
282 oldcol = Q [kglobal] ;
283 pend = Ap [oldcol+1] ;
284
285 if (scale <= 0)
286 {
287 /* no scaling */
288 for (p = Ap [oldcol] ; p < pend ; p++)
289 {
290 oldrow = Ai [p] ;
291 i = PSinv [oldrow] - k1 ;
292 aik = Ax [p] ;
293 if (i < 0)
294 {
295 /* this is an entry in the off-diagonal part */
296 Offi [poff] = oldrow ;
297 Offx [poff] = aik ;
298 poff++ ;
299 }
300 else
301 {
302 /* (i,k) is an entry in the block. scatter into X */
303 X [i] = aik ;
304 }
305 }
306 }
307 else
308 {
309 /* row scaling */
310 for (p = Ap [oldcol] ; p < pend ; p++)
311 {
312 oldrow = Ai [p] ;
313 i = PSinv [oldrow] - k1 ;
314 aik = Ax [p] ;
315 SCALE_DIV (aik, Rs [oldrow]) ;
316 if (i < 0)
317 {
318 /* this is an entry in the off-diagonal part */
319 Offi [poff] = oldrow ;
320 Offx [poff] = aik ;
321 poff++ ;
322 }
323 else
324 {
325 /* (i,k) is an entry in the block. scatter into X */
326 X [i] = aik ;
327 }
328 }
329 }
330
331 Offp [kglobal+1] = poff ; /* start of the next col of off-diag part */
332}
333
334
335/* ========================================================================== */
336/* === lsolve_numeric ======================================================= */
337/* ========================================================================== */
338
339/* Computes the numerical values of x, for the solution of Lx=b. Note that x
340 * may include explicit zeros if numerical cancelation occurs. L is assumed
341 * to be unit-diagonal, with possibly unsorted columns (but the first entry in
342 * the column must always be the diagonal entry). */
343
344template <typename Entry, typename Int>
345static void lsolve_numeric
346(
347 /* input, not modified on output: */
348 Int Pinv [ ], /* Pinv [i] = k if i is kth pivot row, or AMESOS2_KLU2_EMPTY if row i
349 * is not yet pivotal. */
350 Unit *LU, /* LU factors (pattern and values) */
351 Int Stack [ ], /* stack for dfs */
352 Int Lip [ ], /* size n, Lip [k] is position in LU of column k of L */
353 Int top, /* top of stack on input */
354 Int n, /* A is n-by-n */
355 Int Llen [ ], /* size n, Llen [k] = # nonzeros in column k of L */
356
357 /* output, must be zero on input: */
358 Entry X [ ] /* size n, initially zero. On output,
359 * X [Ui [up1..up-1]] and X [Li [lp1..lp-1]]
360 * contains the solution. */
361
362)
363{
364 Entry xj ;
365 Entry *Lx ;
366 Int *Li ;
367 Int p, s, j, jnew, len ;
368
369 /* solve Lx=b */
370 for (s = top ; s < n ; s++)
371 {
372 /* forward solve with column j of L */
373 j = Stack [s] ;
374 jnew = Pinv [j] ;
375 ASSERT (jnew >= 0) ;
376 xj = X [j] ;
377 GET_POINTER (LU, Lip, Llen, Li, Lx, jnew, len) ;
378 ASSERT (Lip [jnew] <= Lip [jnew+1]) ;
379 for (p = 0 ; p < len ; p++)
380 {
381 /*X [Li [p]] -= Lx [p] * xj ; */
382 MULT_SUB (X [Li [p]], Lx [p], xj) ;
383 }
384 }
385}
386
387
388/* ========================================================================== */
389/* === lpivot =============================================================== */
390/* ========================================================================== */
391
392/* Find a pivot via partial pivoting, and scale the column of L. */
393
394template <typename Entry, typename Int>
395static Int lpivot
396(
397 Int diagrow,
398 Int *p_pivrow,
399 Entry *p_pivot,
400 double *p_abs_pivot,
401 double tol,
402 Entry X [ ],
403 Unit *LU, /* LU factors (pattern and values) */
404 Int Lip [ ],
405 Int Llen [ ],
406 Int k,
407 Int n,
408
409 Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or AMESOS2_KLU2_EMPTY if
410 * row i is not yet pivotal. */
411
412 Int *p_firstrow,
413 KLU_common<Entry, Int> *Common
414)
415{
416 Entry x, pivot, *Lx ;
417 double abs_pivot, xabs ;
418 Int p, i, ppivrow, pdiag, pivrow, *Li, last_row_index, firstrow, len ;
419
420 pivrow = AMESOS2_KLU2_EMPTY ;
421 if (Llen [k] == 0)
422 {
423 /* matrix is structurally singular */
424 if (Common->halt_if_singular)
425 {
426 return (FALSE) ;
427 }
428 for (firstrow = *p_firstrow ; firstrow < n ; firstrow++)
429 {
430 PRINTF (("check %d\n", firstrow)) ;
431 if (Pinv [firstrow] < 0)
432 {
433 /* found the lowest-numbered non-pivotal row. Pick it. */
434 pivrow = firstrow ;
435 PRINTF (("Got pivotal row: %d\n", pivrow)) ;
436 break ;
437 }
438 }
439 ASSERT (pivrow >= 0 && pivrow < n) ;
440 CLEAR (pivot) ;
441 *p_pivrow = pivrow ;
442 *p_pivot = pivot ;
443 *p_abs_pivot = 0 ;
444 *p_firstrow = firstrow ;
445 return (FALSE) ;
446 }
447
448 pdiag = AMESOS2_KLU2_EMPTY ;
449 ppivrow = AMESOS2_KLU2_EMPTY ;
450 abs_pivot = AMESOS2_KLU2_EMPTY ;
451 i = Llen [k] - 1 ;
452 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
453 last_row_index = Li [i] ;
454
455 /* decrement the length by 1 */
456 Llen [k] = i ;
457 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
458
459 /* look in Li [0 ..Llen [k] - 1 ] for a pivot row */
460 for (p = 0 ; p < len ; p++)
461 {
462 /* gather the entry from X and store in L */
463 i = Li [p] ;
464 x = X [i] ;
465 CLEAR (X [i]) ;
466
467 Lx [p] = x ;
468 /* xabs = ABS (x) ; */
469 KLU2_ABS (xabs, x) ;
470
471 /* find the diagonal */
472 if (i == diagrow)
473 {
474 pdiag = p ;
475 }
476
477 /* find the partial-pivoting choice */
478 if (xabs > abs_pivot)
479 {
480 abs_pivot = xabs ;
481 ppivrow = p ;
482 }
483 }
484
485 /* xabs = ABS (X [last_row_index]) ;*/
486 KLU2_ABS (xabs, X [last_row_index]) ;
487 if (xabs > abs_pivot)
488 {
489 abs_pivot = xabs ;
490 ppivrow = AMESOS2_KLU2_EMPTY ;
491 }
492
493 /* compare the diagonal with the largest entry */
494 if (last_row_index == diagrow)
495 {
496 if (xabs >= tol * abs_pivot)
497 {
498 abs_pivot = xabs ;
499 ppivrow = AMESOS2_KLU2_EMPTY ;
500 }
501 }
502 else if (pdiag != AMESOS2_KLU2_EMPTY)
503 {
504 /* xabs = ABS (Lx [pdiag]) ;*/
505 KLU2_ABS (xabs, Lx [pdiag]) ;
506 if (xabs >= tol * abs_pivot)
507 {
508 /* the diagonal is large enough */
509 abs_pivot = xabs ;
510 ppivrow = pdiag ;
511 }
512 }
513
514 if (ppivrow != AMESOS2_KLU2_EMPTY)
515 {
516 pivrow = Li [ppivrow] ;
517 pivot = Lx [ppivrow] ;
518 /* overwrite the ppivrow values with last index values */
519 Li [ppivrow] = last_row_index ;
520 Lx [ppivrow] = X [last_row_index] ;
521 }
522 else
523 {
524 pivrow = last_row_index ;
525 pivot = X [last_row_index] ;
526 }
527 CLEAR (X [last_row_index]) ;
528
529 *p_pivrow = pivrow ;
530 *p_pivot = pivot ;
531 *p_abs_pivot = abs_pivot ;
532 ASSERT (pivrow >= 0 && pivrow < n) ;
533
534 if (IS_ZERO (pivot) && Common->halt_if_singular)
535 {
536 /* numerically singular case */
537 return (FALSE) ;
538 }
539
540 /* divide L by the pivot value */
541 for (p = 0 ; p < Llen [k] ; p++)
542 {
543 /* Lx [p] /= pivot ; */
544 DIV (Lx [p], Lx [p], pivot) ;
545 }
546
547 return (TRUE) ;
548}
549
550
551/* ========================================================================== */
552/* === prune ================================================================ */
553/* ========================================================================== */
554
555/* Prune the columns of L to reduce work in subsequent depth-first searches */
556template <typename Entry, typename Int>
557static void prune
558(
559 /* input/output: */
560 Int Lpend [ ], /* Lpend [j] marks symmetric pruning point for L(:,j) */
561
562 /* input: */
563 Int Pinv [ ], /* Pinv [i] = k if row i is kth pivot row, or AMESOS2_KLU2_EMPTY if
564 * row i is not yet pivotal. */
565 Int k, /* prune using column k of U */
566 Int pivrow, /* current pivot row */
567
568 /* input/output: */
569 Unit *LU, /* LU factors (pattern and values) */
570
571 /* input */
572 Int Uip [ ], /* size n, column pointers for U */
573 Int Lip [ ], /* size n, column pointers for L */
574 Int Ulen [ ], /* size n, column length of U */
575 Int Llen [ ] /* size n, column length of L */
576)
577{
578 Entry x ;
579 Entry *Lx, *Ux ;
580 Int *Li, *Ui ;
581 Int p, i, j, p2, phead, ptail, llen, ulen ;
582
583 /* check to see if any column of L can be pruned */
584 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ;
585
586 // Try not to warn about Ux never being used
587 (void) Ux;
588 for (p = 0 ; p < ulen ; p++)
589 {
590 j = Ui [p] ;
591 ASSERT (j < k) ;
592 PRINTF (("%d is pruned: %d. Lpend[j] %d Lip[j+1] %d\n",
593 j, Lpend [j] != AMESOS2_KLU2_EMPTY, Lpend [j], Lip [j+1])) ;
594 if (Lpend [j] == AMESOS2_KLU2_EMPTY)
595 {
596 /* scan column j of L for the pivot row */
597 GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ;
598 for (p2 = 0 ; p2 < llen ; p2++)
599 {
600 if (pivrow == Li [p2])
601 {
602 /* found it! This column can be pruned */
603#ifndef NDEBUGKLU2
604 PRINTF (("==== PRUNE: col j %d of L\n", j)) ;
605 {
606 Int p3 ;
607 for (p3 = 0 ; p3 < Llen [j] ; p3++)
608 {
609 PRINTF (("before: %i pivotal: %d\n", Li [p3],
610 Pinv [Li [p3]] >= 0)) ;
611 }
612 }
613#endif
614
615 /* partition column j of L. The unit diagonal of L
616 * is not stored in the column of L. */
617 phead = 0 ;
618 ptail = Llen [j] ;
619 while (phead < ptail)
620 {
621 i = Li [phead] ;
622 if (Pinv [i] >= 0)
623 {
624 /* leave at the head */
625 phead++ ;
626 }
627 else
628 {
629 /* swap with the tail */
630 ptail-- ;
631 Li [phead] = Li [ptail] ;
632 Li [ptail] = i ;
633 x = Lx [phead] ;
634 Lx [phead] = Lx [ptail] ;
635 Lx [ptail] = x ;
636 }
637 }
638
639 /* set Lpend to one past the last entry in the
640 * first part of the column of L. Entries in
641 * Li [0 ... Lpend [j]-1] are the only part of
642 * column j of L that needs to be scanned in the DFS.
643 * Lpend [j] was AMESOS2_KLU2_EMPTY; setting it >= 0 also flags
644 * column j as pruned. */
645 Lpend [j] = ptail ;
646
647#ifndef NDEBUGKLU2
648 {
649 Int p3 ;
650 for (p3 = 0 ; p3 < Llen [j] ; p3++)
651 {
652 if (p3 == Lpend [j]) PRINTF (("----\n")) ;
653 PRINTF (("after: %i pivotal: %d\n", Li [p3],
654 Pinv [Li [p3]] >= 0)) ;
655 }
656 }
657#endif
658
659 break ;
660 }
661 }
662 }
663 }
664}
665
666
667/* ========================================================================== */
668/* === KLU_kernel =========================================================== */
669/* ========================================================================== */
670
671template <typename Entry, typename Int>
672size_t KLU_kernel /* final size of LU on output */
673(
674 /* input, not modified */
675 Int n, /* A is n-by-n */
676 Int Ap [ ], /* size n+1, column pointers for A */
677 Int Ai [ ], /* size nz = Ap [n], row indices for A */
678 Entry Ax [ ], /* size nz, values of A */
679 Int Q [ ], /* size n, optional input permutation */
680 size_t lusize, /* initial size of LU on input */
681
682 /* output, not defined on input */
683 Int Pinv [ ], /* size n, inverse row permutation, where Pinv [i] = k if
684 * row i is the kth pivot row */
685 Int P [ ], /* size n, row permutation, where P [k] = i if row i is the
686 * kth pivot row. */
687 Unit **p_LU, /* LU array, size lusize on input */
688 Entry Udiag [ ], /* size n, diagonal of U */
689 Int Llen [ ], /* size n, column length of L */
690 Int Ulen [ ], /* size n, column length of U */
691 Int Lip [ ], /* size n, column pointers for L */
692 Int Uip [ ], /* size n, column pointers for U */
693 Int *lnz, /* size of L*/
694 Int *unz, /* size of U*/
695 /* workspace, not defined on input */
696 Entry X [ ], /* size n, undefined on input, zero on output */
697
698 /* workspace, not defined on input or output */
699 Int Stack [ ], /* size n */
700 Int Flag [ ], /* size n */
701 Int Ap_pos [ ], /* size n */
702
703 /* other workspace: */
704 Int Lpend [ ], /* size n workspace, for pruning only */
705
706 /* inputs, not modified on output */
707 Int k1, /* the block of A is from k1 to k2-1 */
708 Int PSinv [ ], /* inverse of P from symbolic factorization */
709 double Rs [ ], /* scale factors for A */
710
711 /* inputs, modified on output */
712 Int Offp [ ], /* off-diagonal matrix (modified by this routine) */
713 Int Offi [ ],
714 Entry Offx [ ],
715 /* --------------- */
716 KLU_common<Entry, Int> *Common
717)
718{
719 Entry pivot ;
720 double abs_pivot, xsize, nunits, tol, memgrow ;
721 Entry *Ux ;
722 Int *Li, *Ui ;
723 Unit *LU ; /* LU factors (pattern and values) */
724 Int k, p, i, j, pivrow = 0, kbar, diagrow, firstrow, lup, top, scale, len ;
725 size_t newlusize ;
726
727#ifndef NDEBUGKLU2
728 Entry *Lx ;
729#endif
730
731 ASSERT (Common != NULL) ;
732 scale = Common->scale ;
733 tol = Common->tol ;
734 memgrow = Common->memgrow ;
735 *lnz = 0 ;
736 *unz = 0 ;
737 CLEAR (pivot) ;
738
739 /* ---------------------------------------------------------------------- */
740 /* get initial Li, Lx, Ui, and Ux */
741 /* ---------------------------------------------------------------------- */
742
743 PRINTF (("input: lusize %d \n", lusize)) ;
744 ASSERT (lusize > 0) ;
745 LU = *p_LU ;
746
747 /* ---------------------------------------------------------------------- */
748 /* initializations */
749 /* ---------------------------------------------------------------------- */
750
751 firstrow = 0 ;
752 lup = 0 ;
753
754 for (k = 0 ; k < n ; k++)
755 {
756 /* X [k] = 0 ; */
757 CLEAR (X [k]) ;
758 Flag [k] = AMESOS2_KLU2_EMPTY ;
759 Lpend [k] = AMESOS2_KLU2_EMPTY ; /* flag k as not pruned */
760 }
761
762 /* ---------------------------------------------------------------------- */
763 /* mark all rows as non-pivotal and determine initial diagonal mapping */
764 /* ---------------------------------------------------------------------- */
765
766 /* PSinv does the symmetric permutation, so don't do it here */
767 for (k = 0 ; k < n ; k++)
768 {
769 P [k] = k ;
770 Pinv [k] = FLIP (k) ; /* mark all rows as non-pivotal */
771 }
772 /* initialize the construction of the off-diagonal matrix */
773 Offp [0] = 0 ;
774
775 /* P [k] = row means that UNFLIP (Pinv [row]) = k, and visa versa.
776 * If row is pivotal, then Pinv [row] >= 0. A row is initially "flipped"
777 * (Pinv [k] < AMESOS2_KLU2_EMPTY), and then marked "unflipped" when it becomes
778 * pivotal. */
779
780#ifndef NDEBUGKLU2
781 for (k = 0 ; k < n ; k++)
782 {
783 PRINTF (("Initial P [%d] = %d\n", k, P [k])) ;
784 }
785#endif
786
787 /* ---------------------------------------------------------------------- */
788 /* factorize */
789 /* ---------------------------------------------------------------------- */
790
791 for (k = 0 ; k < n ; k++)
792 {
793
794 PRINTF (("\n\n==================================== k: %d\n", k)) ;
795
796 /* ------------------------------------------------------------------ */
797 /* determine if LU factors have grown too big */
798 /* ------------------------------------------------------------------ */
799
800 /* (n - k) entries for L and k entries for U */
801 nunits = DUNITS (Int, n - k) + DUNITS (Int, k) +
802 DUNITS (Entry, n - k) + DUNITS (Entry, k) ;
803
804 /* LU can grow by at most 'nunits' entries if the column is dense */
805 PRINTF (("lup %d lusize %g lup+nunits: %g\n", lup, (double) lusize,
806 lup+nunits));
807 xsize = ((double) lup) + nunits ;
808 if (xsize > (double) lusize)
809 {
810 /* check here how much to grow */
811 xsize = (memgrow * ((double) lusize) + 4*n + 1) ;
812 if (INT_OVERFLOW (xsize))
813 {
814 PRINTF (("Matrix is too large (Int overflow)\n")) ;
815 Common->status = KLU_TOO_LARGE ;
816 return (lusize) ;
817 }
818 newlusize = (size_t) (memgrow * lusize + 2*n + 1) ;
819 /* Future work: retry mechanism in case of malloc failure */
820 LU = (Unit *) KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
821 Common->nrealloc++ ;
822 *p_LU = LU ;
823 if (Common->status == KLU_OUT_OF_MEMORY)
824 {
825 PRINTF (("Matrix is too large (LU)\n")) ;
826 return (lusize) ;
827 }
828 lusize = newlusize ;
829 PRINTF (("inc LU to %d done\n", lusize)) ;
830 }
831
832 /* ------------------------------------------------------------------ */
833 /* start the kth column of L and U */
834 /* ------------------------------------------------------------------ */
835
836 Lip [k] = lup ;
837
838 /* ------------------------------------------------------------------ */
839 /* compute the nonzero pattern of the kth column of L and U */
840 /* ------------------------------------------------------------------ */
841
842#ifndef NDEBUGKLU2
843 for (i = 0 ; i < n ; i++)
844 {
845 ASSERT (Flag [i] < k) ;
846 /* ASSERT (X [i] == 0) ; */
847 ASSERT (IS_ZERO (X [i])) ;
848 }
849#endif
850
851 top = lsolve_symbolic (n, k, Ap, Ai, Q, Pinv, Stack, Flag,
852 Lpend, Ap_pos, LU, lup, Llen, Lip, k1, PSinv) ;
853
854#ifndef NDEBUGKLU2
855 PRINTF (("--- in U:\n")) ;
856 for (p = top ; p < n ; p++)
857 {
858 PRINTF (("pattern of X for U: %d : %d pivot row: %d\n",
859 p, Stack [p], Pinv [Stack [p]])) ;
860 ASSERT (Flag [Stack [p]] == k) ;
861 }
862 PRINTF (("--- in L:\n")) ;
863 Li = (Int *) (LU + Lip [k]);
864 for (p = 0 ; p < Llen [k] ; p++)
865 {
866 PRINTF (("pattern of X in L: %d : %d pivot row: %d\n",
867 p, Li [p], Pinv [Li [p]])) ;
868 ASSERT (Flag [Li [p]] == k) ;
869 }
870 p = 0 ;
871 for (i = 0 ; i < n ; i++)
872 {
873 ASSERT (Flag [i] <= k) ;
874 if (Flag [i] == k) p++ ;
875 }
876#endif
877
878 /* ------------------------------------------------------------------ */
879 /* get the column of the matrix to factorize and scatter into X */
880 /* ------------------------------------------------------------------ */
881
882 construct_column <Entry> (k, Ap, Ai, Ax, Q, X,
883 k1, PSinv, Rs, scale, Offp, Offi, Offx) ;
884
885 /* ------------------------------------------------------------------ */
886 /* compute the numerical values of the kth column (s = L \ A (:,k)) */
887 /* ------------------------------------------------------------------ */
888
889 lsolve_numeric <Entry> (Pinv, LU, Stack, Lip, top, n, Llen, X) ;
890
891#ifndef NDEBUGKLU2
892 for (p = top ; p < n ; p++)
893 {
894 PRINTF (("X for U %d : ", Stack [p])) ;
895 PRINT_ENTRY (X [Stack [p]]) ;
896 }
897 Li = (Int *) (LU + Lip [k]) ;
898 for (p = 0 ; p < Llen [k] ; p++)
899 {
900 PRINTF (("X for L %d : ", Li [p])) ;
901 PRINT_ENTRY (X [Li [p]]) ;
902 }
903#endif
904
905 /* ------------------------------------------------------------------ */
906 /* partial pivoting with diagonal preference */
907 /* ------------------------------------------------------------------ */
908
909 /* determine what the "diagonal" is */
910 diagrow = P [k] ; /* might already be pivotal */
911 PRINTF (("k %d, diagrow = %d, UNFLIP (diagrow) = %d\n",
912 k, diagrow, UNFLIP (diagrow))) ;
913
914 /* find a pivot and scale the pivot column */
915 if (!lpivot <Entry> (diagrow, &pivrow, &pivot, &abs_pivot, tol, X, LU, Lip,
916 Llen, k, n, Pinv, &firstrow, Common))
917 {
918 /* matrix is structurally or numerically singular */
919 Common->status = KLU_SINGULAR ;
920 if (Common->numerical_rank == AMESOS2_KLU2_EMPTY)
921 {
922 Common->numerical_rank = k+k1 ;
923 Common->singular_col = Q [k+k1] ;
924 }
925 if (Common->halt_if_singular)
926 {
927 /* do not continue the factorization */
928 return (lusize) ;
929 }
930 }
931
932 /* we now have a valid pivot row, even if the column has NaN's or
933 * has no entries on or below the diagonal at all. */
934 PRINTF (("\nk %d : Pivot row %d : ", k, pivrow)) ;
935 PRINT_ENTRY (pivot) ;
936 ASSERT (pivrow >= 0 && pivrow < n) ;
937 ASSERT (Pinv [pivrow] < 0) ;
938
939 /* set the Uip pointer */
940 Uip [k] = Lip [k] + UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
941
942 /* move the lup pointer to the position where indices of U
943 * should be stored */
944 lup += UNITS (Int, Llen [k]) + UNITS (Entry, Llen [k]) ;
945
946 Ulen [k] = n - top ;
947
948 /* extract Stack [top..n-1] to Ui and the values to Ux and clear X */
949 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
950 for (p = top, i = 0 ; p < n ; p++, i++)
951 {
952 j = Stack [p] ;
953 Ui [i] = Pinv [j] ;
954 Ux [i] = X [j] ;
955 CLEAR (X [j]) ;
956 }
957
958 /* position the lu index at the starting point for next column */
959 lup += UNITS (Int, Ulen [k]) + UNITS (Entry, Ulen [k]) ;
960
961 /* U(k,k) = pivot */
962 Udiag [k] = pivot ;
963
964 /* ------------------------------------------------------------------ */
965 /* log the pivot permutation */
966 /* ------------------------------------------------------------------ */
967
968 ASSERT (UNFLIP (Pinv [diagrow]) < n) ;
969 ASSERT (P [UNFLIP (Pinv [diagrow])] == diagrow) ;
970
971 if (pivrow != diagrow)
972 {
973 /* an off-diagonal pivot has been chosen */
974 Common->noffdiag++ ;
975 PRINTF ((">>>>>>>>>>>>>>>>> pivrow %d k %d off-diagonal\n",
976 pivrow, k)) ;
977 if (Pinv [diagrow] < 0)
978 {
979 /* the former diagonal row index, diagrow, has not yet been
980 * chosen as a pivot row. Log this diagrow as the "diagonal"
981 * entry in the column kbar for which the chosen pivot row,
982 * pivrow, was originally logged as the "diagonal" */
983 kbar = FLIP (Pinv [pivrow]) ;
984 P [kbar] = diagrow ;
985 Pinv [diagrow] = FLIP (kbar) ;
986 }
987 }
988 P [k] = pivrow ;
989 Pinv [pivrow] = k ;
990
991#ifndef NDEBUGKLU2
992 for (i = 0 ; i < n ; i++) { ASSERT (IS_ZERO (X [i])) ;}
993 GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
994 for (p = 0 ; p < len ; p++)
995 {
996 PRINTF (("Column %d of U: %d : ", k, Ui [p])) ;
997 PRINT_ENTRY (Ux [p]) ;
998 }
999 GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
1000 for (p = 0 ; p < len ; p++)
1001 {
1002 PRINTF (("Column %d of L: %d : ", k, Li [p])) ;
1003 PRINT_ENTRY (Lx [p]) ;
1004 }
1005#endif
1006
1007 /* ------------------------------------------------------------------ */
1008 /* symmetric pruning */
1009 /* ------------------------------------------------------------------ */
1010
1011 prune<Entry> (Lpend, Pinv, k, pivrow, LU, Uip, Lip, Ulen, Llen) ;
1012
1013 *lnz += Llen [k] + 1 ; /* 1 added to lnz for diagonal */
1014 *unz += Ulen [k] + 1 ; /* 1 added to unz for diagonal */
1015 }
1016
1017 /* ---------------------------------------------------------------------- */
1018 /* finalize column pointers for L and U, and put L in the pivotal order */
1019 /* ---------------------------------------------------------------------- */
1020
1021 for (p = 0 ; p < n ; p++)
1022 {
1023 Li = (Int *) (LU + Lip [p]) ;
1024 for (i = 0 ; i < Llen [p] ; i++)
1025 {
1026 Li [i] = Pinv [Li [i]] ;
1027 }
1028 }
1029
1030#ifndef NDEBUGKLU2
1031 for (i = 0 ; i < n ; i++)
1032 {
1033 PRINTF (("P [%d] = %d Pinv [%d] = %d\n", i, P [i], i, Pinv [i])) ;
1034 }
1035 for (i = 0 ; i < n ; i++)
1036 {
1037 ASSERT (Pinv [i] >= 0 && Pinv [i] < n) ;
1038 ASSERT (P [i] >= 0 && P [i] < n) ;
1039 ASSERT (P [Pinv [i]] == i) ;
1040 ASSERT (IS_ZERO (X [i])) ;
1041 }
1042#endif
1043
1044 /* ---------------------------------------------------------------------- */
1045 /* shrink the LU factors to just the required size */
1046 /* ---------------------------------------------------------------------- */
1047
1048 newlusize = lup ;
1049 ASSERT ((size_t) newlusize <= lusize) ;
1050
1051 /* this cannot fail, since the block is descreasing in size */
1052 LU = (Unit *) KLU_realloc (newlusize, lusize, sizeof (Unit), LU, Common) ;
1053 *p_LU = LU ;
1054 return (newlusize) ;
1055}
1056
1057#endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.