// Fragment that appears to belong to the parameter-setting routine of
// Ifpack2::ILUT (the enclosing signature is not visible in this view -- confirm).
// Visible flow: read each fact-prefixed entry from params into a local,
// validate the fill level, forward params to both triangular solvers, then
// copy the locals into the member fields.
// NOTE(review): the leading integer on every line looks like an original line
// number fused into the text by extraction; gaps in that numbering mean some
// statements (for example the declarations of absThresh, relThresh,
// relaxValue and dropTol, and the closing braces) are not shown here.
146 using Details::getParamTryingTypes;
// Prefix handed to every getParamTryingTypes call below (presumably used in
// its diagnostics -- confirm against the helper).
147 const char prefix[] =
"Ifpack2::ILUT: ";
// Start from the currently stored fill level; the lookup below writes into
// this local rather than into the member.
156 double fillLevel = LevelOfFill_;
158 const std::string paramName (
"fact: ilut level-of-fill");
// Template arguments are <double, double, float>; presumably the result type
// followed by the stored types to try -- confirm against the helper.
159 getParamTryingTypes<double, double, float>
160 (fillLevel, params, paramName, prefix);
// Fill levels below 1.0 are rejected with std::runtime_error. The message
// text continues past the lines visible in this view.
161 TEUCHOS_TEST_FOR_EXCEPTION
162 (fillLevel < 1.0, std::runtime_error,
163 "Ifpack2::ILUT: The \"fact: ilut level-of-fill\" parameter must be >= "
164 "1.0, but you set it to " << fillLevel <<
". For ILUT, the fill level "
165 "means something different than it does for ILU(k). ILU(0) produces "
166 "factors with the same sparsity structure as the input matrix A. For "
167 "ILUT, level-of-fill = 1.0 will produce factors with nonzeros matching "
168 "the sparsity structure of A. level-of-fill > 1.0 allows for additional "
// Absolute threshold, read into absThresh.
174 const std::string paramName (
"fact: absolute threshold");
175 getParamTryingTypes<magnitude_type, magnitude_type, double>
176 (absThresh, params, paramName, prefix);
// Relative threshold, read into relThresh.
181 const std::string paramName (
"fact: relative threshold");
182 getParamTryingTypes<magnitude_type, magnitude_type, double>
183 (relThresh, params, paramName, prefix);
// Relax value, read into relaxValue.
188 const std::string paramName (
"fact: relax value");
189 getParamTryingTypes<magnitude_type, magnitude_type, double>
190 (relaxValue, params, paramName, prefix);
// Drop tolerance, read into dropTol.
195 const std::string paramName (
"fact: drop tolerance");
196 getParamTryingTypes<magnitude_type, magnitude_type, double>
197 (dropTol, params, paramName, prefix);
// The same parameter list is forwarded unchanged to both triangular solvers.
// NOTE(review): no null check on L_solver_ / U_solver_ is visible here --
// confirm they are always constructed before this point.
201 L_solver_->setParameters(params);
202 U_solver_->setParameters(params);
// Member fields are assigned only after every lookup and the fill-level check
// above have run.
204 LevelOfFill_ = fillLevel;
205 Athresh_ = absThresh;
206 Rthresh_ = relThresh;
207 RelaxValue_ = relaxValue;
208 DropTolerance_ = dropTol;
// Fragment that appears to be the body of the ILUT numeric factorization
// (the timer below is labelled ILUT::compute; the enclosing signature is not
// visible in this view -- confirm).
// Visible flow: size the per-row fill budget, then for each row scatter the
// row of A_local_ into dense work arrays, eliminate against earlier rows of U
// with dropping, store the kept L and U entries, and finally assemble L_ and
// U_ and hand them to the triangular solvers.
// NOTE(review): the leading integer on every line looks like an original line
// number fused into the text by extraction; gaps in that numbering mean loop
// headers, closing braces and several declarations (myNumRows, max_col, zero,
// one, row_i, row_k, col, col_j, tmp, v, rthresh, mag_mult, rownorm, RowNnz)
// are not shown here.
400 using Teuchos::Array;
401 using Teuchos::ArrayRCP;
402 using Teuchos::ArrayView;
405 using Teuchos::reduceAll;
// Guard on the initialized state; the body of this branch is not visible.
431 if (! isInitialized ()) {
// Wall-clock start is captured so the elapsed time can be added to
// ComputeTime_ at the end of this fragment.
435 Teuchos::Time timer (
"ILUT::compute");
436 double startTime = timer.wallTime();
438 Teuchos::TimeMonitor timeMon (timer,
true);
// Optional debug output of the factors, only when IFPACK2_WRITE_FACTORS is
// defined at compile time.
447#ifdef IFPACK2_WRITE_FACTORS
448 std::ofstream ofsL(
"L.tif.mtx", std::ios::out);
449 std::ofstream ofsU(
"U.tif.mtx", std::ios::out);
// Fill budget: (level of fill - 1) times the local entry count, divided by
// twice the local row count, then rounded up. The same rounded value is used
// for both L and U.
// NOTE(review): if myNumRows is 0 the division yields a non-finite double
// and the casts to size_type below are then not well defined -- confirm an
// earlier guard exists.
454 double local_nnz =
static_cast<double> (A_local_->getLocalNumEntries ());
455 double fill = ((getLevelOfFill () - 1.0) * local_nnz) / (2 * myNumRows);
460 double fill_ceil=std::ceil(fill);
464 size_type fillL =
static_cast<size_type
>(fill_ceil);
465 size_type fillU =
static_cast<size_type
>(fill_ceil);
// Inverse of each stored U diagonal, filled in as rows are completed.
467 Array<scalar_type> InvDiagU (myNumRows, zero);
// Per-row column indices and values of the factors, built row by row and
// later inserted into L_ and U_.
469 Array<Array<local_ordinal_type> > L_tmp_idx(myNumRows);
470 Array<Array<scalar_type> > L_tmpv(myNumRows);
471 Array<Array<local_ordinal_type> > U_tmp_idx(myNumRows);
472 Array<Array<scalar_type> > U_tmpv(myNumRows);
// Per-column state of the current row: UNUSED, ORIG (set when scattering
// the row of A) or FILL (set when elimination creates a new entry).
474 enum { UNUSED, ORIG, FILL };
// Dense work arrays indexed by column: state marker, current row values,
// and a per-row magnitude measure of U (see where unorm is written below).
477 Array<int> pattern(max_col, UNUSED);
478 Array<scalar_type> cur_row(max_col, zero);
479 Array<magnitude_type> unorm(max_col);
// L_cols_heap holds the columns left of the diagonal still to be eliminated;
// U_cols lists the columns of the current row at or right of the diagonal.
// L_vals_heap and U_vals_heap also store column indices (they are pushed
// with row_k / col and used to index cur_row below), ordered through
// vals_comp, which is constructed over cur_row.
481 Array<local_ordinal_type> L_cols_heap;
482 Array<local_ordinal_type> U_cols;
483 Array<local_ordinal_type> L_vals_heap;
484 Array<local_ordinal_type> U_vals_heap;
489 greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
// When the matrix cannot provide row views, copy buffers large enough for
// the longest local row are allocated once here.
494 nonconst_local_inds_host_view_type ColIndicesARCP;
495 nonconst_values_host_view_type ColValuesARCP;
496 if (! A_local_->supportsRowViews ()) {
497 const size_t maxnz = A_local_->getLocalMaxNumRowEntries ();
498 Kokkos::resize(ColIndicesARCP,maxnz);
499 Kokkos::resize(ColValuesARCP,maxnz);
// Fetch row row_i of A_local_: as a view when supported, otherwise as a copy
// into the buffers above, trimmed to the RowNnz entries actually returned.
// (The enclosing loop over row_i is not visible in this view.)
503 local_inds_host_view_type ColIndicesA;
504 values_host_view_type ColValuesA;
507 if (A_local_->supportsRowViews ()) {
508 A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA);
509 RowNnz = ColIndicesA.size ();
512 A_local_->getLocalRowCopy (row_i, ColIndicesARCP, ColValuesARCP, RowNnz);
513 ColIndicesA = Kokkos::subview(ColIndicesARCP,std::make_pair((
size_t)0, RowNnz));
514 ColValuesA = Kokkos::subview(ColValuesARCP,std::make_pair((
size_t)0, RowNnz));
// The diagonal is always the first entry of U_cols and is marked ORIG even
// before the row of A has been scattered.
519 U_cols.push_back(row_i);
520 cur_row[row_i] = zero;
521 pattern[row_i] = ORIG;
// Scatter the row: columns left of the diagonal go onto the L heap, columns
// right of it are appended to U_cols, values are written into cur_row and
// their magnitudes are summed into rownorm. A test against myNumRows is
// applied first; the exact brace structure is not visible in this view.
523 size_type L_cols_heaplen = 0;
524 rownorm = STM::zero ();
525 for (
size_t i = 0; i < RowNnz; ++i) {
526 if (ColIndicesA[i] < myNumRows) {
527 if (ColIndicesA[i] < row_i) {
528 add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
530 else if (ColIndicesA[i] > row_i) {
531 U_cols.push_back(ColIndicesA[i]);
534 cur_row[ColIndicesA[i]] = ColValuesA[i];
535 pattern[ColIndicesA[i]] = ORIG;
536 rownorm += scalar_mag(ColValuesA[i]);
// Diagonal perturbation: absolute threshold times the sign of v, plus
// rthresh times v (v and rthresh are defined in lines not shown).
545 cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v;
// From here on rownorm is the drop threshold for this row: the drop
// tolerance times the summed magnitudes divided by RowNnz.
// NOTE(review): RowNnz uses L_cols_heap.size() rather than L_cols_heaplen;
// confirm the two agree at this point.
547 size_type orig_U_len = U_cols.size();
548 RowNnz = L_cols_heap.size() + orig_U_len;
549 rownorm = getDropTolerance() * rownorm/RowNnz;
// Elimination loop over the columns left of the diagonal (how row_k is taken
// from the heap is not visible in this view).
552 size_type L_vals_heaplen = 0;
553 while (L_cols_heaplen > 0) {
// The multiplier overwrites the entry in cur_row.
556 scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
557 cur_row[row_k] = multiplier;
// Entries whose scaled magnitude falls below the threshold are dropped.
559 if (mag_mult*unorm[row_k] < rownorm) {
560 pattern[row_k] = UNUSED;
// Entries not marked ORIG compete for at most fillL slots, tracked in
// L_vals_heap.
564 if (pattern[row_k] != ORIG) {
565 if (L_vals_heaplen < fillL) {
566 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
568 else if (L_vals_heaplen==0 ||
569 mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
570 pattern[row_k] = UNUSED;
// Otherwise the entry at the front of the heap is released and row_k takes
// a slot (the removal call itself is not visible in this view).
575 pattern[L_vals_heap.front()] = UNUSED;
577 add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
// Update the current row with the stored row row_k of U, touching only
// columns right of row_k.
583 ArrayView<local_ordinal_type> ColIndicesU = U_tmp_idx[row_k]();
584 ArrayView<scalar_type> ColValuesU = U_tmpv[row_k]();
585 size_type ColNnzU = ColIndicesU.size();
587 for(size_type j=0; j<ColNnzU; ++j) {
588 if (ColIndicesU[j] > row_k) {
// Existing entries are updated in place; a new entry is created (marked
// FILL) only when the update exceeds the threshold in magnitude.
591 if (pattern[col_j] != UNUSED) {
592 cur_row[col_j] -= tmp;
594 else if (scalar_mag(tmp) > rownorm) {
595 cur_row[col_j] = -tmp;
596 pattern[col_j] = FILL;
598 U_cols.push_back(col_j);
// Store row row_i of L: first the entries at original positions left of the
// diagonal, then the entries kept in L_vals_heap. Markers are reset to
// UNUSED as entries are stored.
614 for (size_type i = 0; i < (size_type)ColIndicesA.size (); ++i) {
615 if (ColIndicesA[i] < row_i) {
616 L_tmp_idx[row_i].push_back(ColIndicesA[i]);
617 L_tmpv[row_i].push_back(cur_row[ColIndicesA[i]]);
618 pattern[ColIndicesA[i]] = UNUSED;
623 for (size_type j = 0; j < L_vals_heaplen; ++j) {
624 L_tmp_idx[row_i].push_back(L_vals_heap[j]);
625 L_tmpv[row_i].push_back(cur_row[L_vals_heap[j]]);
626 pattern[L_vals_heap[j]] = UNUSED;
// Debug dump of the L row as row, column, value triples.
634#ifdef IFPACK2_WRITE_FACTORS
635 for (size_type ii = 0; ii < L_tmp_idx[row_i].size (); ++ii) {
636 ofsL << row_i <<
" " << L_tmp_idx[row_i][ii] <<
" "
637 << L_tmpv[row_i][ii] << std::endl;
// A zero pivot is reported on std::cerr and replaced by rownorm.
// NOTE(review): if rownorm is itself zero the division below still divides
// by zero -- confirm whether that case can occur.
643 if (cur_row[row_i] == zero) {
644 std::cerr <<
"Ifpack2::ILUT::Compute: zero pivot encountered! "
645 <<
"Replacing with rownorm and continuing..."
646 <<
"(You may need to set the parameter "
647 <<
"'fact: absolute threshold'.)" << std::endl;
648 cur_row[row_i] = rownorm;
650 InvDiagU[row_i] = one / cur_row[row_i];
// The diagonal is stored as the first entry of the U row, and unorm starts
// from its magnitude.
653 U_tmp_idx[row_i].push_back(row_i);
654 U_tmpv[row_i].push_back(cur_row[row_i]);
655 unorm[row_i] = scalar_mag(cur_row[row_i]);
656 pattern[row_i] = UNUSED;
// Remaining U columns; j starts at 1 because U_cols[0] is the diagonal
// pushed earlier. Entries not marked ORIG compete for at most fillU slots in
// U_vals_heap; the other stores below add directly to the row and to unorm
// (the exact brace structure is not visible in this view).
662 size_type U_vals_heaplen = 0;
663 for(size_type j=1; j<U_cols.size(); ++j) {
665 if (pattern[col] != ORIG) {
666 if (U_vals_heaplen < fillU) {
667 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
669 else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
670 scalar_mag(cur_row[U_vals_heap.front()])) {
672 add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
676 U_tmp_idx[row_i].push_back(col);
677 U_tmpv[row_i].push_back(cur_row[col]);
678 unorm[row_i] += scalar_mag(cur_row[col]);
680 pattern[col] = UNUSED;
// Append the entries kept in U_vals_heap, then turn the accumulated
// magnitudes into an average over orig_U_len + U_vals_heaplen entries.
683 for(size_type j=0; j<U_vals_heaplen; ++j) {
684 U_tmp_idx[row_i].push_back(U_vals_heap[j]);
685 U_tmpv[row_i].push_back(cur_row[U_vals_heap[j]]);
686 unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
689 unorm[row_i] /= (orig_U_len + U_vals_heaplen);
// Debug dump of the U row as row, column, value triples.
// NOTE(review): this loop uses an int index while the matching L loop uses
// size_type; the comparison against size() mixes types.
691#ifdef IFPACK2_WRITE_FACTORS
692 for(
int ii=0; ii<U_tmp_idx[row_i].size(); ++ii) {
693 ofsU <<row_i<<
" " <<U_tmp_idx[row_i][ii]<<
" "
694 <<U_tmpv[row_i][ii]<< std::endl;
// Assembly: the solvers are detached from any previous matrices first, then
// L_ and U_ are created on the row and column maps of A_local_ and filled
// row by row from the temporary arrays. nnzPerRow is reused for both.
705 Array<size_t> nnzPerRow(myNumRows);
711 L_solver_->setMatrix(Teuchos::null);
712 U_solver_->setMatrix(Teuchos::null);
715 nnzPerRow[row_i] = L_tmp_idx[row_i].size();
718 L_ = rcp (
new crs_matrix_type (A_local_->getRowMap(), A_local_->getColMap(),
722 L_->insertLocalValues (row_i, L_tmp_idx[row_i](), L_tmpv[row_i]());
728 nnzPerRow[row_i] = U_tmp_idx[row_i].size();
731 U_ = rcp (
new crs_matrix_type (A_local_->getRowMap(), A_local_->getColMap(),
735 U_->insertLocalValues (row_i, U_tmp_idx[row_i](), U_tmpv[row_i]());
// Hand the new factors to the triangular solvers and set each one up.
740 L_solver_->setMatrix(L_);
741 L_solver_->initialize ();
742 L_solver_->compute ();
744 U_solver_->setMatrix(U_);
745 U_solver_->initialize ();
746 U_solver_->compute ();
// Accumulate the elapsed wall-clock time of this call.
748 ComputeTime_ += (timer.wallTime() - startTime);
// Applies the computed factors to X and writes the result into Y.
// Visible behavior: when alpha is one and beta is zero, the two triangular
// solvers are applied in sequence (L then U for NO_TRANS, U then L on the
// other branch); otherwise the result is first computed into a temporary and
// then combined with Y through Y.update(alpha, Y_tmp, beta).
// Throws std::runtime_error when isComputed() is false or when X and Y have
// different numbers of columns.
// NOTE(review): the leading integer on every line looks like an original line
// number fused into the text by extraction; the remaining parameters (alpha
// and beta are used below), the return type and the closing braces are in
// lines not shown here.
756apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
757 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
758 Teuchos::ETransp mode,
764 using Teuchos::rcpFromRef;
// Precondition: the factorization must already have been computed.
766 TEUCHOS_TEST_FOR_EXCEPTION(
767 ! isComputed (), std::runtime_error,
768 "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete "
769 "factorization, before calling apply().");
// Precondition: X and Y must have the same number of columns.
771 TEUCHOS_TEST_FOR_EXCEPTION(
772 X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
773 "Ifpack2::ILUT::apply: X and Y must have the same number of columns. "
774 "X has " << X.getNumVectors () <<
" columns, but Y has "
775 << Y.getNumVectors () <<
" columns.");
// Wall-clock start is captured so the elapsed time can be added to
// ApplyTime_ at the end.
780 Teuchos::Time timer (
"ILUT::apply");
781 double startTime = timer.wallTime();
783 Teuchos::TimeMonitor timeMon (timer,
true);
// Fast path: no scaling or accumulation requested.
785 if (alpha == one && beta == zero) {
786 if (mode == Teuchos::NO_TRANS) {
// First solve writes into Y; the second solve then uses Y as both input
// and output.
// NOTE(review): this relies on the solver accepting the same vector for
// both arguments -- confirm against the solver contract.
788 L_solver_->apply (X, Y, mode);
791 U_solver_->apply (Y, Y, mode);
// Other modes: the solver order is reversed.
796 U_solver_->apply (X, Y, mode);
799 L_solver_->apply (Y, Y, mode);
// General alpha / beta: compute the plain result into a temporary through a
// nested call to apply (presumably relying on default alpha and beta values
// declared elsewhere -- confirm), then combine it with Y.
813 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
814 apply (X, Y_tmp, mode);
815 Y.update (alpha, Y_tmp, beta);
// NOTE(review): if this line also runs after the nested call above, the
// nested call time would be counted twice -- the brace structure is not
// visible here, confirm.
821 ApplyTime_ += (timer.wallTime() - startTime);
// Writes a human-readable description of this preconditioner to out.
// Visible behavior: VERB_DEFAULT is treated as VERB_LOW; nothing in the
// visible lines is printed unless the level is above VERB_NONE; factor
// statistics are printed only when isComputed() is true and the level is at
// least VERB_HIGH; call counts, accumulated times and the description of
// A_local_ follow.
// NOTE(review): the leading integer on every line looks like an original line
// number fused into the text by extraction; some statements and the closing
// braces are in lines not shown here, so the exact nesting of the later
// output is not visible.
859describe (Teuchos::FancyOStream& out,
860 const Teuchos::EVerbosityLevel verbLevel)
const
863 using Teuchos::OSTab;
865 using Teuchos::TypeNameTraits;
867 using Teuchos::VERB_DEFAULT;
868 using Teuchos::VERB_NONE;
869 using Teuchos::VERB_LOW;
870 using Teuchos::VERB_MEDIUM;
871 using Teuchos::VERB_HIGH;
872 using Teuchos::VERB_EXTREME;
// Effective verbosity level: the default maps to VERB_LOW.
874 const Teuchos::EVerbosityLevel vl =
875 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
878 if (vl > VERB_NONE) {
879 out <<
"\"Ifpack2::ILUT\":" << endl;
881 out <<
"MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
// The label line is printed only when the object label is non-empty.
882 if (this->getObjectLabel () !=
"") {
883 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl;
// State flags and the current parameter values.
// NOTE(review): the drop tolerance is not among the values printed in the
// visible lines -- confirm whether that is intended.
885 out <<
"Initialized: " << (isInitialized () ?
"true" :
"false")
887 <<
"Computed: " << (isComputed () ?
"true" :
"false")
889 <<
"Level of fill: " << getLevelOfFill () << endl
890 <<
"Absolute threshold: " << getAbsoluteThreshold () << endl
891 <<
"Relative threshold: " << getRelativeThreshold () << endl
892 <<
"Relax value: " << getRelaxValue () << endl;
// Factor statistics: entries in the factors relative to the entries of A_,
// and relative to the global row count of U_.
// NOTE(review): both divisions assume a non-zero denominator; no guard is
// visible here.
894 if (isComputed () && vl >= VERB_HIGH) {
895 const double fillFraction =
896 (double) getGlobalNumEntries () / (double) A_->getGlobalNumEntries ();
897 const double nnzToRows =
898 (double) getGlobalNumEntries () / (double) U_->getGlobalNumRows ();
// NOTE(review): the global row count is printed for both dimensions of L
// and of U; confirm whether a column count was intended for the second
// value.
900 out <<
"Dimensions of L: [" << L_->getGlobalNumRows () <<
", "
901 << L_->getGlobalNumRows () <<
"]" << endl
902 <<
"Dimensions of U: [" << U_->getGlobalNumRows () <<
", "
903 << U_->getGlobalNumRows () <<
"]" << endl
904 <<
"Number of nonzeros in factors: " << getGlobalNumEntries () << endl
905 <<
"Fill fraction of factors over A: " << fillFraction << endl
906 <<
"Ratio of nonzeros to rows: " << nnzToRows << endl;
// Call counters and accumulated wall-clock times.
909 out <<
"Number of initialize calls: " << getNumInitialize () << endl
910 <<
"Number of compute calls: " << getNumCompute () << endl
911 <<
"Number of apply calls: " << getNumApply () << endl
912 <<
"Total time in seconds for initialize: " << getInitializeTime () << endl
913 <<
"Total time in seconds for compute: " << getComputeTime () << endl
914 <<
"Total time in seconds for apply: " << getApplyTime () << endl;
// The local matrix describes itself at the same effective level.
916 out <<
"Local matrix:" << endl;
917 A_local_->describe (out, vl);