53#ifndef AMESOS2_CHOLMOD_DEF_HPP
54#define AMESOS2_CHOLMOD_DEF_HPP
56#include <Teuchos_Tuple.hpp>
57#include <Teuchos_ParameterList.hpp>
58#include <Teuchos_StandardParameterEntryValidators.hpp>
63#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
64#include "KokkosSparse_sptrsv_cholmod.hpp"
70template <
class Matrix,
class Vector>
72 Teuchos::RCP<const Matrix> A,
73 Teuchos::RCP<Vector> X,
74 Teuchos::RCP<const Vector> B )
79 , is_contiguous_(true)
80 , use_triangular_solves_(false)
81 , use_cholmod_int_type_(false)
87 data_.c.supernodal = CHOLMOD_AUTO;
88 data_.c.quick_return_if_not_posdef = 1;
92template <
class Matrix,
class Vector>
95 if(use_cholmod_int_type_) {
96 cholmod_free_factor(&(data_.L), &(data_.c));
97 cholmod_free_dense(&(data_.Y), &data_.c);
98 cholmod_free_dense(&(data_.E), &data_.c);
99 cholmod_finish(&(data_.c));
104 cholmod_l_free_factor(&(data_.L), &(data_.c));
105 cholmod_l_free_dense(&(data_.Y), &data_.c);
106 cholmod_l_free_dense(&(data_.E), &data_.c);
107 cholmod_l_finish(&(data_.c));
111template<
class Matrix,
class Vector>
115#ifdef HAVE_AMESOS2_TIMERS
116 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
121 if(use_cholmod_int_type_) {
122 data_.L = cholmod_analyze(&data_.A, &(data_.c));
125 data_.L = cholmod_l_analyze(&data_.A, &(data_.c));
128 info = data_.c.status;
132 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
134 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
136 "Amesos2 cholmod_l_analyze failure in Cholmod preOrdering_impl");
142template <
class Matrix,
class Vector>
149#ifdef HAVE_AMESOS2_TIMERS
150 Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
153 if(use_cholmod_int_type_) {
154 cholmod_resymbol (&data_.A, NULL, 0,
true, data_.L, &(data_.c));
157 cholmod_l_resymbol (&data_.A, NULL, 0,
true, data_.L, &(data_.c));
160 info = data_.c.status;
167 skip_symfact =
false;
171 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
173 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
175 "Amesos2 cholmod_l_resymbol failure in Cholmod symbolicFactorization_impl");
177 if(use_triangular_solves_) {
178 triangular_solve_symbolic();
185template <
class Matrix,
class Vector>
191#ifdef HAVE_AMESOS2_DEBUG
192 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.ncol != Teuchos::as<size_t>(this->globalNumCols_),
194 "Error in converting to cholmod_sparse: wrong number of global columns." );
195 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.nrow != Teuchos::as<size_t>(this->globalNumRows_),
197 "Error in converting to cholmod_sparse: wrong number of global rows." );
200#ifdef HAVE_AMESOS2_TIMERS
201 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
204#ifdef HAVE_AMESOS2_VERBOSE_DEBUG
208 if(use_cholmod_int_type_) {
209 cholmod_factorize(&data_.A, data_.L, &(data_.c));
212 cholmod_l_factorize(&data_.A, data_.L, &(data_.c));
215 info = data_.c.status;
218 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
220 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_OUT_OF_MEMORY,
222 "Amesos2 cholmod_l_factorize error code: CHOLMOD_OUT_OF_MEMORY");
224 TEUCHOS_TEST_FOR_EXCEPTION(info == CHOLMOD_NOT_POSDEF,
226 "Amesos2 cholmod_l_factorize error code: CHOLMOD_NOT_POSDEF.");
228 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
230 "Amesos2 cholmod_l_factorize error code:" << info);
232 if(use_triangular_solves_) {
233 triangular_solve_numeric();
240template <
class Matrix,
class Vector>
243 const Teuchos::Ptr<
const MultiVecAdapter<Vector> > B)
const
245 const global_size_type ld_rhs = X->getGlobalLength();
246 const size_t nrhs = X->getGlobalNumVectors();
250#ifdef HAVE_AMESOS2_TIMERS
251 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
252 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
257 const bool initialize_data =
true;
258 const bool do_not_initialize_data =
false;
259 if(use_triangular_solves_) {
260#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
261 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
262 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
263 Teuchos::as<size_t>(ld_rhs),
265 this->rowIndexBase_);
266 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
267 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
268 Teuchos::as<size_t>(ld_rhs),
270 this->rowIndexBase_);
274 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
275 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
276 Teuchos::as<size_t>(ld_rhs),
278 this->rowIndexBase_);
279 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
280 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
281 Teuchos::as<size_t>(ld_rhs),
283 this->rowIndexBase_);
289#ifdef HAVE_AMESOS2_TIMERS
290 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
293 if(use_triangular_solves_) {
297 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
298 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_bValues_.data(), &data_.b);
299 function_map::cholmod_init_dense(Teuchos::as<long>(this->globalNumRows_),
300 Teuchos::as<int>(nrhs), Teuchos::as<int>(ld_rhs), host_xValues_.data(), &data_.x);
302 cholmod_dense *xtemp = &(data_.x);
303 if(use_cholmod_int_type_) {
304 cholmod_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
305 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
308 cholmod_l_solve2(CHOLMOD_A, data_.L, &data_.b, NULL,
309 &(xtemp), NULL, &data_.Y, &data_.E, &data_.c);
313 ierr = data_.c.status;
316 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
318 TEUCHOS_TEST_FOR_EXCEPTION(ierr == -2, std::runtime_error,
"Ran out of memory" );
325#ifdef HAVE_AMESOS2_TIMERS
326 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
329 if(use_triangular_solves_) {
330#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
331 Util::put_1d_data_helper_kokkos_view<
332 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, device_xValues_,
333 Teuchos::as<size_t>(ld_rhs),
335 this->rowIndexBase_);
339 Util::put_1d_data_helper_kokkos_view<
340 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, host_xValues_,
341 Teuchos::as<size_t>(ld_rhs),
343 this->rowIndexBase_);
351template <
class Matrix,
class Vector>
355 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
359template <
class Matrix,
class Vector>
367 use_cholmod_int_type_ = parameterList->get<
bool>(
"CholmodInt",
false);
369 if(use_cholmod_int_type_) {
370 data_.c.itype = CHOLMOD_INT;
371 cholmod_start(&data_.c);
374 data_.c.itype = CHOLMOD_LONG;
375 cholmod_l_start(&data_.c);
379 using Teuchos::getIntegralValue;
380 using Teuchos::ParameterEntryValidator;
382 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
384 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
385 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
387 if(use_triangular_solves_) {
388#if not defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) || not defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
389 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
390 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_CHOLMOD not configured." );
394 data_.c.dbound = parameterList->get<
double>(
"dbound", 0.0);
395 data_.c.prefer_upper = (parameterList->get<
bool>(
"PreferUpper",
true)) ? 1 : 0;
396 data_.c.print = parameterList->get<
int>(
"print", 3);
397 data_.c.nmethods = parameterList->get<
int>(
"nmethods", 0);
402#ifdef KOKKOS_ENABLE_CUDA
403 const int default_gpu_setting = 1;
407 const int default_gpu_setting = 0;
410 data_.c.useGPU = parameterList->get<
int>(
"useGPU", default_gpu_setting);
412#ifdef KOKKOS_ENABLE_CUDA
413 TEUCHOS_TEST_FOR_EXCEPTION(data_.c.useGPU != 0 && use_cholmod_int_type_, std::runtime_error,
414 "Amesos2 Cholmod solver must not use GPU (parameter useGPU = 0) if CholmodInt is turned on because Cholmod only supports GPU with long." );
417 bool bSuperNodal = parameterList->get<
bool>(
"SuperNodal",
false);
418 data_.c.supernodal = bSuperNodal ? CHOLMOD_SUPERNODAL : CHOLMOD_AUTO;
422template <
class Matrix,
class Vector>
423Teuchos::RCP<const Teuchos::ParameterList>
427 using Teuchos::tuple;
428 using Teuchos::ParameterList;
429 using Teuchos::EnhancedNumberValidator;
430 using Teuchos::setStringToIntegralParameter;
431 using Teuchos::stringToIntegralParameterEntryValidator;
433 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
435 if( is_null(valid_params) ){
436 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
439 Teuchos::RCP<EnhancedNumberValidator<int> > print_validator
440 = Teuchos::rcp(
new EnhancedNumberValidator<int>(0,5));
442 Teuchos::RCP<EnhancedNumberValidator<int> > nmethods_validator
443 = Teuchos::rcp(
new EnhancedNumberValidator<int>(0,9));
445 pl->set(
"nmethods", 0,
"Specifies the number of different ordering methods to try", nmethods_validator);
447 pl->set(
"print", 3,
"Specifies the verbosity of the print statements", print_validator);
449 pl->set(
"dbound", 0.0,
450 "Specifies the smallest absolute value on the diagonal D for the LDL factorization");
452 pl->set(
"PreferUpper",
true,
453 "Specifies whether the matrix will be "
454 "stored in upper triangular form.");
456 pl->set(
"useGPU", -1,
"1: Use GPU is 1, 0: Do not use GPU, -1: ENV CHOLMOD_USE_GPU set GPU usage.");
458 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
460 pl->set(
"CholmodInt",
false,
"Whether to use cholmod in int form");
462 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
464 pl->set(
"SuperNodal",
false,
"Whether to use super nodal");
473template <
class Matrix,
class Vector>
477#ifdef HAVE_AMESOS2_TIMERS
478 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
482 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
483 if(use_cholmod_int_type_) {
484 Kokkos::resize(host_rows_int_view_, this->globalNumNonZeros_);
485 Kokkos::resize(host_col_ptr_int_view_, this->globalNumRows_ + 1);
488 Kokkos::resize(host_rows_long_view_, this->globalNumNonZeros_);
489 Kokkos::resize(host_col_ptr_long_view_, this->globalNumRows_ + 1);
493#ifdef HAVE_AMESOS2_TIMERS
494 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
497 TEUCHOS_TEST_FOR_EXCEPTION(this->rowIndexBase_ != this->columnIndexBase_,
499 "Row and column maps have different indexbase ");
501 if(use_cholmod_int_type_) {
503 if ( is_contiguous_ ==
true ) {
505 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
506 host_size_int_type_array>::do_get(this->matrixA_.ptr(),
507 host_nzvals_view_, host_rows_int_view_,
508 host_col_ptr_int_view_, nnz_ret,
ROOTED,
510 this->rowIndexBase_);
514 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_int_type_array,
515 host_size_int_type_array>::do_get(this->matrixA_.ptr(),
516 host_nzvals_view_, host_rows_int_view_,
519 this->rowIndexBase_);
521 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
523 "Did not get the expected number of non-zero vals");
527 if ( is_contiguous_ ==
true ) {
529 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
530 host_size_long_type_array>::do_get(this->matrixA_.ptr(),
531 host_nzvals_view_, host_rows_long_view_,
532 host_col_ptr_long_view_, nnz_ret,
ROOTED,
534 this->rowIndexBase_);
538 MatrixAdapter<Matrix>,host_value_type_array,host_ordinal_long_type_array,
539 host_size_long_type_array>::do_get(this->matrixA_.ptr(),
540 host_nzvals_view_, host_rows_long_view_,
543 this->rowIndexBase_);
545 TEUCHOS_TEST_FOR_EXCEPTION(nnz_ret != Teuchos::as<long>(this->globalNumNonZeros_),
547 "Did not get the expected number of non-zero vals");
551 if(use_cholmod_int_type_) {
552 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
553 Teuchos::as<size_t>(this->globalNumCols_),
554 Teuchos::as<size_t>(this->globalNumNonZeros_),
556 host_col_ptr_int_view_.data(),
557 host_nzvals_view_.data(),
558 host_rows_int_view_.data(),
563 function_map::cholmod_init_sparse(Teuchos::as<size_t>(this->globalNumRows_),
564 Teuchos::as<size_t>(this->globalNumCols_),
565 Teuchos::as<size_t>(this->globalNumNonZeros_),
567 host_col_ptr_long_view_.data(),
568 host_nzvals_view_.data(),
569 host_rows_long_view_.data(),
574 TEUCHOS_TEST_FOR_EXCEPTION(data_.A.stype == 0, std::runtime_error,
575 "CHOLMOD loadA_impl loaded matrix but it is not symmetric.");
581template <
class Matrix,
class Vector>
585#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
587 if(use_cholmod_int_type_) {
588 device_int_khL_.create_sptrsv_handle(
589 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n,
true);
590 device_int_khU_.create_sptrsv_handle(
591 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n,
false);
594 device_long_khL_.create_sptrsv_handle(
595 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n,
true);
596 device_long_khU_.create_sptrsv_handle(
597 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_ETREE, data_.L->n,
false);
601 Kokkos::resize(host_trsv_etree_, data_.L->nsuper);
602 if(use_cholmod_int_type_) {
603 int *int_etree =
static_cast<int*
>(data_.c.Iwork) + 2 * data_.L->n;
604 for (
size_t i = 0 ; i < data_.L->nsuper; ++i) {
605 host_trsv_etree_(i) = int_etree[i];
609 long *long_etree =
static_cast<long*
>(data_.c.Iwork) + 2 * data_.L->n;
610 for (
size_t i = 0 ; i < data_.L->nsuper; ++i) {
611 host_trsv_etree_(i) =
static_cast<int>(long_etree[i]);
616 if(use_cholmod_int_type_) {
617 device_int_khL_.set_sptrsv_etree(host_trsv_etree_.data());
618 device_int_khU_.set_sptrsv_etree(host_trsv_etree_.data());
621 device_long_khL_.set_sptrsv_etree(host_trsv_etree_.data());
622 device_long_khU_.set_sptrsv_etree(host_trsv_etree_.data());
625 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
626 Kokkos::resize(host_trsv_perm_, ld_rhs);
627 if(use_cholmod_int_type_) {
628 int *int_iperm =
static_cast<int*
>(data_.L->Perm);
629 for (
size_t i = 0; i < ld_rhs; i++) {
630 host_trsv_perm_(int_iperm[i]) = i;
634 long *long_iperm =
static_cast<long*
>(data_.L->Perm);
635 for (
size_t i = 0; i < ld_rhs; i++) {
636 host_trsv_perm_(long_iperm[i]) = i;
639 deep_copy_or_assign_view(device_trsv_perm_, host_trsv_perm_);
642 if(use_cholmod_int_type_) {
643 device_int_khL_.set_sptrsv_perm(host_trsv_perm_.data());
644 device_int_khU_.set_sptrsv_perm(host_trsv_perm_.data());
647 device_long_khL_.set_sptrsv_perm(host_trsv_perm_.data());
648 device_long_khU_.set_sptrsv_perm(host_trsv_perm_.data());
652 if(use_cholmod_int_type_) {
653 KokkosSparse::Experimental::sptrsv_symbolic<int, kernel_handle_int_type>
654 (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
657 KokkosSparse::Experimental::sptrsv_symbolic<long, kernel_handle_long_type>
658 (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
664template <
class Matrix,
class Vector>
666Cholmod<Matrix,Vector>::triangular_solve_numeric()
668#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
670 if(use_cholmod_int_type_) {
671 KokkosSparse::Experimental::sptrsv_compute<int, kernel_handle_int_type>
672 (&device_int_khL_, &device_int_khU_, data_.L, &data_.c);
675 KokkosSparse::Experimental::sptrsv_compute<long, kernel_handle_long_type>
676 (&device_long_khL_, &device_long_khU_, data_.L, &data_.c);
682template <
class Matrix,
class Vector>
684Cholmod<Matrix,Vector>::triangular_solve()
const
686#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_CHOLMOD)
687 size_t ld_rhs = device_xValues_.extent(0);
688 size_t nrhs = device_xValues_.extent(1);
690 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
691 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
694 auto local_device_bValues = device_bValues_;
695 auto local_device_trsv_perm = device_trsv_perm_;
696 auto local_device_trsv_rhs = device_trsv_rhs_;
697 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
698 KOKKOS_LAMBDA(
size_t j) {
699 for(
size_t k = 0; k < nrhs; ++k) {
700 local_device_trsv_rhs(local_device_trsv_perm(j),k) = local_device_bValues(j,k);
704 for(
size_t k = 0; k < nrhs; ++k) {
705 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
706 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
709 if(use_cholmod_int_type_) {
710 KokkosSparse::Experimental::sptrsv_solve(&device_int_khL_, sub_sol, sub_rhs);
713 KokkosSparse::Experimental::sptrsv_solve(&device_long_khL_, sub_sol, sub_rhs);
717 if(use_cholmod_int_type_) {
718 KokkosSparse::Experimental::sptrsv_solve(&device_int_khU_, sub_rhs, sub_sol);
721 KokkosSparse::Experimental::sptrsv_solve(&device_long_khU_, sub_rhs, sub_sol);
727 auto local_device_xValues = device_xValues_;
728 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
729 KOKKOS_LAMBDA(
size_t j) {
730 for(
size_t k = 0; k < nrhs; ++k) {
731 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm(j),k);
738template<
class Matrix,
class Vector>
739const char* Cholmod<Matrix,Vector>::name =
"Cholmod";
Amesos2 CHOLMOD declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:127
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:128
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:143
Amesos2 interface to the CHOLMOD package.
Definition Amesos2_Cholmod_decl.hpp:77
~Cholmod()
Destructor.
Definition Amesos2_Cholmod_def.hpp:93
Cholmod(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_Cholmod_def.hpp:71
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:106
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:65
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:652