Anasazi Version of the Day
Loading...
Searching...
No Matches
AnasaziLOBPCG.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Anasazi: Block Eigensolvers Package
5// Copyright 2004 Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40// @HEADER
41
42
47/*
48 LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
49 X,H,P,R
50 KX,KH,KP (product of K and the above)
51 MX,MH,MP (product of M and the above, not allocated if we don't have an M matrix)
52 If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to
53 compute the local update of X and P.
54
55 A solver is bound to an eigenproblem at declaration.
56 Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
57
58 The orthogonalization manager is used to project away from the auxiliary vectors.
59 If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
60 The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
61 LOBPCG will not work correctly if the orthomanager uses a different inner product.
62 */
63
64
65#ifndef ANASAZI_LOBPCG_HPP
66#define ANASAZI_LOBPCG_HPP
67
68#include "AnasaziTypes.hpp"
69
73#include "Teuchos_ScalarTraits.hpp"
74
77
78#include "Teuchos_LAPACK.hpp"
79#include "Teuchos_BLAS.hpp"
80#include "Teuchos_SerialDenseMatrix.hpp"
81#include "Teuchos_ParameterList.hpp"
82#include "Teuchos_TimeMonitor.hpp"
83
104namespace Anasazi {
105
107
108
113 template <class ScalarType, class MultiVector>
114 struct LOBPCGState {
116 Teuchos::RCP<const MultiVector> V;
118 Teuchos::RCP<const MultiVector> KV;
120 Teuchos::RCP<const MultiVector> MV;
121
123 Teuchos::RCP<const MultiVector> X;
125 Teuchos::RCP<const MultiVector> KX;
127 Teuchos::RCP<const MultiVector> MX;
128
130 Teuchos::RCP<const MultiVector> P;
132 Teuchos::RCP<const MultiVector> KP;
134 Teuchos::RCP<const MultiVector> MP;
135
140 Teuchos::RCP<const MultiVector> H;
142 Teuchos::RCP<const MultiVector> KH;
144 Teuchos::RCP<const MultiVector> MH;
145
147 Teuchos::RCP<const MultiVector> R;
148
150 Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
151
152 LOBPCGState() :
153 V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
154 X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
155 P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
156 H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
157 R(Teuchos::null),T(Teuchos::null) {};
158 };
159
161
163
164
178 class LOBPCGRitzFailure : public AnasaziError {public:
179 LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
180 {}};
181
194 class LOBPCGInitFailure : public AnasaziError {public:
195 LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
196 {}};
197
214 class LOBPCGOrthoFailure : public AnasaziError {public:
215 LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
216 {}};
217
219
220
221 template <class ScalarType, class MV, class OP>
222 class LOBPCG : public Eigensolver<ScalarType,MV,OP> {
223 public:
224
226
227
235 LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem,
236 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
237 const Teuchos::RCP<OutputManager<ScalarType> > &printer,
238 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
239 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
240 Teuchos::ParameterList &params
241 );
242
244 virtual ~LOBPCG() {};
245
247
249
250
277 void iterate();
278
304 void initialize(LOBPCGState<ScalarType,MV>& newstate);
305
309 void initialize();
310
330 bool isInitialized() const;
331
342 LOBPCGState<ScalarType,MV> getState() const;
343
345
347
348
350 int getNumIters() const;
351
353 void resetNumIters();
354
362 Teuchos::RCP<const MV> getRitzVectors();
363
369 std::vector<Value<ScalarType> > getRitzValues();
370
378 std::vector<int> getRitzIndex();
379
380
386 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
387
388
394 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
395
396
404 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
405
406
415 int getCurSubspaceDim() const;
416
420 int getMaxSubspaceDim() const;
421
423
425
426
428 void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test);
429
431 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const;
432
434 const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
435
436
445 void setBlockSize(int blockSize);
446
447
449 int getBlockSize() const;
450
451
463 void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
464
466 Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
467
469
471
472
478 void setFullOrtho(bool fullOrtho);
479
481 bool getFullOrtho() const;
482
484 bool hasP();
485
487
489
490
492 void currentStatus(std::ostream &os);
493
495
496 private:
497 //
498 //
499 //
500 void setupViews();
501 //
502 // Convenience typedefs
503 //
504 typedef SolverUtils<ScalarType,MV,OP> Utils;
505 typedef MultiVecTraits<ScalarType,MV> MVT;
506 typedef OperatorTraits<ScalarType,MV,OP> OPT;
507 typedef Teuchos::ScalarTraits<ScalarType> SCT;
508 typedef typename SCT::magnitudeType MagnitudeType;
509 const MagnitudeType ONE;
510 const MagnitudeType ZERO;
511 const MagnitudeType NANVAL;
512 //
513 // Internal structs
514 //
515 struct CheckList {
516 bool checkX, checkMX, checkKX;
517 bool checkH, checkMH;
518 bool checkP, checkMP, checkKP;
519 bool checkR, checkQ;
520 CheckList() : checkX(false),checkMX(false),checkKX(false),
521 checkH(false),checkMH(false),
522 checkP(false),checkMP(false),checkKP(false),
523 checkR(false),checkQ(false) {};
524 };
525 //
526 // Internal methods
527 //
528 std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
529 //
530 // Classes inputed through constructor that define the eigenproblem to be solved.
531 //
532 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > problem_;
533 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_;
534 const Teuchos::RCP<OutputManager<ScalarType> > om_;
535 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > tester_;
536 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > orthman_;
537 //
538 // Information obtained from the eigenproblem
539 //
540 Teuchos::RCP<const OP> Op_;
541 Teuchos::RCP<const OP> MOp_;
542 Teuchos::RCP<const OP> Prec_;
543 bool hasM_;
544 //
545 // Internal timers
546 //
547 Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
548 timerSort_,
549 timerLocalProj_, timerDS_,
550 timerLocalUpdate_, timerCompRes_,
551 timerOrtho_, timerInit_;
552 //
553 // Counters
554 //
555 // Number of operator applications
556 int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
557
558 //
559 // Algorithmic parameters.
560 //
561 // blockSize_ is the solver block size
562 int blockSize_;
563 //
564 // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
565 // be activated (see citations at the top of this file)
566 bool fullOrtho_;
567
568 //
569 // Current solver state
570 //
571 // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
572 // is capable of running; _initialize is controlled by the initialize() member method
573 // For the implications of the state of initialized_, please see documentation for initialize()
574 bool initialized_;
575 //
576 // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
577 // this tells us how many of the values in theta_ are valid Ritz values
578 int nevLocal_;
579 //
580 // hasP_ tells us whether there is valid data in P (and KP,MP)
581 bool hasP_;
582 //
583 // State Multivecs
584 // V_, KV_ MV_ and R_ are primary pointers to allocated multivectors
585 Teuchos::RCP<MV> V_, KV_, MV_, R_;
586 // the rest are multivector views into V_, KV_ and MV_
587 Teuchos::RCP<MV> X_, KX_, MX_,
588 H_, KH_, MH_,
589 P_, KP_, MP_;
590
591 //
592 // if fullOrtho_ == true, then we must produce the following on every iteration:
593 // [newX newP] = [X H P] [CX;CP]
594 // the structure of [CX;CP] when using full orthogonalization does not allow us to
595 // do this in situ, and R_ does not have enough storage for newX and newP. therefore,
596 // we must allocate additional storage for this.
597 // otherwise, when not using full orthogonalization, the structure
598 // [newX newP] = [X H P] [CX1 0 ]
599 // [CX2 CP2] allows us to work using only R as work space
600 // [CX3 CP3]
601 Teuchos::RCP<MV> tmpmvec_;
602 //
603 // auxiliary vectors
604 Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
605 int numAuxVecs_;
606 //
607 // Number of iterations that have been performed.
608 int iter_;
609 //
610 // Current eigenvalues, residual norms
611 std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
612 //
613 // are the residual norms current with the residual?
614 bool Rnorms_current_, R2norms_current_;
615
616 };
617
618
619
620
622 // Constructor
623 template <class ScalarType, class MV, class OP>
625 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem,
626 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
627 const Teuchos::RCP<OutputManager<ScalarType> > &printer,
628 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
629 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
630 Teuchos::ParameterList &params
631 ) :
632 ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
633 ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
634 NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
635 // problem, tools
636 problem_(problem),
637 sm_(sorter),
638 om_(printer),
639 tester_(tester),
640 orthman_(ortho),
641 // timers, counters
642#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
643 timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")),
644 timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")),
645 timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")),
646 timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")),
647 timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")),
648 timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")),
649 timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")),
650 timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")),
651 timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")),
652 timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")),
653#endif
654 count_ApplyOp_(0),
655 count_ApplyM_(0),
656 count_ApplyPrec_(0),
657 // internal data
658 blockSize_(0),
659 fullOrtho_(params.get("Full Ortho", true)),
660 initialized_(false),
661 nevLocal_(0),
662 hasP_(false),
663 auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ),
664 numAuxVecs_(0),
665 iter_(0),
666 Rnorms_current_(false),
667 R2norms_current_(false)
668 {
669 TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
670 "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
671 TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
672 "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
673 TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
674 "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
675 TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
676 "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
677 TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
678 "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
679 TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
680 "Anasazi::LOBPCG::constructor: problem is not set.");
681 TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
682 "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
683
684 // get the problem operators
685 Op_ = problem_->getOperator();
686 TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
687 "Anasazi::LOBPCG::constructor: problem provides no operator.");
688 MOp_ = problem_->getM();
689 Prec_ = problem_->getPrec();
690 hasM_ = (MOp_ != Teuchos::null);
691
692 // set the block size and allocate data
693 int bs = params.get("Block Size", problem_->getNEV());
694 setBlockSize(bs);
695 }
696
697
699 // Set the block size and make necessary adjustments.
700 template <class ScalarType, class MV, class OP>
702 {
703 // time spent here counts towards timerInit_
704#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
705 Teuchos::TimeMonitor lcltimer( *timerInit_ );
706#endif
707
708 // This routine only allocates space; it doesn't not perform any computation
709 // if size is decreased, take the first newBS vectors of all and leave state as is
710 // otherwise, grow/allocate space and set solver to unitialized
711
712 Teuchos::RCP<const MV> tmp;
713 // grab some Multivector to Clone
714 // in practice, getInitVec() should always provide this, but it is possible to use a
715 // Eigenproblem with nothing in getInitVec() by manually initializing with initialize();
716 // in case of that strange scenario, we will try to Clone from R_ because it is smaller
717 // than V_, and we don't want to keep V_ around longer than necessary
718 if (blockSize_ > 0) {
719 tmp = R_;
720 }
721 else {
722 tmp = problem_->getInitVec();
723 TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
724 "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
725 }
726
727 TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || static_cast<ptrdiff_t>(newBS) > MVT::GetGlobalLength(*tmp),
728 std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
729 if (newBS == blockSize_) {
730 // do nothing
731 return;
732 }
733 else if (newBS < blockSize_ && initialized_) {
734 //
735 // shrink vectors
736
737 // release views so we can modify the bases
738 X_ = Teuchos::null;
739 KX_ = Teuchos::null;
740 MX_ = Teuchos::null;
741 H_ = Teuchos::null;
742 KH_ = Teuchos::null;
743 MH_ = Teuchos::null;
744 P_ = Teuchos::null;
745 KP_ = Teuchos::null;
746 MP_ = Teuchos::null;
747
748 // make new indices vectors
749 std::vector<int> newind(newBS), oldind(newBS);
750 for (int i=0; i<newBS; i++) {
751 newind[i] = i;
752 oldind[i] = i;
753 }
754
755 Teuchos::RCP<MV> newV, newMV, newKV, newR;
756 Teuchos::RCP<const MV> src;
757 // allocate R and newV
758 newR = MVT::Clone(*tmp,newBS);
759 newV = MVT::Clone(*tmp,newBS*3);
760 newKV = MVT::Clone(*tmp,newBS*3);
761 if (hasM_) {
762 newMV = MVT::Clone(*tmp,newBS*3);
763 }
764
765 //
766 // if we are initialized, we want to pull the data from V_ into newV:
767 // bs | bs | bs
768 // newV = [newX | **** |newP ]
769 // newKV = [newKX| **** |newKP]
770 // newMV = [newMX| **** |newMP]
771 // where
772 // oldbs | oldbs | oldbs
773 // V_ = [newX *** | ******* | newP ***]
774 // KV_ = [newKX *** | ******* | newKP ***]
775 // MV_ = [newMX *** | ******* | newMP ***]
776 //
777 // we don't care to copy the data corresponding to H
778 // we will not copy the M data if !hasM_, because it doesn't exist
779 //
780
781 // these are shrink operations which preserve their data
782 theta_.resize(3*newBS);
783 Rnorms_.resize(newBS);
784 R2norms_.resize(newBS);
785
786 // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
787 src = MVT::CloneView(*R_,newind);
788 MVT::SetBlock(*src,newind,*newR);
789 // free old memory and point to new memory
790 R_ = newR;
791
792 // copy in order: newX newKX newMX, then newP newKP newMP
793 // for X: [0,bs-1] <-- [0,bs-1]
794 src = MVT::CloneView(*V_,oldind);
795 MVT::SetBlock(*src,newind,*newV);
796 src = MVT::CloneView(*KV_,oldind);
797 MVT::SetBlock(*src,newind,*newKV);
798 if (hasM_) {
799 src = MVT::CloneView(*MV_,oldind);
800 MVT::SetBlock(*src,newind,*newMV);
801 }
802 // for P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1]
803 for (int i=0; i<newBS; i++) {
804 newind[i] += 2*newBS;
805 oldind[i] += 2*blockSize_;
806 }
807 src = MVT::CloneView(*V_,oldind);
808 MVT::SetBlock(*src,newind,*newV);
809 src = MVT::CloneView(*KV_,oldind);
810 MVT::SetBlock(*src,newind,*newKV);
811 if (hasM_) {
812 src = MVT::CloneView(*MV_,oldind);
813 MVT::SetBlock(*src,newind,*newMV);
814 }
815
816 // release temp view
817 src = Teuchos::null;
818
819 // release old allocations and point at new memory
820 V_ = newV;
821 KV_ = newKV;
822 if (hasM_) {
823 MV_ = newMV;
824 }
825 else {
826 MV_ = V_;
827 }
828 }
829 else {
830 // newBS > blockSize_ or not initialized
831 // this is also the scenario for our initial call to setBlockSize(), in the constructor
832 initialized_ = false;
833 hasP_ = false;
834
835 // release views
836 X_ = Teuchos::null;
837 KX_ = Teuchos::null;
838 MX_ = Teuchos::null;
839 H_ = Teuchos::null;
840 KH_ = Teuchos::null;
841 MH_ = Teuchos::null;
842 P_ = Teuchos::null;
843 KP_ = Teuchos::null;
844 MP_ = Teuchos::null;
845
846 // free allocated storage
847 R_ = Teuchos::null;
848 V_ = Teuchos::null;
849
850 // allocate scalar vectors
851 theta_.resize(3*newBS,NANVAL);
852 Rnorms_.resize(newBS,NANVAL);
853 R2norms_.resize(newBS,NANVAL);
854
855 // clone multivectors off of tmp
856 R_ = MVT::Clone(*tmp,newBS);
857 V_ = MVT::Clone(*tmp,newBS*3);
858 KV_ = MVT::Clone(*tmp,newBS*3);
859 if (hasM_) {
860 MV_ = MVT::Clone(*tmp,newBS*3);
861 }
862 else {
863 MV_ = V_;
864 }
865 }
866
867 // allocate tmp space
868 tmpmvec_ = Teuchos::null;
869 if (fullOrtho_) {
870 tmpmvec_ = MVT::Clone(*tmp,newBS);
871 }
872
873 // set new block size
874 blockSize_ = newBS;
875
876 // setup new views
877 setupViews();
878 }
879
880
882 // Setup views into V,KV,MV
883 template <class ScalarType, class MV, class OP>
885 {
886 std::vector<int> ind(blockSize_);
887
888 for (int i=0; i<blockSize_; i++) {
889 ind[i] = i;
890 }
891 X_ = MVT::CloneViewNonConst(*V_,ind);
892 KX_ = MVT::CloneViewNonConst(*KV_,ind);
893 if (hasM_) {
894 MX_ = MVT::CloneViewNonConst(*MV_,ind);
895 }
896 else {
897 MX_ = X_;
898 }
899
900 for (int i=0; i<blockSize_; i++) {
901 ind[i] += blockSize_;
902 }
903 H_ = MVT::CloneViewNonConst(*V_,ind);
904 KH_ = MVT::CloneViewNonConst(*KV_,ind);
905 if (hasM_) {
906 MH_ = MVT::CloneViewNonConst(*MV_,ind);
907 }
908 else {
909 MH_ = H_;
910 }
911
912 for (int i=0; i<blockSize_; i++) {
913 ind[i] += blockSize_;
914 }
915 P_ = MVT::CloneViewNonConst(*V_,ind);
916 KP_ = MVT::CloneViewNonConst(*KV_,ind);
917 if (hasM_) {
918 MP_ = MVT::CloneViewNonConst(*MV_,ind);
919 }
920 else {
921 MP_ = P_;
922 }
923 }
924
925
927 // Set the auxiliary vectors
928 template <class ScalarType, class MV, class OP>
929 void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
930 typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
931
932 // set new auxiliary vectors
933 auxVecs_ = auxvecs;
934
935 numAuxVecs_ = 0;
936 for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
937 numAuxVecs_ += MVT::GetNumberVecs(**i);
938 }
939
940 // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
941 if (numAuxVecs_ > 0 && initialized_) {
942 initialized_ = false;
943 hasP_ = false;
944 }
945
946 if (om_->isVerbosity( Debug ) ) {
947 // Check almost everything here
948 CheckList chk;
949 chk.checkQ = true;
950 om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
951 }
952 }
953
954
956 /* Initialize the state of the solver
957 *
958 * POST-CONDITIONS:
959 *
960 * initialized_ == true
961 * X is orthonormal, orthogonal to auxVecs_
962 * KX = Op*X
963 * MX = M*X if hasM_
964 * theta_ contains Ritz values of X
965 * R = KX - MX*diag(theta_)
966 * if hasP() == true,
967 * P orthogonal to auxVecs_
968 * if fullOrtho_ == true,
969 * P orthonormal and orthogonal to X
970 * KP = Op*P
971 * MP = M*P
972 */
973 template <class ScalarType, class MV, class OP>
974 void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV>& newstate)
975 {
976 // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
977 // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
978
979#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
980 Teuchos::TimeMonitor inittimer( *timerInit_ );
981#endif
982
983 std::vector<int> bsind(blockSize_);
984 for (int i=0; i<blockSize_; i++) bsind[i] = i;
985
986 // in LOBPCG, X (the subspace iterate) is primary
987 // the order of dependence follows like so.
988 // --init-> X
989 // --op apply-> MX,KX
990 // --ritz analysis-> theta
991 // --optional-> P,MP,KP
992 //
993 // if the user specifies all data for a level, we will accept it.
994 // otherwise, we will generate the whole level, and all subsequent levels.
995 //
996 // the data members are ordered based on dependence, and the levels are
997 // partitioned according to the amount of work required to produce the
998 // items in a level.
999 //
1000 // inconsitent multivectors widths and lengths will not be tolerated, and
1001 // will be treated with exceptions.
1002
1003 // set up X, KX, MX: get them from "state" if user specified them
1004
1005 //----------------------------------------
1006 // set up X, MX, KX
1007 //----------------------------------------
1008 if (newstate.X != Teuchos::null) {
1009 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.X) != MVT::GetGlobalLength(*X_),
1010 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
1011 // newstate.X must have blockSize_ vectors; any more will be ignored
1012 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
1013 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
1014
1015 // put X data in X_
1016 MVT::SetBlock(*newstate.X,bsind,*X_);
1017
1018 // put MX data in MX_
1019 if (hasM_) {
1020 if (newstate.MX != Teuchos::null) {
1021 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.MX) != MVT::GetGlobalLength(*MX_),
1022 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
1023 // newstate.MX must have blockSize_ vectors; any more will be ignored
1024 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
1025 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
1026 MVT::SetBlock(*newstate.MX,bsind,*MX_);
1027 }
1028 else {
1029 // user didn't specify MX, compute it
1030 {
1031#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1032 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1033#endif
1034 OPT::Apply(*MOp_,*X_,*MX_);
1035 count_ApplyM_ += blockSize_;
1036 }
1037 // we generated MX; we will generate R as well
1038 newstate.R = Teuchos::null;
1039 }
1040 }
1041
1042 // put data in KX
1043 if (newstate.KX != Teuchos::null) {
1044 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.KX) != MVT::GetGlobalLength(*KX_),
1045 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
1046 // newstate.KX must have blockSize_ vectors; any more will be ignored
1047 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
1048 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
1049 MVT::SetBlock(*newstate.KX,bsind,*KX_);
1050 }
1051 else {
1052 // user didn't specify KX, compute it
1053 {
1054#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1055 Teuchos::TimeMonitor lcltimer( *timerOp_ );
1056#endif
1057 OPT::Apply(*Op_,*X_,*KX_);
1058 count_ApplyOp_ += blockSize_;
1059 }
1060 // we generated KX; we will generate R as well
1061 newstate.R = Teuchos::null;
1062 }
1063 }
1064 else {
1065 // user did not specify X
1066 // we will initialize X, compute KX and MX, and compute R
1067 //
1068 // clear state so we won't use any data from it below
1069 newstate.P = Teuchos::null;
1070 newstate.KP = Teuchos::null;
1071 newstate.MP = Teuchos::null;
1072 newstate.R = Teuchos::null;
1073 newstate.T = Teuchos::null;
1074
1075 // generate a basis and projectAndNormalize
1076 Teuchos::RCP<const MV> ivec = problem_->getInitVec();
1077 TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
1078 "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
1079
1080 int initSize = MVT::GetNumberVecs(*ivec);
1081 if (initSize > blockSize_) {
1082 // we need only the first blockSize_ vectors from ivec; get a view of them
1083 initSize = blockSize_;
1084 std::vector<int> ind(blockSize_);
1085 for (int i=0; i<blockSize_; i++) ind[i] = i;
1086 ivec = MVT::CloneView(*ivec,ind);
1087 }
1088
1089 // assign ivec to first part of X_
1090 if (initSize > 0) {
1091 std::vector<int> ind(initSize);
1092 for (int i=0; i<initSize; i++) ind[i] = i;
1093 MVT::SetBlock(*ivec,ind,*X_);
1094 }
1095 // fill the rest of X_ with random
1096 if (blockSize_ > initSize) {
1097 std::vector<int> ind(blockSize_ - initSize);
1098 for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
1099 Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind);
1100 MVT::MvRandom(*rX);
1101 rX = Teuchos::null;
1102 }
1103
1104 // put data in MX
1105 if (hasM_) {
1106#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1107 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1108#endif
1109 OPT::Apply(*MOp_,*X_,*MX_);
1110 count_ApplyM_ += blockSize_;
1111 }
1112
1113 // remove auxVecs from X_ and normalize it
1114 if (numAuxVecs_ > 0) {
1115#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1116 Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
1117#endif
1118 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
1119 int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
1120 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
1121 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
1122 }
1123 else {
1124#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1125 Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
1126#endif
1127 int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
1128 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
1129 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
1130 }
1131
1132 // put data in KX
1133 {
1134#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1135 Teuchos::TimeMonitor lcltimer( *timerOp_ );
1136#endif
1137 OPT::Apply(*Op_,*X_,*KX_);
1138 count_ApplyOp_ += blockSize_;
1139 }
1140 } // end if (newstate.X != Teuchos::null)
1141
1142
1143 //----------------------------------------
1144 // set up Ritz values
1145 //----------------------------------------
1146 theta_.resize(3*blockSize_,NANVAL);
1147 if (newstate.T != Teuchos::null) {
1148 TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
1149 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
1150 for (int i=0; i<blockSize_; i++) {
1151 theta_[i] = (*newstate.T)[i];
1152 }
1153 nevLocal_ = blockSize_;
1154 }
1155 else {
1156 // get ritz vecs/vals
1157 Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
1158 MM(blockSize_,blockSize_),
1159 S(blockSize_,blockSize_);
1160 {
1161#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1162 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
1163#endif
1164 // project K
1165 MVT::MvTransMv(ONE,*X_,*KX_,KK);
1166 // project M
1167 MVT::MvTransMv(ONE,*X_,*MX_,MM);
1168 nevLocal_ = blockSize_;
1169 }
1170
1171 // solve the projected problem
1172 {
1173#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1174 Teuchos::TimeMonitor lcltimer( *timerDS_ );
1175#endif
1176 Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
1177 TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
1178 "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
1179 }
1180
1181 // We only have blockSize_ ritz pairs, ergo we do not need to select.
1182 // However, we still require them to be ordered correctly
1183 {
1184#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1185 Teuchos::TimeMonitor lcltimer( *timerSort_ );
1186#endif
1187
1188 std::vector<int> order(blockSize_);
1189 //
1190 // sort the first blockSize_ values in theta_
1191 sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_); // don't catch exception
1192 //
1193 // apply the same ordering to the primitive ritz vectors
1194 Utils::permuteVectors(order,S);
1195 }
1196
1197 // update the solution, use R for storage
1198 {
1199#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1200 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
1201#endif
1202 // X <- X*S
1203 MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );
1204 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
1205 // KX <- KX*S
1206 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
1207 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
1208 if (hasM_) {
1209 // MX <- MX*S
1210 MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );
1211 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
1212 }
1213 }
1214 }
1215
1216 //----------------------------------------
1217 // compute R
1218 //----------------------------------------
1219 if (newstate.R != Teuchos::null) {
1220 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.R) != MVT::GetGlobalLength(*R_),
1221 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
1222 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
1223 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
1224 MVT::SetBlock(*newstate.R,bsind,*R_);
1225 }
1226 else {
1227#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1228 Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
1229#endif
1230 // form R <- KX - MX*T
1231 MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
1232 Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
1233 for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
1234 MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
1235 }
1236
1237 // R has been updated; mark the norms as out-of-date
1238 Rnorms_current_ = false;
1239 R2norms_current_ = false;
1240
1241 // put data in P,KP,MP: P is not used to set theta
1242 if (newstate.P != Teuchos::null) {
1243 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
1244 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
1245 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.P) != MVT::GetGlobalLength(*P_),
1246 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
1247 hasP_ = true;
1248
1249 // set P_
1250 MVT::SetBlock(*newstate.P,bsind,*P_);
1251
1252 // set/compute KP_
1253 if (newstate.KP != Teuchos::null) {
1254 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
1255 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
1256 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.KP) != MVT::GetGlobalLength(*KP_),
1257 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
1258 MVT::SetBlock(*newstate.KP,bsind,*KP_);
1259 }
1260 else {
1261#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1262 Teuchos::TimeMonitor lcltimer( *timerOp_ );
1263#endif
1264 OPT::Apply(*Op_,*P_,*KP_);
1265 count_ApplyOp_ += blockSize_;
1266 }
1267
1268 // set/compute MP_
1269 if (hasM_) {
1270 if (newstate.MP != Teuchos::null) {
1271 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
1272 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
1273 TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetGlobalLength(*newstate.MP) != MVT::GetGlobalLength(*MP_),
1274 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
1275 MVT::SetBlock(*newstate.MP,bsind,*MP_);
1276 }
1277 else {
1278#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1279 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1280#endif
1281 OPT::Apply(*MOp_,*P_,*MP_);
1282 count_ApplyM_ += blockSize_;
1283 }
1284 }
1285 }
1286 else {
1287 hasP_ = false;
1288 }
1289
1290 // finally, we are initialized
1291 initialized_ = true;
1292
1293 if (om_->isVerbosity( Debug ) ) {
1294 // Check almost everything here
1295 CheckList chk;
1296 chk.checkX = true;
1297 chk.checkKX = true;
1298 chk.checkMX = true;
1299 chk.checkP = true;
1300 chk.checkKP = true;
1301 chk.checkMP = true;
1302 chk.checkR = true;
1303 chk.checkQ = true;
1304 om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
1305 }
1306
1307 }
1308
1309 template <class ScalarType, class MV, class OP>
1311 {
1312 LOBPCGState<ScalarType,MV> empty;
1313 initialize(empty);
1314 }
1315
1316
1318 // Instruct the solver to use full orthogonalization
1319 template <class ScalarType, class MV, class OP>
1321 {
1322 if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
1323 // state is already orthogonalized or solver is not initialized
1324 fullOrtho_ = fullOrtho;
1325 }
1326 else {
1327 // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
1328 // ergo, we must throw away data in P
1329 fullOrtho_ = true;
1330 hasP_ = false;
1331 }
1332
1333 // the user has called setFullOrtho, so the class has been instantiated
1334 // ergo, the data has already been allocated, i.e., setBlockSize() has been called
1335 // if it is already allocated, it should be the proper size
1336 if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
1337 // allocated the workspace
1338 tmpmvec_ = MVT::Clone(*X_,blockSize_);
1339 }
1340 else if (fullOrtho_==false) {
1341 // free the workspace
1342 tmpmvec_ = Teuchos::null;
1343 }
1344 }
1345
1346
1348 // Perform LOBPCG iterations until the StatusTest tells us to stop.
1349 template <class ScalarType, class MV, class OP>
1351 {
1352 //
1353 // Allocate/initialize data structures
1354 //
1355 if (initialized_ == false) {
1356 initialize();
1357 }
1358
1359 //
1360 // Miscellaneous definitions
1361 const int oneBlock = blockSize_;
1362 const int twoBlocks = 2*blockSize_;
1363 const int threeBlocks = 3*blockSize_;
1364
1365 std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
1366 for (int i=0; i<blockSize_; i++) {
1367 indblock1[i] = i;
1368 indblock2[i] = i + blockSize_;
1369 indblock3[i] = i + 2*blockSize_;
1370 }
1371
1372 //
1373 // Define dense projected/local matrices
1374 // KK = Local stiffness matrix (size: 3*blockSize_ x 3*blockSize_)
1375 // MM = Local mass matrix (size: 3*blockSize_ x 3*blockSize_)
1376 // S = Local eigenvectors (size: 3*blockSize_ x 3*blockSize_)
1377 Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ),
1378 MM( threeBlocks, threeBlocks ),
1379 S( threeBlocks, threeBlocks );
1380
1381 while (tester_->checkStatus(this) != Passed) {
1382
1383 // Print information on current status
1384 if (om_->isVerbosity(Debug)) {
1385 currentStatus( om_->stream(Debug) );
1386 }
1387 else if (om_->isVerbosity(IterationDetails)) {
1388 currentStatus( om_->stream(IterationDetails) );
1389 }
1390
1391 // increment iteration counter
1392 iter_++;
1393
1394 // Apply the preconditioner on the residuals: H <- Prec*R
1395 if (Prec_ != Teuchos::null) {
1396#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1397 Teuchos::TimeMonitor lcltimer( *timerPrec_ );
1398#endif
1399 OPT::Apply( *Prec_, *R_, *H_ ); // don't catch the exception
1400 count_ApplyPrec_ += blockSize_;
1401 }
1402 else {
1403 MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
1404 }
1405
1406 // Apply the mass matrix on H
1407 if (hasM_) {
1408#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1409 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
1410#endif
1411 OPT::Apply( *MOp_, *H_, *MH_); // don't catch the exception
1412 count_ApplyM_ += blockSize_;
1413 }
1414
1415 // orthogonalize H against the auxiliary vectors
1416 // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
1417 Teuchos::Array<Teuchos::RCP<const MV> > Q;
1418 Q = auxVecs_;
1419 if (fullOrtho_) {
1420 // X and P are not contiguous, so there is not much point in putting them under
1421 // a single multivector view
1422 Q.push_back(X_);
1423 if (hasP_) {
1424 Q.push_back(P_);
1425 }
1426 }
1427 {
1428#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1429 Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
1430#endif
1431 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC =
1432 Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
1433 int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
1434 // our views are currently in place; it is safe to throw an exception
1435 TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
1436 "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
1437 }
1438
1439 if (om_->isVerbosity( Debug ) ) {
1440 CheckList chk;
1441 chk.checkH = true;
1442 chk.checkMH = true;
1443 om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
1444 }
1445 else if (om_->isVerbosity( OrthoDetails ) ) {
1446 CheckList chk;
1447 chk.checkH = true;
1448 chk.checkMH = true;
1449 om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
1450 }
1451
1452 // Apply the stiffness matrix to H
1453 {
1454#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1455 Teuchos::TimeMonitor lcltimer( *timerOp_ );
1456#endif
1457 OPT::Apply( *Op_, *H_, *KH_); // don't catch the exception
1458 count_ApplyOp_ += blockSize_;
1459 }
1460
1461 if (hasP_) {
1462 nevLocal_ = threeBlocks;
1463 }
1464 else {
1465 nevLocal_ = twoBlocks;
1466 }
1467
1468 //
1469 // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
1470 // we need to perform the following operations:
1471 // X' [KX KH KP]
1472 // X' [MX MH MP]
1473 // H' [KH KP]
1474 // H' [MH MP]
1475 // P' [KP]
1476 // P' [MP]
1477 // [X H P] CX
1478 // [X H P] CP if fullOrtho
1479 // [H P] CP if !fullOrtho
1480 //
1481 // since M[X H P] is potentially the same memory as [X H P], and
1482 // because we are not allowed to have overlapping non-const views of
1483 // a multivector, we will now abandon our non-const views in favor of
1484 // const views
1485 //
1486 X_ = Teuchos::null;
1487 KX_ = Teuchos::null;
1488 MX_ = Teuchos::null;
1489 H_ = Teuchos::null;
1490 KH_ = Teuchos::null;
1491 MH_ = Teuchos::null;
1492 P_ = Teuchos::null;
1493 KP_ = Teuchos::null;
1494 MP_ = Teuchos::null;
1495 Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
1496 {
1497 cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
1498 cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
1499
1500 std::vector<int> indXHP(nevLocal_);
1501 for (int i=0; i<nevLocal_; i++) {
1502 indXHP[i] = i;
1503 }
1504 cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
1505 cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
1506 if (hasM_) {
1507 cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
1508 }
1509 else {
1510 cM_XHP = cXHP;
1511 }
1512
1513 std::vector<int> indHP(nevLocal_-blockSize_);
1514 for (int i=blockSize_; i<nevLocal_; i++) {
1515 indHP[i-blockSize_] = i;
1516 }
1517 cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
1518 cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
1519 if (hasM_) {
1520 cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
1521 }
1522 else {
1523 cM_HP = cHP;
1524 }
1525
1526 if (nevLocal_ == threeBlocks) {
1527 cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
1528 cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
1529 if (hasM_) {
1530 cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
1531 }
1532 else {
1533 cM_P = cP;
1534 }
1535 }
1536 }
1537
1538 //
1539 //----------------------------------------
1540 // Form "local" mass and stiffness matrices
1541 //----------------------------------------
1542 {
1543 // We will form only the block upper triangular part of
1544 // [X H P]' K [X H P] and [X H P]' M [X H P]
1545 // Get the necessary views into KK and MM:
1546 // [--K1--] [--M1--]
1547 // KK = [ -K2-] MM = [ -M2-]
1548 // [ K3] [ M3]
1549 //
1550 // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
1551 //
1552 Teuchos::SerialDenseMatrix<int,ScalarType>
1553 K1(Teuchos::View,KK,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_),
1554 K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
1555 K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
1556 M1(Teuchos::View,MM,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_),
1557 M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
1558 M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
1559 {
1560#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1561 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
1562#endif
1563 MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
1564 MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
1565 MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
1566 MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
1567 if (nevLocal_ == threeBlocks) {
1568 MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
1569 MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
1570 }
1571 }
1572 }
1573 // below, we only need bases [X H P] and [H P] and friends
1574 // furthermore, we only need [H P] and friends if fullOrtho == false
1575 // clear the others now
1576 cX = Teuchos::null;
1577 cH = Teuchos::null;
1578 cP = Teuchos::null;
1579 cK_P = Teuchos::null;
1580 cM_P = Teuchos::null;
1581 if (fullOrtho_ == true) {
1582 cHP = Teuchos::null;
1583 cK_HP = Teuchos::null;
1584 cM_HP = Teuchos::null;
1585 }
1586
1587 //
1588 //---------------------------------------------------
1589 // Perform a spectral decomposition of (KK,MM)
1590 //---------------------------------------------------
1591 //
1592 // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
1593 Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_),
1594 lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
1595 lclS(Teuchos::View, S,nevLocal_,nevLocal_);
1596 {
1597#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1598 Teuchos::TimeMonitor lcltimer( *timerDS_ );
1599#endif
1600 int localSize = nevLocal_;
1601 Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
1602 // localSize tells directSolver() how big KK,MM are
1603 // however, directSolver() may choose to use only the principle submatrices of KK,MM
1604 // because of loss of MM-orthogonality in the projected eigenvectors
1605 // nevLocal_ tells us how much it used, telling us the effective localSize
1606 // (i.e., how much of KK,MM used by directSolver)
1607 // we will not tolerate any indefiniteness, and will throw an exception if it was
1608 // detected by directSolver
1609 //
1610 if (nevLocal_ != localSize) {
1611 // before throwing the exception, and thereby leaving iterate(), setup the views again
1612 // first, clear the const views
1613 cXHP = Teuchos::null;
1614 cK_XHP = Teuchos::null;
1615 cM_XHP = Teuchos::null;
1616 cHP = Teuchos::null;
1617 cK_HP = Teuchos::null;
1618 cM_HP = Teuchos::null;
1619 setupViews();
1620 }
1621 TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure,
1622 "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
1623 }
1624
1625 //
1626 //---------------------------------------------------
1627 // Sort the ritz values using the sort manager
1628 //---------------------------------------------------
1629 Teuchos::LAPACK<int,ScalarType> lapack;
1630 Teuchos::BLAS<int,ScalarType> blas;
1631 {
1632#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1633 Teuchos::TimeMonitor lcltimer( *timerSort_ );
1634#endif
1635
1636 std::vector<int> order(nevLocal_);
1637 //
1638 // Sort the first nevLocal_ values in theta_
1639 sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_); // don't catch exception
1640 //
1641 // Sort the primitive ritz vectors
1642 Utils::permuteVectors(order,lclS);
1643 }
1644
1645 //
1646 //----------------------------------------
1647 // Compute coefficients for X and P under [X H P]
1648 //----------------------------------------
1649 // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
1650 // CX will be the coefficients of [X,H,P] for new X, CP for new P
1651 // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
1652 // Here, we will also orthonormalize CX.
1653 // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
1654 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
1655 if (fullOrtho_) {
1656 // build orthonormal basis for ( 0 ) that is MM orthogonal to ( S11 )
1657 // ( S21 ) ( S21 )
1658 // ( S31 ) ( S31 )
1659 // Do this using Cholesky factorization of ( S11 0 )
1660 // ( S21 S21 )
1661 // ( S31 S31 )
1662 // ( S11 0 )
1663 // Build C = ( S21 S21 )
1664 // ( S31 S31 )
1665 Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
1666 MMC(nevLocal_,twoBlocks),
1667 CMMC(twoBlocks ,twoBlocks);
1668
1669 // first block of rows: ( S11 0 )
1670 for (int j=0; j<oneBlock; j++) {
1671 for (int i=0; i<oneBlock; i++) {
1672 // CX
1673 C(i,j) = lclS(i,j);
1674 // CP
1675 C(i,j+oneBlock) = ZERO;
1676 }
1677 }
1678 // second block of rows: (S21 S21)
1679 for (int j=0; j<oneBlock; j++) {
1680 for (int i=oneBlock; i<twoBlocks; i++) {
1681 // CX
1682 C(i,j) = lclS(i,j);
1683 // CP
1684 C(i,j+oneBlock) = lclS(i,j);
1685 }
1686 }
1687 // third block of rows
1688 if (nevLocal_ == threeBlocks) {
1689 for (int j=0; j<oneBlock; j++) {
1690 for (int i=twoBlocks; i<threeBlocks; i++) {
1691 // CX
1692 C(i,j) = lclS(i,j);
1693 // CP
1694 C(i,j+oneBlock) = lclS(i,j);
1695 }
1696 }
1697 }
1698
1699 // compute MMC = lclMM*C
1700 {
1701 int teuchosret;
1702 teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
1703 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1704 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1705 // compute CMMC = C^H*MMC == C^H*lclMM*C
1706 teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
1707 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1708 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1709 }
1710
1711 // compute R (cholesky) of CMMC
1712 {
1713 int info;
1714 lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
1715 // our views ARE NOT currently in place; we must reestablish them before throwing an exception
1716 if (info != 0) {
1717 // Check symmetry of H'*K*H, this might be the first place a non-Hermitian operator will cause a problem.
1718 Teuchos::SerialDenseMatrix<int,ScalarType> K22(Teuchos::View,KK,blockSize_,blockSize_,1*blockSize_,1*blockSize_);
1719 Teuchos::SerialDenseMatrix<int,ScalarType> K22_trans( K22, Teuchos::CONJ_TRANS );
1720 K22_trans -= K22;
1721 MagnitudeType symNorm = K22_trans.normOne();
1722 MagnitudeType tol = SCT::magnitude(SCT::squareroot(SCT::eps()));
1723
1724 cXHP = Teuchos::null;
1725 cHP = Teuchos::null;
1726 cK_XHP = Teuchos::null;
1727 cK_HP = Teuchos::null;
1728 cM_XHP = Teuchos::null;
1729 cM_HP = Teuchos::null;
1730 setupViews();
1731
1732 std::string errMsg = "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.";
1733 if ( symNorm < tol )
1734 {
1735 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
1736 }
1737 else
1738 {
1739 errMsg += " Check the operator A (or K), it may not be Hermitian!";
1740 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
1741 }
1742 }
1743 }
1744 // compute C = C inv(R)
1745 blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
1746 nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
1747 // put C(:,0:oneBlock-1) into CX
1748 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
1749 // put C(:,oneBlock:twoBlocks-1) into CP
1750 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
1751
1752 // check the results
1753 if (om_->isVerbosity( Debug ) ) {
1754 Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
1755 tmp2(oneBlock,oneBlock);
1756 MagnitudeType tmp;
1757 int teuchosret;
1758 std::stringstream os;
1759 os.precision(2);
1760 os.setf(std::ios::scientific, std::ios::floatfield);
1761
1762 os << " Checking Full Ortho: iteration " << iter_ << std::endl;
1763
1764 // check CX^T MM CX == I
1765 // compute tmp1 = MM*CX
1766 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
1767 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1768 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1769 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
1770 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
1771 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1772 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1773 // subtrace tmp2 - I == CX^H * MM * CX - I
1774 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
1775 tmp = tmp2.normFrobenius();
1776 os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
1777
1778 // check CP^T MM CP == I
1779 // compute tmp1 = MM*CP
1780 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
1781 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1782 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1783 // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
1784 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
1785 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
1786 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1787 // subtrace tmp2 - I == CP^H * MM * CP - I
1788 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
1789 tmp = tmp2.normFrobenius();
1790 os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
1791
1792 // check CX^T MM CP == 0
1793 // compute tmp1 = MM*CP
1794 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
1795 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1796 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
1797 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
1798 TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
1799 // subtrace tmp2 == CX^H * MM * CP
1800 tmp = tmp2.normFrobenius();
1801 os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
1802
1803 os << std::endl;
1804 om_->print(Debug,os.str());
1805 }
1806 }
1807 else {
1808 // [S11 ... ...]
1809 // S = [S21 ... ...]
1810 // [S31 ... ...]
1811 //
1812 // CX = [S11]
1813 // [S21]
1814 // [S31] -> X = [X H P] CX
1815 //
1816 // CP = [S21] -> P = [H P] CP
1817 // [S31]
1818 //
1819 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_ ,oneBlock,0 ,0) );
1820 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
1821 }
1822
1823 //
1824 //----------------------------------------
1825 // Compute new X and new P
1826 //----------------------------------------
1827 // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
1828 {
1829#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1830 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
1831#endif
1832
1833 // if full ortho, then CX and CP are dense
1834 // we multiply [X H P]*CX into tmpMV
1835 // [X H P]*CP into R
1836 // then put V(:,firstblock) <- tmpMV
1837 // V(:,thirdblock) <- R
1838 //
1839 // if no full ortho, then [H P]*CP doesn't reference first block (X)
1840 // of V, so that we can modify it before computing P
1841 // so we multiply [X H P]*CX into R
1842 // V(:,firstblock) <- R
1843 // multiply [H P]*CP into R
1844 // V(:,thirdblock) <- R
1845 //
1846 // mutatis mutandis for K[XP] and M[XP]
1847 //
1848 // use SetBlock to do the assignments into V_
1849 //
1850 // in either case, views are only allowed to be overlapping
1851 // if they are const, and it should be assume that SetBlock
1852 // creates a view of the associated part
1853 //
1854 // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
1855 //
1856 if (fullOrtho_) {
1857 // X,P
1858 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
1859 MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
1860 cXHP = Teuchos::null;
1861 MVT::SetBlock(*tmpmvec_,indblock1,*V_);
1862 MVT::SetBlock(*R_ ,indblock3,*V_);
1863 // KX,KP
1864 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
1865 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
1866 cK_XHP = Teuchos::null;
1867 MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
1868 MVT::SetBlock(*R_ ,indblock3,*KV_);
1869 // MX,MP
1870 if (hasM_) {
1871 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
1872 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
1873 cM_XHP = Teuchos::null;
1874 MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
1875 MVT::SetBlock(*R_ ,indblock3,*MV_);
1876 }
1877 else {
1878 cM_XHP = Teuchos::null;
1879 }
1880 }
1881 else {
1882 // X,P
1883 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
1884 cXHP = Teuchos::null;
1885 MVT::SetBlock(*R_,indblock1,*V_);
1886 MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
1887 cHP = Teuchos::null;
1888 MVT::SetBlock(*R_,indblock3,*V_);
1889 // KX,KP
1890 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
1891 cK_XHP = Teuchos::null;
1892 MVT::SetBlock(*R_,indblock1,*KV_);
1893 MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
1894 cK_HP = Teuchos::null;
1895 MVT::SetBlock(*R_,indblock3,*KV_);
1896 // MX,MP
1897 if (hasM_) {
1898 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
1899 cM_XHP = Teuchos::null;
1900 MVT::SetBlock(*R_,indblock1,*MV_);
1901 MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
1902 cM_HP = Teuchos::null;
1903 MVT::SetBlock(*R_,indblock3,*MV_);
1904 }
1905 else {
1906 cM_XHP = Teuchos::null;
1907 cM_HP = Teuchos::null;
1908 }
1909 }
1910 } // end timing block
1911 // done with coefficient matrices
1912 CX = Teuchos::null;
1913 CP = Teuchos::null;
1914
1915 //
1916 // we now have a P direction
1917 hasP_ = true;
1918
1919 // debugging check: all of our const views should have been cleared by now
1920 // if not, we have a logic error above
1921 TEUCHOS_TEST_FOR_EXCEPTION( cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
1922 || cHP != Teuchos::null || cK_HP != Teuchos::null || cM_HP != Teuchos::null
1923 || cP != Teuchos::null || cK_P != Teuchos::null || cM_P != Teuchos::null,
1924 std::logic_error,
1925 "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
1926
1927 //
1928 // recreate our const MV views of X,H,P and friends
1929 setupViews();
1930
1931 //
1932 // Compute the new residuals, explicitly
1933 {
1934#ifdef ANASAZI_TEUCHOS_TIME_MONITOR
1935 Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
1936#endif
1937 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
1938 Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
1939 for (int i = 0; i < blockSize_; i++) {
1940 T(i,i) = theta_[i];
1941 }
1942 MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
1943 }
1944
1945 // R has been updated; mark the norms as out-of-date
1946 Rnorms_current_ = false;
1947 R2norms_current_ = false;
1948
1949 // When required, monitor some orthogonalities
1950 if (om_->isVerbosity( Debug ) ) {
1951 // Check almost everything here
1952 CheckList chk;
1953 chk.checkX = true;
1954 chk.checkKX = true;
1955 chk.checkMX = true;
1956 chk.checkP = true;
1957 chk.checkKP = true;
1958 chk.checkMP = true;
1959 chk.checkR = true;
1960 om_->print( Debug, accuracyCheck(chk, ": after local update") );
1961 }
1962 else if (om_->isVerbosity( OrthoDetails )) {
1963 CheckList chk;
1964 chk.checkX = true;
1965 chk.checkP = true;
1966 chk.checkR = true;
1967 om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
1968 }
1969 } // end while (statusTest == false)
1970 }
1971
1972
1974 // compute/return residual M-norms
1975 template <class ScalarType, class MV, class OP>
1976 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
1978 if (Rnorms_current_ == false) {
1979 // Update the residual norms
1980 orthman_->norm(*R_,Rnorms_);
1981 Rnorms_current_ = true;
1982 }
1983 return Rnorms_;
1984 }
1985
1986
1988 // compute/return residual 2-norms
1989 template <class ScalarType, class MV, class OP>
1990 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
1992 if (R2norms_current_ == false) {
1993 // Update the residual 2-norms
1994 MVT::MvNorm(*R_,R2norms_);
1995 R2norms_current_ = true;
1996 }
1997 return R2norms_;
1998 }
1999
2000
2001
2002
2004 // Check accuracy, orthogonality, and other debugging stuff
2005 //
2006 // bools specify which tests we want to run (instead of running more than we actually care about)
2007 //
2008 // we don't bother checking the following because they are computed explicitly:
2009 // H == Prec*R
2010 // KH == K*H
2011 //
2012 //
2013 // checkX : X orthonormal
2014 // orthogonal to auxvecs
2015 // checkMX: check MX == M*X
2016 // checkKX: check KX == K*X
2017 // checkP : if fullortho P orthonormal and orthogonal to X
2018 // orthogonal to auxvecs
2019 // checkMP: check MP == M*P
2020 // checkKP: check KP == K*P
2021 // checkH : if fullortho H orthonormal and orthogonal to X and P
2022 // orthogonal to auxvecs
2023 // checkMH: check MH == M*H
2024 // checkR : check R orthogonal to X
2025 // checkQ : check that auxiliary vectors are actually orthonormal
2026 //
2027 // TODO:
2028 // add checkTheta
2029 //
2030 template <class ScalarType, class MV, class OP>
2031 std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const
2032 {
2033 using std::endl;
2034
2035 std::stringstream os;
2036 os.precision(2);
2037 os.setf(std::ios::scientific, std::ios::floatfield);
2038 MagnitudeType tmp;
2039
2040 os << " Debugging checks: iteration " << iter_ << where << endl;
2041
2042 // X and friends
2043 if (chk.checkX && initialized_) {
2044 tmp = orthman_->orthonormError(*X_);
2045 os << " >> Error in X^H M X == I : " << tmp << endl;
2046 for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2047 tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
2048 os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
2049 }
2050 }
2051 if (chk.checkMX && hasM_ && initialized_) {
2052 tmp = Utils::errorEquality(*X_, *MX_, MOp_);
2053 os << " >> Error in MX == M*X : " << tmp << endl;
2054 }
2055 if (chk.checkKX && initialized_) {
2056 tmp = Utils::errorEquality(*X_, *KX_, Op_);
2057 os << " >> Error in KX == K*X : " << tmp << endl;
2058 }
2059
2060 // P and friends
2061 if (chk.checkP && hasP_ && initialized_) {
2062 if (fullOrtho_) {
2063 tmp = orthman_->orthonormError(*P_);
2064 os << " >> Error in P^H M P == I : " << tmp << endl;
2065 tmp = orthman_->orthogError(*P_,*X_);
2066 os << " >> Error in P^H M X == 0 : " << tmp << endl;
2067 }
2068 for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2069 tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
2070 os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
2071 }
2072 }
2073 if (chk.checkMP && hasM_ && hasP_ && initialized_) {
2074 tmp = Utils::errorEquality(*P_, *MP_, MOp_);
2075 os << " >> Error in MP == M*P : " << tmp << endl;
2076 }
2077 if (chk.checkKP && hasP_ && initialized_) {
2078 tmp = Utils::errorEquality(*P_, *KP_, Op_);
2079 os << " >> Error in KP == K*P : " << tmp << endl;
2080 }
2081
2082 // H and friends
2083 if (chk.checkH && initialized_) {
2084 if (fullOrtho_) {
2085 tmp = orthman_->orthonormError(*H_);
2086 os << " >> Error in H^H M H == I : " << tmp << endl;
2087 tmp = orthman_->orthogError(*H_,*X_);
2088 os << " >> Error in H^H M X == 0 : " << tmp << endl;
2089 if (hasP_) {
2090 tmp = orthman_->orthogError(*H_,*P_);
2091 os << " >> Error in H^H M P == 0 : " << tmp << endl;
2092 }
2093 }
2094 for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2095 tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
2096 os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
2097 }
2098 }
2099 if (chk.checkMH && hasM_ && initialized_) {
2100 tmp = Utils::errorEquality(*H_, *MH_, MOp_);
2101 os << " >> Error in MH == M*H : " << tmp << endl;
2102 }
2103
2104 // R: this is not M-orthogonality, but standard euclidean orthogonality
2105 if (chk.checkR && initialized_) {
2106 Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
2107 MVT::MvTransMv(ONE,*X_,*R_,xTx);
2108 tmp = xTx.normFrobenius();
2109 MVT::MvTransMv(ONE,*R_,*R_,xTx);
2110 double normR = xTx.normFrobenius();
2111 os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
2112 }
2113
2114 // Q
2115 if (chk.checkQ) {
2116 for (Array_size_type i=0; i<auxVecs_.size(); i++) {
2117 tmp = orthman_->orthonormError(*auxVecs_[i]);
2118 os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
2119 for (Array_size_type j=i+1; j<auxVecs_.size(); j++) {
2120 tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
2121 os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
2122 }
2123 }
2124 }
2125
2126 os << endl;
2127
2128 return os.str();
2129 }
2130
2131
2133 // Print the current status of the solver
2134 template <class ScalarType, class MV, class OP>
2135 void
2137 {
2138 using std::endl;
2139
2140 os.setf(std::ios::scientific, std::ios::floatfield);
2141 os.precision(6);
2142 os <<endl;
2143 os <<"================================================================================" << endl;
2144 os << endl;
2145 os <<" LOBPCG Solver Status" << endl;
2146 os << endl;
2147 os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
2148 os <<"The number of iterations performed is " << iter_ << endl;
2149 os <<"The current block size is " << blockSize_ << endl;
2150 os <<"The number of auxiliary vectors is " << numAuxVecs_ << endl;
2151 os <<"The number of operations Op*x is " << count_ApplyOp_ << endl;
2152 os <<"The number of operations M*x is " << count_ApplyM_ << endl;
2153 os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
2154
2155 os.setf(std::ios_base::right, std::ios_base::adjustfield);
2156
2157 if (initialized_) {
2158 os << endl;
2159 os <<"CURRENT EIGENVALUE ESTIMATES "<<endl;
2160 os << std::setw(20) << "Eigenvalue"
2161 << std::setw(20) << "Residual(M)"
2162 << std::setw(20) << "Residual(2)"
2163 << endl;
2164 os <<"--------------------------------------------------------------------------------"<<endl;
2165 for (int i=0; i<blockSize_; i++) {
2166 os << std::setw(20) << theta_[i];
2167 if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
2168 else os << std::setw(20) << "not current";
2169 if (R2norms_current_) os << std::setw(20) << R2norms_[i];
2170 else os << std::setw(20) << "not current";
2171 os << endl;
2172 }
2173 }
2174 os <<"================================================================================" << endl;
2175 os << endl;
2176 }
2177
2179 // are we initialized or not?
2180 template <class ScalarType, class MV, class OP>
2182 return initialized_;
2183 }
2184
2185
2187 // is P valid or not?
2188 template <class ScalarType, class MV, class OP>
2190 return hasP_;
2191 }
2192
2194 // is full orthogonalization enabled or not?
2195 template <class ScalarType, class MV, class OP>
2197 return(fullOrtho_);
2198 }
2199
2200
2202 // return the current auxilliary vectors
2203 template <class ScalarType, class MV, class OP>
2204 Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
2205 return auxVecs_;
2206 }
2207
2209 // return the current block size
2210 template <class ScalarType, class MV, class OP>
2212 return(blockSize_);
2213 }
2214
2216 // return the current eigenproblem
2217 template <class ScalarType, class MV, class OP>
2218 const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const {
2219 return(*problem_);
2220 }
2221
2222
2224 // return the max subspace dimension
2225 template <class ScalarType, class MV, class OP>
2227 return 3*blockSize_;
2228 }
2229
2231 // return the current subspace dimension
2232 template <class ScalarType, class MV, class OP>
2234 if (!initialized_) return 0;
2235 return nevLocal_;
2236 }
2237
2238
2240 // return the current ritz residual norms
2241 template <class ScalarType, class MV, class OP>
2242 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
2244 {
2245 return this->getRes2Norms();
2246 }
2247
2248
2250 // return the current compression indices
2251 template <class ScalarType, class MV, class OP>
2253 std::vector<int> ret(nevLocal_,0);
2254 return ret;
2255 }
2256
2257
2259 // return the current ritz values
2260 template <class ScalarType, class MV, class OP>
2261 std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() {
2262 std::vector<Value<ScalarType> > ret(nevLocal_);
2263 for (int i=0; i<nevLocal_; i++) {
2264 ret[i].realpart = theta_[i];
2265 ret[i].imagpart = ZERO;
2266 }
2267 return ret;
2268 }
2269
2271 // Set a new StatusTest for the solver.
2272 template <class ScalarType, class MV, class OP>
2273 void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) {
2274 TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
2275 "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
2276 tester_ = test;
2277 }
2278
2280 // Get the current StatusTest used by the solver.
2281 template <class ScalarType, class MV, class OP>
2282 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const {
2283 return tester_;
2284 }
2285
2287 // return the current ritz vectors
2288 template <class ScalarType, class MV, class OP>
2290 return X_;
2291 }
2292
2293
2295 // reset the iteration counter
2296 template <class ScalarType, class MV, class OP>
2298 iter_=0;
2299 }
2300
2301
2303 // return the number of iterations
2304 template <class ScalarType, class MV, class OP>
2306 return(iter_);
2307 }
2308
2309
2311 // return the state
2312 template <class ScalarType, class MV, class OP>
2313 LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
2314 LOBPCGState<ScalarType,MV> state;
2315 state.V = V_;
2316 state.KV = KV_;
2317 state.X = X_;
2318 state.KX = KX_;
2319 state.P = P_;
2320 state.KP = KP_;
2321 state.H = H_;
2322 state.KH = KH_;
2323 state.R = R_;
2324 state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
2325 if (hasM_) {
2326 state.MV = MV_;
2327 state.MX = MX_;
2328 state.MP = MP_;
2329 state.MH = MH_;
2330 }
2331 else {
2332 state.MX = Teuchos::null;
2333 state.MP = Teuchos::null;
2334 state.MH = Teuchos::null;
2335 }
2336 return state;
2337 }
2338
2339} // end Anasazi namespace
2340
2341#endif // ANASAZI_LOBPCG_HPP
Pure virtual base class which describes the basic interface to the iterative eigensolver.
Templated virtual class for providing orthogonalization/orthonormalization methods with matrix-based ...
Declaration of basic traits for the multivector type.
Virtual base class which defines basic traits for the operator type.
Class which provides internal utilities for the Anasazi solvers.
Types and exceptions used within Anasazi solvers and interfaces.
An exception class parent to all Anasazi exceptions.
The Eigensolver is a templated virtual base class that defines the basic interface that any eigensolv...
LOBPCGInitFailure is thrown when the LOBPCG solver is unable to generate an initial iterate in the LO...
LOBPCGOrthoFailure is thrown when an orthogonalization attempt fails.
LOBPCGRitzFailure is thrown when the LOBPCG solver is unable to continue a call to LOBPCG::iterate() ...
This class provides the Locally Optimal Block Preconditioned Conjugate Gradient (LOBPCG) iteration,...
void iterate()
This method performs LOBPCG iterations until the status test indicates the need to stop or an error o...
std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > getRitzRes2Norms()
Get the 2-norms of the residuals.
void setStatusTest(Teuchos::RCP< StatusTest< ScalarType, MV, OP > > test)
Set a new StatusTest for the solver.
virtual ~LOBPCG()
LOBPCG destructor
void resetNumIters()
Reset the iteration count.
void setBlockSize(int blockSize)
Set the blocksize to be used by the iterative solver in solving this eigenproblem.
std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > getResNorms()
Get the current residual norms.
Teuchos::RCP< const MV > getRitzVectors()
Get the Ritz vectors from the previous iteration.
void initialize()
Initialize the solver with the initial vectors from the eigenproblem or random data.
void setFullOrtho(bool fullOrtho)
Instruct the LOBPCG iteration to use full orthogonality.
const Eigenproblem< ScalarType, MV, OP > & getProblem() const
Get a constant reference to the eigenvalue problem.
void currentStatus(std::ostream &os)
This method requests that the solver print out its current status to screen.
std::vector< int > getRitzIndex()
Get the index used for extracting Ritz vectors from getRitzVectors().
Teuchos::RCP< StatusTest< ScalarType, MV, OP > > getStatusTest() const
Get the current StatusTest used by the solver.
int getCurSubspaceDim() const
Get the dimension of the search subspace used to generate the current eigenvectors and eigenvalues.
std::vector< Value< ScalarType > > getRitzValues()
Get the Ritz values from the previous iteration.
std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > getRes2Norms()
Get the current residual 2-norms.
LOBPCG(const Teuchos::RCP< Eigenproblem< ScalarType, MV, OP > > &problem, const Teuchos::RCP< SortManager< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > > &sorter, const Teuchos::RCP< OutputManager< ScalarType > > &printer, const Teuchos::RCP< StatusTest< ScalarType, MV, OP > > &tester, const Teuchos::RCP< MatOrthoManager< ScalarType, MV, OP > > &ortho, Teuchos::ParameterList &params)
LOBPCG constructor with eigenproblem, solver utilities, and parameter list of solver options.
void setAuxVecs(const Teuchos::Array< Teuchos::RCP< const MV > > &auxvecs)
Set the auxiliary vectors for the solver.
int getBlockSize() const
Get the blocksize to be used by the iterative solver in solving this eigenproblem.
int getNumIters() const
Get the current iteration count.
int getMaxSubspaceDim() const
Get the maximum dimension allocated for the search subspace. For LOBPCG, this always returns 3*getBlo...
bool getFullOrtho() const
Determine if the LOBPCG iteration is using full orthogonality.
LOBPCGState< ScalarType, MV > getState() const
Get the current state of the eigensolver.
bool isInitialized() const
Indicates whether the solver has been initialized or not.
Teuchos::Array< Teuchos::RCP< const MV > > getAuxVecs() const
Get the current auxiliary vectors.
bool hasP()
Indicates whether the search direction given by getState() is valid.
Anasazi's templated pure virtual class for managing the sorting of approximate eigenvalues computed b...
Namespace Anasazi contains the classes, structs, enums and utilities used by the Anasazi package.
Structure to contain pointers to Anasazi state variables.
Teuchos::RCP< const MultiVector > KH
The image of the current preconditioned residual vectors under K.
Teuchos::RCP< const MultiVector > X
The current eigenvectors.
Teuchos::RCP< const MultiVector > KV
The image of the current test basis under K.
Teuchos::RCP< const MultiVector > MX
The image of the current eigenvectors under M, or Teuchos::null if M was not specified.
Teuchos::RCP< const MultiVector > V
The current test basis.
Teuchos::RCP< const MultiVector > KX
The image of the current eigenvectors under K.
Teuchos::RCP< const MultiVector > R
The current residual vectors.
Teuchos::RCP< const MultiVector > MP
The image of the current search direction under M, or Teuchos::null if M was not specified.
Teuchos::RCP< const std::vector< typename Teuchos::ScalarTraits< ScalarType >::magnitudeType > > T
The current Ritz values.
Teuchos::RCP< const MultiVector > MV
The image of the current test basis under M, or Teuchos::null if M was not specified.
Teuchos::RCP< const MultiVector > H
The current preconditioned residual vectors.
Teuchos::RCP< const MultiVector > P
The current search direction.
Teuchos::RCP< const MultiVector > MH
The image of the current preconditioned residual vectors under M, or Teuchos::null if M was not speci...
Teuchos::RCP< const MultiVector > KP
The image of the current search direction under K.