00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054
00055 #include "AnasaziTypes.hpp"
00056
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziModalSolverUtils.hpp"
00064
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070
00092 namespace Anasazi {
00093
00095
00096
00101 template <class ScalarType, class MV>
00102 struct LOBPCGState {
00104 Teuchos::RefCountPtr<const MV> X;
00106 Teuchos::RefCountPtr<const MV> KX;
00108 Teuchos::RefCountPtr<const MV> MX;
00110 Teuchos::RefCountPtr<const MV> P;
00112 Teuchos::RefCountPtr<const MV> KP;
00114 Teuchos::RefCountPtr<const MV> MP;
00119 Teuchos::RefCountPtr<const MV> H;
00121 Teuchos::RefCountPtr<const MV> KH;
00123 Teuchos::RefCountPtr<const MV> MH;
00125 Teuchos::RefCountPtr<const MV> R;
00127 Teuchos::RefCountPtr<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00128 LOBPCGState() : X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00129 P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00130 H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00131 R(Teuchos::null),T(Teuchos::null) {};
00132 };
00133
00135
00137
00138
00152 class LOBPCGRitzFailure : public AnasaziError {public:
00153 LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00154 {}};
00155
00168 class LOBPCGInitFailure : public AnasaziError {public:
00169 LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00170 {}};
00171
00188 class LOBPCGOrthoFailure : public AnasaziError {public:
00189 LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00190 {}};
00191
00193
00194
00195 template <class ScalarType, class MV, class OP>
00196 class LOBPCG : public Eigensolver<ScalarType,MV,OP> {
00197 public:
00198
00200
00201
00209 LOBPCG( const Teuchos::RefCountPtr<Eigenproblem<ScalarType,MV,OP> > &problem,
00210 const Teuchos::RefCountPtr<SortManager<ScalarType,MV,OP> > &sorter,
00211 const Teuchos::RefCountPtr<OutputManager<ScalarType> > &printer,
00212 const Teuchos::RefCountPtr<StatusTest<ScalarType,MV,OP> > &tester,
00213 const Teuchos::RefCountPtr<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00214 Teuchos::ParameterList ¶ms
00215 );
00216
00218 virtual ~LOBPCG() {};
00219
00221
00223
00224
00251 void iterate();
00252
00276 void initialize(LOBPCGState<ScalarType,MV> newstate);
00277
00281 void initialize();
00282
00302 bool isInitialized() { return initialized_; }
00303
00314 LOBPCGState<ScalarType,MV> getState() const {
00315 LOBPCGState<ScalarType,MV> state;
00316 state.X = X_;
00317 state.KX = KX_;
00318 state.P = P_;
00319 state.KP = KP_;
00320 state.H = H_;
00321 state.KH = KH_;
00322 state.R = R_;
00323 state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
00324 if (hasM_) {
00325 state.MX = MX_;
00326 state.MP = MP_;
00327 state.MH = MH_;
00328 }
00329 else {
00330 state.MX = Teuchos::null;
00331 state.MP = Teuchos::null;
00332 state.MH = Teuchos::null;
00333 }
00334 return state;
00335 }
00336
00338
00340
00341
00343 int getNumIters() const { return(iter_); };
00344
00346 void resetNumIters() { iter_=0; };
00347
00355 Teuchos::RefCountPtr<const MV> getRitzVectors() {return X_;}
00356
00362 std::vector<Value<ScalarType> > getRitzValues() {
00363 std::vector<Value<ScalarType> > ret(nevLocal_);
00364 for (int i=0; i<nevLocal_; i++) {
00365 ret[i].realpart = theta_[i];
00366 ret[i].imagpart = ZERO;
00367 }
00368 return ret;
00369 }
00370
00378 std::vector<int> getRitzIndex() {
00379 std::vector<int> ret(nevLocal_,0);
00380 return ret;
00381 }
00382
00383
00384
00385
00391 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00392
00393
00399 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00400
00401
00406 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms() {
00407 std::vector<MagnitudeType> ret = ritz2norms_;
00408 ret.resize(nevLocal_);
00409 return ret;
00410 }
00411
00412
00421 int getCurSubspaceDim() const {
00422 if (!initialized_) return 0;
00423 return nevLocal_;
00424 }
00425
00429 int getMaxSubspaceDim() const {return 3*blockSize_;}
00430
00432
00434
00435
00436
00438 const Eigenproblem<ScalarType,MV,OP>& getProblem() const { return(*problem_); };
00439
00440
00449 void setBlockSize(int blockSize);
00450
00451
00453 int getBlockSize() const { return(blockSize_); }
00454
00455
00467 void setAuxVecs(const Teuchos::Array<Teuchos::RefCountPtr<const MV> > &auxvecs);
00468
00470 Teuchos::Array<Teuchos::RefCountPtr<const MV> > getAuxVecs() const {return auxVecs_;}
00471
00473
00475
00476
00482 void setFullOrtho(bool fullOrtho);
00483
00485 bool getFullOrtho() const { return(fullOrtho_); }
00486
00488 bool hasP() {return hasP_;}
00489
00491
00493
00494
00496 void currentStatus(ostream &os);
00497
00499
00500 private:
00501
00502
00503
00504 typedef MultiVecTraits<ScalarType,MV> MVT;
00505 typedef OperatorTraits<ScalarType,MV,OP> OPT;
00506 typedef Teuchos::ScalarTraits<ScalarType> SCT;
00507 typedef typename SCT::magnitudeType MagnitudeType;
00508 const MagnitudeType ONE;
00509 const MagnitudeType ZERO;
00510 const MagnitudeType NANVAL;
00511
00512
00513
00514 struct CheckList {
00515 bool checkX, checkMX, checkKX;
00516 bool checkH, checkMH;
00517 bool checkP, checkMP, checkKP;
00518 bool checkR, checkQ;
00519 CheckList() : checkX(false),checkMX(false),checkKX(false),
00520 checkH(false),checkMH(false),
00521 checkP(false),checkMP(false),checkKP(false),
00522 checkR(false),checkQ(false) {};
00523 };
00524
00525
00526
00527 string accuracyCheck(const CheckList &chk, const string &where) const;
00528
00529
00530
00531 const Teuchos::RefCountPtr<Eigenproblem<ScalarType,MV,OP> > problem_;
00532 const Teuchos::RefCountPtr<SortManager<ScalarType,MV,OP> > sm_;
00533 const Teuchos::RefCountPtr<OutputManager<ScalarType> > om_;
00534 const Teuchos::RefCountPtr<StatusTest<ScalarType,MV,OP> > tester_;
00535 const Teuchos::RefCountPtr<MatOrthoManager<ScalarType,MV,OP> > orthman_;
00536
00537
00538
00539 Teuchos::RefCountPtr<OP> Op_;
00540 Teuchos::RefCountPtr<OP> MOp_;
00541 Teuchos::RefCountPtr<OP> Prec_;
00542 bool hasM_;
00543
00544
00545
00546 ModalSolverUtils<ScalarType,MV,OP> MSUtils_;
00547
00548
00549
00550 Teuchos::RefCountPtr<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00551 timerSort_,
00552 timerLocalProj_, timerDS_,
00553 timerLocalUpdate_, timerCompRes_,
00554 timerOrtho_, timerInit_;
00555
00556
00557
00558
00559 int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00560
00561
00562
00563
00564
00565 int blockSize_;
00566
00567
00568
00569 bool fullOrtho_;
00570
00571
00572
00573
00574
00575
00576
00577 bool initialized_;
00578
00579
00580
00581 int nevLocal_;
00582
00583
00584 bool hasP_;
00585
00586
00587 Teuchos::RefCountPtr<MV> X_, KX_, MX_, R_,
00588 H_, KH_, MH_,
00589 P_, KP_, MP_;
00590
00591
00592
00593 Teuchos::RefCountPtr<MV> tmpMV_;
00594
00595
00596 Teuchos::Array<Teuchos::RefCountPtr<const MV> > auxVecs_;
00597 int numAuxVecs_;
00598
00599
00600 int iter_;
00601
00602
00603 std::vector<MagnitudeType> theta_, Rnorms_, R2norms_, ritz2norms_;
00604
00605
00606 bool Rnorms_current_, R2norms_current_;
00607
00608 };
00609
00610
00611
00612
00614
00615 template <class ScalarType, class MV, class OP>
00616 LOBPCG<ScalarType,MV,OP>::LOBPCG(
00617 const Teuchos::RefCountPtr<Eigenproblem<ScalarType,MV,OP> > &problem,
00618 const Teuchos::RefCountPtr<SortManager<ScalarType,MV,OP> > &sorter,
00619 const Teuchos::RefCountPtr<OutputManager<ScalarType> > &printer,
00620 const Teuchos::RefCountPtr<StatusTest<ScalarType,MV,OP> > &tester,
00621 const Teuchos::RefCountPtr<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00622 Teuchos::ParameterList ¶ms
00623 ) :
00624 ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00625 ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00626 NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00627
00628 problem_(problem),
00629 sm_(sorter),
00630 om_(printer),
00631 tester_(tester),
00632 orthman_(ortho),
00633 MSUtils_(om_),
00634
00635 timerOp_(Teuchos::TimeMonitor::getNewTimer("Operation Op*x")),
00636 timerMOp_(Teuchos::TimeMonitor::getNewTimer("Operation M*x")),
00637 timerPrec_(Teuchos::TimeMonitor::getNewTimer("Operation Prec*x")),
00638 timerSort_(Teuchos::TimeMonitor::getNewTimer("Sorting eigenvalues")),
00639 timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Local projection")),
00640 timerDS_(Teuchos::TimeMonitor::getNewTimer("Direct solve")),
00641 timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Local update")),
00642 timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Computing residuals")),
00643 timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Orthogonalization")),
00644 timerInit_(Teuchos::TimeMonitor::getNewTimer("Initialization")),
00645 count_ApplyOp_(0),
00646 count_ApplyM_(0),
00647 count_ApplyPrec_(0),
00648
00649 blockSize_(0),
00650 fullOrtho_(params.get("Full Ortho", true)),
00651 initialized_(false),
00652 nevLocal_(0),
00653 hasP_(false),
00654 auxVecs_( Teuchos::Array<Teuchos::RefCountPtr<const MV> >(0) ),
00655 numAuxVecs_(0),
00656 iter_(0),
00657 Rnorms_current_(false),
00658 R2norms_current_(false)
00659 {
00660 TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00661 "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00662 TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00663 "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00664 TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00665 "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00666 TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00667 "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00668 TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00669 "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00670 TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00671 "Anasazi::LOBPCG::constructor: problem is not set.");
00672 TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00673 "Anasazi::LOBPCG::constructor: problem is not hermitian.");
00674
00675
00676 Op_ = problem_->getOperator();
00677 TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00678 "Anasazi::LOBPCG::constructor: problem provides no operator.");
00679 MOp_ = problem_->getM();
00680 Prec_ = problem_->getPrec();
00681 hasM_ = (MOp_ != Teuchos::null);
00682
00683
00684 int bs = params.get("Block Size", problem_->getNEV());
00685 setBlockSize(bs);
00686 }
00687
00688
00690
00691 template <class ScalarType, class MV, class OP>
00692 void LOBPCG<ScalarType,MV,OP>::setBlockSize (int blockSize)
00693 {
00694
00695 Teuchos::TimeMonitor lcltimer( *timerInit_ );
00696
00697
00698
00699
00700
00701
00702 TEST_FOR_EXCEPTION(blockSize <= 0, std::invalid_argument, "Anasazi::LOBPCG::setBlockSize was passed a non-positive block size");
00703 if (blockSize == blockSize_) {
00704
00705 return;
00706 }
00707 else if (blockSize < blockSize_) {
00708
00709 blockSize_ = blockSize;
00710
00711 theta_.resize(3*blockSize_);
00712 ritz2norms_.resize(3*blockSize_);
00713 Rnorms_.resize(blockSize_);
00714 R2norms_.resize(blockSize_);
00715
00716 if (initialized_) {
00717
00718
00719 std::vector<int> ind(blockSize_);
00720 for (int i=0; i<blockSize_; i++) ind[i] = i;
00721
00722 X_ = MVT::CloneCopy(*X_,ind);
00723 KX_ = MVT::CloneCopy(*KX_,ind);
00724 if (hasM_) {
00725 MX_ = MVT::CloneCopy(*MX_,ind);
00726 }
00727 else {
00728 MX_ = X_;
00729 }
00730 R_ = MVT::CloneCopy(*R_,ind);
00731 P_ = MVT::CloneCopy(*P_,ind);
00732 KP_ = MVT::CloneCopy(*KP_,ind);
00733 if (hasM_) {
00734 MP_ = MVT::CloneCopy(*MP_,ind);
00735 }
00736 else {
00737 MP_ = P_;
00738 }
00739 }
00740 else {
00741
00742 X_ = MVT::Clone(*X_,blockSize_);
00743 KX_ = MVT::Clone(*KX_,blockSize_);
00744 if (hasM_) {
00745 MX_ = MVT::Clone(*MX_,blockSize_);
00746 }
00747 else {
00748 MX_ = X_;
00749 }
00750 R_ = MVT::Clone(*R_,blockSize_);
00751 P_ = MVT::Clone(*P_,blockSize_);
00752 KP_ = MVT::Clone(*KP_,blockSize_);
00753 if (hasM_) {
00754 MP_ = MVT::Clone(*MP_,blockSize_);
00755 }
00756 else {
00757 MP_ = P_;
00758 }
00759 }
00760
00761 H_ = MVT::Clone(*H_,blockSize_);
00762 KH_ = MVT::Clone(*KH_,blockSize_);
00763 if (hasM_) {
00764 MH_ = MVT::Clone(*MH_,blockSize_);
00765 }
00766 else {
00767 MH_ = H_;
00768 }
00769 }
00770 else {
00771
00772 initialized_ = false;
00773
00774 Teuchos::RefCountPtr<const MV> tmp;
00775
00776
00777
00778
00779 if (blockSize_ > 0) {
00780 tmp = X_;
00781 }
00782 else {
00783 tmp = problem_->getInitVec();
00784 TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00785 "Anasazi::LOBPCG::setBlockSize(): Eigenproblem did not specify initial vectors to clone from");
00786 }
00787
00788 theta_.resize(3*blockSize,NANVAL);
00789 ritz2norms_.resize(3*blockSize_,NANVAL);
00790 Rnorms_.resize(blockSize,NANVAL);
00791 R2norms_.resize(blockSize,NANVAL);
00792
00793
00794 X_ = MVT::Clone(*tmp,blockSize);
00795 KX_ = MVT::Clone(*tmp,blockSize);
00796 if (hasM_) {
00797 MX_ = MVT::Clone(*tmp,blockSize);
00798 }
00799 else {
00800 MX_ = X_;
00801 }
00802 R_ = MVT::Clone(*tmp,blockSize);
00803 H_ = MVT::Clone(*tmp,blockSize);
00804 KH_ = MVT::Clone(*tmp,blockSize);
00805 if (hasM_) {
00806 MH_ = MVT::Clone(*tmp,blockSize);
00807 }
00808 else {
00809 MH_ = H_;
00810 }
00811 hasP_ = false;
00812 P_ = MVT::Clone(*tmp,blockSize);
00813 KP_ = MVT::Clone(*tmp,blockSize);
00814 if (hasM_) {
00815 MP_ = MVT::Clone(*tmp,blockSize);
00816 }
00817 else {
00818 MP_ = P_;
00819 }
00820 blockSize_ = blockSize;
00821 }
00822 }
00823
00824
00826
00827 template <class ScalarType, class MV, class OP>
00828 void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RefCountPtr<const MV> > &auxvecs) {
00829 typedef typename Teuchos::Array<Teuchos::RefCountPtr<const MV> >::iterator tarcpmv;
00830
00831
00832 auxVecs_ = auxvecs;
00833
00834 numAuxVecs_ = 0;
00835 for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00836 numAuxVecs_ += MVT::GetNumberVecs(**i);
00837 }
00838
00839
00840 if (numAuxVecs_ > 0 && initialized_) {
00841 initialized_ = false;
00842 hasP_ = false;
00843 }
00844
00845 if (om_->isVerbosity( Debug ) ) {
00846
00847 CheckList chk;
00848 chk.checkQ = true;
00849 om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00850 }
00851 }
00852
00853
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872 template <class ScalarType, class MV, class OP>
00873 void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00874 {
00875
00876
00877
00878 hasP_ = false;
00879
00880 std::vector<int> bsind(blockSize_);
00881 for (int i=0; i<blockSize_; i++) bsind[i] = i;
00882
00883
00884 if (newstate.X != Teuchos::null && MVT::GetNumberVecs(*newstate.X) >= blockSize_ && MVT::GetVecLength(*newstate.X) == MVT::GetVecLength(*X_) ) {
00885
00886 Teuchos::TimeMonitor lcltimer( *timerInit_ );
00887
00888
00889 MVT::SetBlock(*newstate.X,bsind,*X_);
00890 if (hasM_) {
00891 if (newstate.MX != Teuchos::null && MVT::GetNumberVecs(*newstate.MX) >= blockSize_ && MVT::GetVecLength(*newstate.MX) == MVT::GetVecLength(*MX_) ) {
00892 MVT::SetBlock(*newstate.MX,bsind,*MX_);
00893 }
00894 else {
00895 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
00896 OPT::Apply(*MOp_,*X_,*MX_);
00897 count_ApplyM_ += blockSize_;
00898 }
00899 }
00900 else {
00901
00902 TEST_FOR_EXCEPTION(MX_ != X_, std::logic_error, "Anasazi::LOBPCG::initialize(): solver invariant not satisfied");
00903 }
00904 if (newstate.KX != Teuchos::null && MVT::GetNumberVecs(*newstate.KX) >= blockSize_ && MVT::GetVecLength(*newstate.KX) == MVT::GetVecLength(*KX_) ) {
00905 MVT::SetBlock(*newstate.KX,bsind,*KX_);
00906 }
00907 else {
00908 Teuchos::TimeMonitor lcltimer( *timerOp_ );
00909 OPT::Apply(*Op_,*X_,*KX_);
00910 count_ApplyOp_ += blockSize_;
00911 }
00912
00913
00914 theta_.resize(3*blockSize_,NANVAL);
00915 ritz2norms_.resize(3*blockSize_,NANVAL);
00916 if (newstate.T != Teuchos::null && (signed int)(newstate.T->size()) >= blockSize_) {
00917 for (int i=0; i<blockSize_; i++) {
00918 theta_[i] = (*newstate.T)[i];
00919 }
00920 }
00921 else {
00922
00923 Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
00924 MM(blockSize_,blockSize_),
00925 S(blockSize_,blockSize_);
00926 {
00927 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
00928
00929 MVT::MvTransMv(ONE,*X_,*KX_,KK);
00930
00931 MVT::MvTransMv(ONE,*X_,*MX_,MM);
00932 nevLocal_ = blockSize_;
00933 }
00934
00935
00936 {
00937 Teuchos::TimeMonitor lcltimer( *timerDS_ );
00938 MSUtils_.directSolver(blockSize_, KK, &MM, &S, &theta_, &nevLocal_, 1);
00939 TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
00940 "Anasazi::LOBPCG::initialize(): Not enough Ritz vectors to initialize algorithm.");
00941 }
00942
00943
00944 {
00945 Teuchos::TimeMonitor lcltimer( *timerSort_ );
00946
00947 std::vector<int> order(blockSize_);
00948
00949
00950 sm_->sort( this, blockSize_, theta_, &order );
00951
00952
00953 MSUtils_.permuteVectors(order,S);
00954 }
00955
00956
00957 {
00958 Teuchos::BLAS<int,ScalarType> blas;
00959 Teuchos::SerialDenseMatrix<int,ScalarType> R(blockSize_,blockSize_);
00960
00961 R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,S,ZERO);
00962 for (int i=0; i<blockSize_; i++) {
00963 blas.SCAL(blockSize_,theta_[i],R[i],1);
00964 }
00965 R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,-ONE,KK,S,ONE);
00966 for (int i=0; i<blockSize_; i++) {
00967 ritz2norms_[i] = blas.NRM2(blockSize_,R[i],1);
00968 }
00969 }
00970
00971
00972 {
00973 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
00974
00975 MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );
00976 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
00977
00978 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
00979 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
00980 if (hasM_) {
00981
00982 MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );
00983 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
00984 }
00985 }
00986 }
00987
00988
00989
00990 if (newstate.R != Teuchos::null && MVT::GetNumberVecs(*newstate.R) >= blockSize_ && MVT::GetVecLength(*newstate.R) == MVT::GetVecLength(*R_) ) {
00991 MVT::SetBlock(*newstate.R,bsind,*R_);
00992 }
00993 else {
00994 Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
00995
00996 MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
00997 Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
00998 T.putScalar(ZERO);
00999 for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01000 MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01001 }
01002
01003
01004 Rnorms_current_ = false;
01005 R2norms_current_ = false;
01006
01007
01008 if (newstate.P != Teuchos::null && MVT::GetNumberVecs(*newstate.P) >= blockSize_ && MVT::GetVecLength(*newstate.P) == MVT::GetVecLength(*P_) ) {
01009 hasP_ = true;
01010
01011 MVT::SetBlock(*newstate.P,bsind,*P_);
01012
01013 if (newstate.KP != Teuchos::null && MVT::GetNumberVecs(*newstate.KP) >= blockSize_ && MVT::GetVecLength(*newstate.KP) == MVT::GetVecLength(*KP_) ) {
01014 MVT::SetBlock(*newstate.KP,bsind,*KP_);
01015 }
01016 else {
01017 Teuchos::TimeMonitor lcltimer( *timerOp_ );
01018 OPT::Apply(*Op_,*P_,*KP_);
01019 count_ApplyOp_ += blockSize_;
01020 }
01021
01022 if (hasM_) {
01023 if (newstate.MP != Teuchos::null && MVT::GetNumberVecs(*newstate.MP) >= blockSize_ && MVT::GetVecLength(*newstate.MP) == MVT::GetVecLength(*MP_) ) {
01024 MVT::SetBlock(*newstate.MP,bsind,*MP_);
01025 }
01026 else {
01027 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01028 OPT::Apply(*MOp_,*P_,*MP_);
01029 count_ApplyM_ += blockSize_;
01030 }
01031 }
01032 }
01033
01034
01035 initialized_ = true;
01036
01037 if (om_->isVerbosity( Debug ) ) {
01038
01039 CheckList chk;
01040 chk.checkX = true;
01041 chk.checkKX = true;
01042 chk.checkMX = true;
01043 chk.checkP = true;
01044 chk.checkKP = true;
01045 chk.checkMP = true;
01046 chk.checkR = true;
01047 chk.checkQ = true;
01048 om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01049 }
01050
01051 }
01052 else {
01053
01054 LOBPCGState<ScalarType,MV> newstate;
01055 {
01056 Teuchos::TimeMonitor lcltimer( *timerInit_ );
01057
01058
01059 Teuchos::RefCountPtr<const MV> ivec = problem_->getInitVec();
01060 TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01061 "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from");
01062
01063 int initSize = MVT::GetNumberVecs(*ivec);
01064 if (initSize > blockSize_) {
01065
01066 initSize = blockSize_;
01067 std::vector<int> ind(blockSize_);
01068 for (int i=0; i<blockSize_; i++) ind[i] = i;
01069 ivec = MVT::CloneView(*ivec,ind);
01070 }
01071
01072
01073 Teuchos::RefCountPtr<MV> newMX, newX = MVT::Clone(*ivec,blockSize_);
01074
01075 std::vector<int> ind(initSize);
01076 if (initSize > 0) {
01077 for (int i=0; i<initSize; i++) ind[i] = i;
01078 MVT::SetBlock(*ivec,ind,*newX);
01079 }
01080
01081 if (blockSize_ > initSize) {
01082 ind.resize(blockSize_ - initSize);
01083 for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01084 Teuchos::RefCountPtr<MV> rX = MVT::CloneView(*newX,ind);
01085 MVT::MvRandom(*rX);
01086 rX = Teuchos::null;
01087 }
01088
01089
01090 if (hasM_) {
01091 newMX = MVT::Clone(*ivec,blockSize_);
01092 {
01093 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01094 OPT::Apply(*MOp_,*newX,*newMX);
01095 count_ApplyM_ += blockSize_;
01096 }
01097 }
01098 else {
01099 newMX = Teuchos::null;
01100 }
01101
01102
01103 if (auxVecs_.size() > 0) {
01104 Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01105 Teuchos::Array<Teuchos::RefCountPtr<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01106 int rank = orthman_->projectAndNormalize(*newX,newMX,dummy,Teuchos::null,auxVecs_);
01107 TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGInitFailure,
01108 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01109 }
01110 else {
01111 Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01112 int rank = orthman_->normalize(*newX,newMX,Teuchos::null);
01113 TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGInitFailure,
01114 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01115 }
01116
01117
01118 newstate.X = newX;
01119 newstate.MX = newMX;
01120 }
01121 initialize(newstate);
01122 }
01123 }
01124
01125 template <class ScalarType, class MV, class OP>
01126 void LOBPCG<ScalarType,MV,OP>::initialize()
01127 {
01128 LOBPCGState<ScalarType,MV> empty;
01129 initialize(empty);
01130 }
01131
01132
01134
01135 template <class ScalarType, class MV, class OP>
01136 void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01137 {
01138 if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01139
01140 fullOrtho_ = fullOrtho;
01141 return;
01142 }
01143
01144
01145 fullOrtho_ = true;
01146
01147 hasP_ = false;
01148 }
01149
01150
01152
01153 template <class ScalarType, class MV, class OP>
01154 void LOBPCG<ScalarType,MV,OP>::iterate ()
01155 {
01156
01157
01158
01159 if (initialized_ == false) {
01160 initialize();
01161 }
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172 if (fullOrtho_) {
01173 if (tmpMV_ == Teuchos::null || MVT::GetNumberVecs(*tmpMV_) != blockSize_) {
01174 tmpMV_ = MVT::Clone(*X_,blockSize_);
01175 }
01176 }
01177 else {
01178 tmpMV_ = Teuchos::null;
01179 }
01180
01181
01182
01183 const int oneBlock = blockSize_;
01184 const int twoBlocks = 2*blockSize_;
01185 const int threeBlocks = 3*blockSize_;
01186
01187
01188
01189
01190
01191
01192 Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ),
01193 MM( threeBlocks, threeBlocks ),
01194 S( threeBlocks, threeBlocks );
01195
01196 while (tester_->checkStatus(this) != Passed) {
01197
01198
01199 if (om_->isVerbosity(Debug)) {
01200 currentStatus( om_->stream(Debug) );
01201 }
01202 else if (om_->isVerbosity(IterationDetails)) {
01203 currentStatus( om_->stream(IterationDetails) );
01204 }
01205
01206 iter_++;
01207
01208
01209 if (Prec_ != Teuchos::null) {
01210 Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01211 OPT::Apply( *Prec_, *R_, *H_ );
01212 count_ApplyPrec_ += blockSize_;
01213 }
01214 else {
01215 std::vector<int> ind(blockSize_);
01216 for (int i=0; i<blockSize_; i++) { ind[i] = i; }
01217 MVT::SetBlock(*R_,ind,*H_);
01218 }
01219
01220
01221 if (hasM_) {
01222 Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01223 OPT::Apply( *MOp_, *H_, *MH_);
01224 count_ApplyM_ += blockSize_;
01225 }
01226
01227
01228
01229 Teuchos::Array<Teuchos::RefCountPtr<const MV> > Q;
01230 Teuchos::Array<Teuchos::RefCountPtr<Teuchos::SerialDenseMatrix<int,ScalarType> > > C =
01231 Teuchos::tuple<Teuchos::RefCountPtr<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01232 Q = auxVecs_;
01233 if (fullOrtho_) {
01234 Q.push_back(X_);
01235 if (hasP_) {
01236 Q.push_back(P_);
01237 }
01238 }
01239 {
01240 Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01241 int rank = orthman_->projectAndNormalize(*H_,MH_,C,Teuchos::null,Q);
01242 TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01243 "Anasazi::LOBPCG::iterate(): unable to compute full basis for H");
01244 }
01245
01246 if (om_->isVerbosity( Debug ) ) {
01247 CheckList chk;
01248 chk.checkH = true;
01249 chk.checkMH = true;
01250 om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01251 }
01252 else if (om_->isVerbosity( OrthoDetails ) ) {
01253 CheckList chk;
01254 chk.checkH = true;
01255 chk.checkMH = true;
01256 om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01257 }
01258
01259
01260 {
01261 Teuchos::TimeMonitor lcltimer( *timerOp_ );
01262 OPT::Apply( *Op_, *H_, *KH_);
01263 count_ApplyOp_ += blockSize_;
01264 }
01265
01266 int localSize;
01267 if (hasP_) {
01268 localSize = threeBlocks;
01269 }
01270 else {
01271 localSize = twoBlocks;
01272 }
01273
01274
01275 {
01276 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01277
01278
01279
01280
01281
01282
01283
01284
01285 Teuchos::SerialDenseMatrix<int,ScalarType> KK11( Teuchos::View, KK, blockSize_, blockSize_ );
01286 MVT::MvTransMv( ONE, *X_, *KX_, KK11 );
01287 Teuchos::SerialDenseMatrix<int,ScalarType> KK12( Teuchos::View, KK, blockSize_, blockSize_, 0, blockSize_ );
01288 MVT::MvTransMv( ONE, *X_, *KH_, KK12 );
01289 Teuchos::SerialDenseMatrix<int,ScalarType> KK22( Teuchos::View, KK, blockSize_, blockSize_, blockSize_, blockSize_ );
01290 MVT::MvTransMv( ONE, *H_, *KH_, KK22 );
01291
01292 Teuchos::SerialDenseMatrix<int,ScalarType> MM11( Teuchos::View, MM, blockSize_, blockSize_ );
01293 MVT::MvTransMv( ONE, *X_, *MX_, MM11 );
01294 Teuchos::SerialDenseMatrix<int,ScalarType> MM12( Teuchos::View, MM, blockSize_, blockSize_, 0, blockSize_ );
01295 MVT::MvTransMv( ONE, *X_, *MH_, MM12 );
01296 Teuchos::SerialDenseMatrix<int,ScalarType> MM22( Teuchos::View, MM, blockSize_, blockSize_, blockSize_, blockSize_ );
01297 MVT::MvTransMv( ONE, *H_, *MH_, MM22 );
01298
01299 if (hasP_) {
01300 Teuchos::SerialDenseMatrix<int,ScalarType> KK13( Teuchos::View, KK, blockSize_, blockSize_, 0, twoBlocks );
01301 MVT::MvTransMv( ONE, *X_, *KP_, KK13 );
01302 Teuchos::SerialDenseMatrix<int,ScalarType> KK23( Teuchos::View, KK, blockSize_, blockSize_, blockSize_, twoBlocks );
01303 MVT::MvTransMv( ONE, *H_, *KP_, KK23 );
01304 Teuchos::SerialDenseMatrix<int,ScalarType> KK33( Teuchos::View, KK, blockSize_, blockSize_, twoBlocks, twoBlocks );
01305 MVT::MvTransMv( ONE, *P_, *KP_, KK33 );
01306
01307 Teuchos::SerialDenseMatrix<int,ScalarType> MM13( Teuchos::View, MM, blockSize_, blockSize_, 0, twoBlocks );
01308 MVT::MvTransMv( ONE, *X_, *MP_, MM13 );
01309 Teuchos::SerialDenseMatrix<int,ScalarType> MM23( Teuchos::View, MM, blockSize_, blockSize_, blockSize_, twoBlocks );
01310 MVT::MvTransMv( ONE, *H_, *MP_, MM23 );
01311 Teuchos::SerialDenseMatrix<int,ScalarType> MM33( Teuchos::View, MM, blockSize_, blockSize_, twoBlocks, twoBlocks );
01312 MVT::MvTransMv( ONE, *P_, *MP_, MM33 );
01313 }
01314
01315
01316 for (int j=0; j<localSize; j++) {
01317 for (int i=j+1; i<localSize; i++) {
01318 KK(i,j) = KK(j,i);
01319 MM(i,j) = MM(j,i);
01320 }
01321 }
01322 }
01323
01324
01325 {
01326 Teuchos::TimeMonitor lcltimer( *timerDS_ );
01327 nevLocal_ = localSize;
01328 MSUtils_.directSolver(localSize, KK, &MM, &S, &theta_, &nevLocal_, 0);
01329
01330
01331
01332
01333
01334
01335 }
01336 om_->stream(Debug) << " After directSolve: localSize == " << localSize << " \tnevLocal == " << nevLocal_ << endl;
01337 TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, "Indefiniteness detected in projecteded mass matrix." );
01338
01339 Teuchos::LAPACK<int,ScalarType> lapack;
01340 Teuchos::BLAS<int,ScalarType> blas;
01341
01342
01343
01344
01345 {
01346 Teuchos::TimeMonitor lcltimer( *timerSort_ );
01347
01348 std::vector<int> order(nevLocal_);
01349
01350
01351 sm_->sort( this, nevLocal_, theta_, &order );
01352
01353
01354 Teuchos::SerialDenseMatrix<int,ScalarType> curS(Teuchos::View,S,nevLocal_,nevLocal_);
01355 MSUtils_.permuteVectors(order,curS);
01356 }
01357
01358
01359 {
01360 Teuchos::SerialDenseMatrix<int,ScalarType> R(nevLocal_,nevLocal_);
01361 Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_),
01362 lclMM(Teuchos::View,MM,nevLocal_,nevLocal_);
01363
01364 R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,S,ZERO);
01365 for (int i=0; i<nevLocal_; i++) {
01366 blas.SCAL(nevLocal_,theta_[i],R[i],1);
01367 }
01368 R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,-ONE,lclKK,S,ONE);
01369 for (int i=0; i<nevLocal_; i++) {
01370 ritz2norms_[i] = blas.NRM2(nevLocal_,R[i],1);
01371 }
01372 }
01373
01374
01375
01376
01377
01378
01379
01380 Teuchos::SerialDenseMatrix<int,ScalarType> CX(threeBlocks,oneBlock), CP(threeBlocks,oneBlock);
01381 if (fullOrtho_ && localSize >= twoBlocks) {
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391 Teuchos::SerialDenseMatrix<int,ScalarType> C(threeBlocks,twoBlocks),
01392 tmp1(threeBlocks,twoBlocks),
01393 tmp2(twoBlocks ,twoBlocks);
01394
01395
01396 for (int i=0; i<oneBlock; i++) {
01397
01398 for (int j=0; j<oneBlock; j++) {
01399 C(i,j) = S(i,j);
01400 }
01401
01402 for (int j=oneBlock; j<twoBlocks; j++) {
01403 C(i,j) = ZERO;
01404 }
01405 }
01406
01407 for (int j=0; j<oneBlock; j++) {
01408 for (int i=oneBlock; i<twoBlocks; i++) {
01409
01410 C(i,j) = S(i,j);
01411
01412 C(i,j+oneBlock) = S(i,j);
01413 }
01414 }
01415
01416 if (localSize == threeBlocks) {
01417 for (int j=0; j<oneBlock; j++) {
01418 for (int i=twoBlocks; i<threeBlocks; i++) {
01419
01420 C(i,j) = S(i,j);
01421
01422 C(i,j+oneBlock) = S(i,j);
01423 }
01424 }
01425 }
01426 else {
01427 for (int j=0; j<twoBlocks; j++) {
01428 for (int i=twoBlocks; i<threeBlocks; i++) {
01429 C(i,j) = ZERO;
01430 }
01431 }
01432 }
01433
01434
01435 int teuchosret;
01436 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,C,ZERO);
01437 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01438
01439
01440 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,tmp1,ZERO);
01441 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01442
01443
01444 int info;
01445 lapack.POTRF('U',twoBlocks,tmp2.values(),tmp2.stride(),&info);
01446 TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure,
01447 "Anasazi::LOBPCG::iterate(): Could not perform full orthogonalization.");
01448
01449 blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01450 threeBlocks,twoBlocks,ONE,tmp2.values(),tmp2.stride(),C.values(),C.stride());
01451
01452 for (int j=0; j<oneBlock; j++) {
01453 for (int i=0; i<threeBlocks; i++) {
01454 CX(i,j) = C(i,j);
01455 }
01456 }
01457
01458 for (int j=0; j<oneBlock; j++) {
01459 for (int i=0; i<threeBlocks; i++) {
01460 CP(i,j) = C(i,oneBlock+j);
01461 }
01462 }
01463
01464
01465 if (om_->isVerbosity( Debug ) ) {
01466 Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(threeBlocks,oneBlock),
01467 tmp2(oneBlock,oneBlock);
01468 MagnitudeType tmp;
01469 int teuchosret;
01470 stringstream os;
01471 os.precision(2);
01472 os.setf(ios::scientific, ios::floatfield);
01473
01474 os << " Checking Full Ortho: iteration " << iter_ << endl;
01475
01476
01477
01478 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,CX,ZERO);
01479 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01480
01481 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,CX,tmp1,ZERO);
01482 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01483
01484 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01485 tmp = tmp2.normFrobenius();
01486 os << " >> Error in CX^H MM CX == I : " << tmp << endl;
01487
01488
01489
01490 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,CP,ZERO);
01491 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01492
01493 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,CP,tmp1,ZERO);
01494 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01495
01496 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01497 tmp = tmp2.normFrobenius();
01498 os << " >> Error in CP^H MM CP == I : " << tmp << endl;
01499
01500
01501
01502 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,CP,ZERO);
01503 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01504
01505 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,CX,tmp1,ZERO);
01506 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01507
01508 tmp = tmp2.normFrobenius();
01509 os << " >> Error in CX^H MM CP == 0 : " << tmp << endl;
01510
01511 os << endl;
01512 om_->print(Debug,os.str());
01513 }
01514 }
01515 else {
01516
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526 Teuchos::SerialDenseMatrix<int,ScalarType> S1(Teuchos::View,S,threeBlocks,oneBlock);
01527 CX.assign(S1);
01528 CP.assign(S1);
01529
01530 for (int j=0; j<oneBlock; j++) {
01531 for (int i=0; i<oneBlock; i++) {
01532 CP(i,j) = ZERO;
01533 }
01534 }
01535 }
01536
01537
01538
01539
01540
01541 hasP_ = false;
01542 {
01543 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01544
01545 TEST_FOR_EXCEPTION(localSize==oneBlock,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error in local update.");
01546 if (localSize == twoBlocks) {
01547 Teuchos::SerialDenseMatrix<int,ScalarType> CX1( Teuchos::View, CX, blockSize_, blockSize_ ),
01548 CX2( Teuchos::View, CX, blockSize_, blockSize_, oneBlock, 0 ),
01549 CP1( Teuchos::View, CP, blockSize_, blockSize_ ),
01550 CP2( Teuchos::View, CP, blockSize_, blockSize_, oneBlock, 0 );
01551
01552 hasP_ = true;
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562 MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );
01563
01564 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *X_ );
01565 MVT::MvTimesMatAddMv( ONE, *H_, CX2, ONE , *X_ );
01566
01567
01568 if (fullOrtho_) {
01569 MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *P_ );
01570 MVT::MvTimesMatAddMv( ONE, *H_, CP2, ONE, *P_ );
01571 }
01572 else {
01573 MVT::MvTimesMatAddMv( ONE, *H_, CP2, ZERO, *P_ );
01574 }
01575
01576
01577 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01578
01579 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *KX_ );
01580 MVT::MvTimesMatAddMv( ONE, *KH_, CX2, ONE , *KX_ );
01581
01582
01583 if (fullOrtho_) {
01584 MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *KP_ );
01585 MVT::MvTimesMatAddMv( ONE, *KH_, CP2, ONE, *KP_ );
01586 }
01587 else {
01588 MVT::MvTimesMatAddMv( ONE, *KH_, CP2, ZERO, *KP_ );
01589 }
01590
01591 if (hasM_) {
01592
01593 MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );
01594
01595 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *MX_ );
01596 MVT::MvTimesMatAddMv( ONE, *MH_, CX2, ONE , *MX_ );
01597
01598
01599 if (fullOrtho_) {
01600 MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *MP_ );
01601 MVT::MvTimesMatAddMv( ONE, *MH_, CP2, ONE, *MP_ );
01602 }
01603 else {
01604 MVT::MvTimesMatAddMv( ONE, *MH_, CP2, ZERO, *MP_ );
01605 }
01606 }
01607
01608 }
01609 else if (localSize == threeBlocks) {
01610 Teuchos::SerialDenseMatrix<int,ScalarType> CX1( Teuchos::View, CX, blockSize_, blockSize_ ),
01611 CX2( Teuchos::View, CX, blockSize_, blockSize_, oneBlock, 0 ),
01612 CX3( Teuchos::View, CX, blockSize_, blockSize_, twoBlocks, 0 ),
01613 CP1( Teuchos::View, CP, blockSize_, blockSize_ ),
01614 CP2( Teuchos::View, CP, blockSize_, blockSize_, oneBlock, 0 ),
01615 CP3( Teuchos::View, CP, blockSize_, blockSize_, twoBlocks, 0 );
01616
01617 hasP_ = true;
01618
01619
01620
01621 if (fullOrtho_) {
01622
01623 MVT::MvAddMv(ONE,*X_, ZERO,*X_, *R_);
01624 MVT::MvAddMv(ONE,*P_, ZERO,*P_, *tmpMV_);
01625
01626
01627 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *X_ );
01628 MVT::MvTimesMatAddMv( ONE, *tmpMV_, CX3, ONE, *X_ );
01629 MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *P_ );
01630 MVT::MvTimesMatAddMv( ONE, *tmpMV_, CP3, ONE, *P_ );
01631 }
01632 else {
01633
01634 MVT::MvAddMv(ONE,*X_, ZERO,*X_, *R_);
01635
01636
01637 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *X_ );
01638 MVT::MvTimesMatAddMv( ONE, *P_, CX3, ONE, *X_ );
01639
01640 MVT::MvAddMv(ONE,*P_, ZERO,*P_, *R_);
01641 MVT::MvTimesMatAddMv( ONE, *R_, CP3, ZERO, *P_ );
01642 }
01643
01644
01645 MVT::MvTimesMatAddMv( ONE, *H_, CX2, ONE, *X_ );
01646 MVT::MvTimesMatAddMv( ONE, *H_, CP2, ONE, *P_ );
01647
01648
01649
01650
01651 if (fullOrtho_) {
01652
01653 MVT::MvAddMv(ONE,*KX_, ZERO,*KX_, *R_);
01654 MVT::MvAddMv(ONE,*KP_, ZERO,*KP_, *tmpMV_);
01655
01656
01657 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *KX_ );
01658 MVT::MvTimesMatAddMv( ONE, *tmpMV_, CX3, ONE, *KX_ );
01659 MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *KP_ );
01660 MVT::MvTimesMatAddMv( ONE, *tmpMV_, CP3, ONE, *KP_ );
01661 }
01662 else {
01663
01664 MVT::MvAddMv(ONE,*KX_, ZERO,*KX_, *R_);
01665
01666
01667 MVT::MvTimesMatAddMv( ONE, *R_ , CX1, ZERO, *KX_ );
01668 MVT::MvTimesMatAddMv( ONE, *KP_, CX3, ONE, *KX_ );
01669
01670 MVT::MvAddMv(ONE,*KP_, ZERO,*KP_, *R_);
01671 MVT::MvTimesMatAddMv( ONE, *R_, CP3, ZERO, *KP_ );
01672 }
01673
01674
01675 MVT::MvTimesMatAddMv( ONE, *KH_, CX2, ONE, *KX_ );
01676 MVT::MvTimesMatAddMv( ONE, *KH_, CP2, ONE, *KP_ );
01677
01678
01679 if (hasM_) {
01680
01681
01682 if (fullOrtho_) {
01683
01684 MVT::MvAddMv(ONE,*MX_, ZERO,*MX_, *R_);
01685 MVT::MvAddMv(ONE,*MP_, ZERO,*MP_, *tmpMV_);
01686
01687
01688 MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *MX_ );
01689 MVT::MvTimesMatAddMv( ONE, *tmpMV_, CX3, ONE, *MX_ );
01690 MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *MP_ );
01691 MVT::MvTimesMatAddMv( ONE, *tmpMV_, CP3, ONE, *MP_ );
01692 }
01693 else {
01694
01695 MVT::MvAddMv(ONE,*MX_, ZERO,*MX_, *R_);
01696
01697
01698 MVT::MvTimesMatAddMv( ONE, *R_ , CX1, ZERO, *MX_ );
01699 MVT::MvTimesMatAddMv( ONE, *MP_, CX3, ONE, *MX_ );
01700
01701 MVT::MvAddMv(ONE,*MP_, ZERO,*MP_, *R_);
01702 MVT::MvTimesMatAddMv( ONE, *R_, CP3, ZERO, *MP_ );
01703 }
01704
01705
01706 MVT::MvTimesMatAddMv( ONE, *MH_, CX2, ONE, *MX_ );
01707 MVT::MvTimesMatAddMv( ONE, *MH_, CP2, ONE, *MP_ );
01708 }
01709
01710 }
01711 }
01712
01713
01714 {
01715 Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01716 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01717 Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01718 for (int i = 0; i < blockSize_; i++) {
01719 T(i,i) = theta_[i];
01720 }
01721 MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01722 }
01723
01724
01725 Rnorms_current_ = false;
01726 R2norms_current_ = false;
01727
01728
01729
01730 if (om_->isVerbosity( Debug ) ) {
01731
01732 CheckList chk;
01733 chk.checkX = true;
01734 chk.checkKX = true;
01735 chk.checkMX = true;
01736 chk.checkP = true;
01737 chk.checkKP = true;
01738 chk.checkMP = true;
01739 chk.checkR = true;
01740 om_->print( Debug, accuracyCheck(chk, ": after local update") );
01741 }
01742 else if (om_->isVerbosity( OrthoDetails )) {
01743 CheckList chk;
01744 chk.checkX = true;
01745 chk.checkP = true;
01746 chk.checkR = true;
01747 om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01748 }
01749 }
01750 }
01751
01752
01754
01755 template <class ScalarType, class MV, class OP>
01756 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
01757 LOBPCG<ScalarType,MV,OP>::getResNorms() {
01758 if (Rnorms_current_ == false) {
01759
01760 orthman_->norm(*R_,&Rnorms_);
01761 Rnorms_current_ = true;
01762 }
01763 return Rnorms_;
01764 }
01765
01766
01768
01769 template <class ScalarType, class MV, class OP>
01770 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType>
01771 LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01772 if (R2norms_current_ == false) {
01773
01774 MVT::MvNorm(*R_,&R2norms_);
01775 R2norms_current_ = true;
01776 }
01777 return R2norms_;
01778 }
01779
01780
01781
01782
01784
01785
01786
01787
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797
01798
01799
01800
01801
01802
01803
01804
01805
01806
01807
01808
01809
01810 template <class ScalarType, class MV, class OP>
01811 std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const string &where ) const
01812 {
01813 stringstream os;
01814 os.precision(2);
01815 os.setf(ios::scientific, ios::floatfield);
01816 MagnitudeType tmp;
01817
01818 os << " Debugging checks: iteration " << iter_ << where << endl;
01819
01820
01821 if (chk.checkX && initialized_) {
01822 tmp = orthman_->orthonormError(*X_);
01823 os << " >> Error in X^H M X == I : " << tmp << endl;
01824 for (unsigned int i=0; i<auxVecs_.size(); i++) {
01825 tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
01826 os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
01827 }
01828 }
01829 if (chk.checkMX && hasM_ && initialized_) {
01830 tmp = MSUtils_.errorEquality(X_.get(), MX_.get(), MOp_.get());
01831 os << " >> Error in MX == M*X : " << tmp << endl;
01832 }
01833 if (chk.checkKX && initialized_) {
01834 tmp = MSUtils_.errorEquality(X_.get(), KX_.get(), Op_.get());
01835 os << " >> Error in KX == K*X : " << tmp << endl;
01836 }
01837
01838
01839 if (chk.checkP && hasP_ && initialized_) {
01840 if (fullOrtho_) {
01841 tmp = orthman_->orthonormError(*P_);
01842 os << " >> Error in P^H M P == I : " << tmp << endl;
01843 tmp = orthman_->orthogError(*P_,*X_);
01844 os << " >> Error in P^H M X == 0 : " << tmp << endl;
01845 }
01846 for (unsigned int i=0; i<auxVecs_.size(); i++) {
01847 tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
01848 os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
01849 }
01850 }
01851 if (chk.checkMP && hasM_ && hasP_ && initialized_) {
01852 tmp = MSUtils_.errorEquality(P_.get(), MP_.get(), MOp_.get());
01853 os << " >> Error in MP == M*P : " << tmp << endl;
01854 }
01855 if (chk.checkKP && hasP_ && initialized_) {
01856 tmp = MSUtils_.errorEquality(P_.get(), KP_.get(), Op_.get());
01857 os << " >> Error in KP == K*P : " << tmp << endl;
01858 }
01859
01860
01861 if (chk.checkH && initialized_) {
01862 if (fullOrtho_) {
01863 tmp = orthman_->orthonormError(*H_);
01864 os << " >> Error in H^H M H == I : " << tmp << endl;
01865 tmp = orthman_->orthogError(*H_,*X_);
01866 os << " >> Error in H^H M X == 0 : " << tmp << endl;
01867 if (hasP_) {
01868 tmp = orthman_->orthogError(*H_,*P_);
01869 os << " >> Error in H^H M P == 0 : " << tmp << endl;
01870 }
01871 }
01872 for (unsigned int i=0; i<auxVecs_.size(); i++) {
01873 tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
01874 os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
01875 }
01876 }
01877 if (chk.checkMH && hasM_ && initialized_) {
01878 tmp = MSUtils_.errorEquality(H_.get(), MH_.get(), MOp_.get());
01879 os << " >> Error in MH == M*H : " << tmp << endl;
01880 }
01881
01882
01883 if (chk.checkR && initialized_) {
01884 Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
01885 MVT::MvTransMv(ONE,*X_,*R_,xTx);
01886 tmp = xTx.normFrobenius();
01887 os << " >> Error in X^H R == 0 : " << tmp << endl;
01888 }
01889
01890
01891 if (chk.checkQ) {
01892 for (unsigned int i=0; i<auxVecs_.size(); i++) {
01893 tmp = orthman_->orthonormError(*auxVecs_[i]);
01894 os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
01895 for (unsigned int j=i+1; j<auxVecs_.size(); j++) {
01896 tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
01897 os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
01898 }
01899 }
01900 }
01901
01902 os << endl;
01903
01904 return os.str();
01905 }
01906
01907
01909
01910 template <class ScalarType, class MV, class OP>
01911 void
01912 LOBPCG<ScalarType,MV,OP>::currentStatus(ostream &os)
01913 {
01914 os.setf(ios::scientific, ios::floatfield);
01915 os.precision(6);
01916 os <<endl;
01917 os <<"================================================================================" << endl;
01918 os << endl;
01919 os <<" LOBPCG Solver Status" << endl;
01920 os << endl;
01921 os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
01922 os <<"The number of iterations performed is " << iter_ << endl;
01923 os <<"The current block size is " << blockSize_ << endl;
01924 os <<"The number of auxiliary vectors is " << numAuxVecs_ << endl;
01925 os <<"The number of operations Op*x is " << count_ApplyOp_ << endl;
01926 os <<"The number of operations M*x is " << count_ApplyM_ << endl;
01927 os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
01928
01929 os.setf(ios_base::right, ios_base::adjustfield);
01930
01931 if (initialized_) {
01932 os << endl;
01933 os <<"CURRENT EIGENVALUE ESTIMATES "<<endl;
01934 os << std::setw(20) << "Eigenvalue"
01935 << std::setw(20) << "Residual(M)"
01936 << std::setw(20) << "Residual(2)"
01937 << endl;
01938 os <<"--------------------------------------------------------------------------------"<<endl;
01939 for (int i=0; i<blockSize_; i++) {
01940 os << std::setw(20) << theta_[i];
01941 if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
01942 else os << std::setw(20) << "not current";
01943 if (R2norms_current_) os << std::setw(20) << R2norms_[i];
01944 else os << std::setw(20) << "not current";
01945 os << endl;
01946 }
01947 }
01948 os <<"================================================================================" << endl;
01949 os << endl;
01950 }
01951
01952
01953 }
01954
01955 #endif // ANASAZI_LOBPCG_HPP