Anasazi: AnasaziLOBPCG.hpp Source File

00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziModalSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00092 namespace Anasazi {
00093 
00095 
00096 
00101   template <class ScalarType, class MV>
00102   struct LOBPCGState {
00104     Teuchos::RefCountPtr<const MV> X; 
00106     Teuchos::RefCountPtr<const MV> KX; 
00108     Teuchos::RefCountPtr<const MV> MX;
00110     Teuchos::RefCountPtr<const MV> P; 
00112     Teuchos::RefCountPtr<const MV> KP; 
00114     Teuchos::RefCountPtr<const MV> MP;
00119     Teuchos::RefCountPtr<const MV> H; 
00121     Teuchos::RefCountPtr<const MV> KH; 
00123     Teuchos::RefCountPtr<const MV> MH;
00125     Teuchos::RefCountPtr<const MV> R;
00127     Teuchos::RefCountPtr<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00128     LOBPCGState() : X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00129                     P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00130                     H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00131                     R(Teuchos::null),T(Teuchos::null) {};
00132   };
00133 
00135 
00137 
00138 
00152   class LOBPCGRitzFailure : public AnasaziError {public:
00153     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00154     {}};
00155 
00168   class LOBPCGInitFailure : public AnasaziError {public:
00169     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00170     {}};
00171 
00188   class LOBPCGOrthoFailure : public AnasaziError {public:
00189     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00190     {}};
00191 
00193 
00194 
00195   template <class ScalarType, class MV, class OP>
00196   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00197   public:
00198     
00200 
00201     
00209     LOBPCG( const Teuchos::RefCountPtr<Eigenproblem<ScalarType,MV,OP> > &problem, 
00210             const Teuchos::RefCountPtr<SortManager<ScalarType,MV,OP> > &sorter,
00211             const Teuchos::RefCountPtr<OutputManager<ScalarType> > &printer,
00212             const Teuchos::RefCountPtr<StatusTest<ScalarType,MV,OP> > &tester,
00213             const Teuchos::RefCountPtr<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00214             Teuchos::ParameterList &params 
00215           );
00216     
00218     virtual ~LOBPCG() {};
00219 
00221 
00223 
00224     
00251     void iterate();
00252 
00276     void initialize(LOBPCGState<ScalarType,MV> newstate);
00277 
00281     void initialize();
00282 
00302     bool isInitialized() { return initialized_; }
00303 
00314     LOBPCGState<ScalarType,MV> getState() const {
00315       LOBPCGState<ScalarType,MV> state;
00316       state.X = X_;
00317       state.KX = KX_;
00318       state.P = P_;
00319       state.KP = KP_;
00320       state.H = H_;
00321       state.KH = KH_;
00322       state.R = R_;
00323       state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
00324       if (hasM_) {
00325         state.MX = MX_;
00326         state.MP = MP_;
00327         state.MH = MH_;
00328       }
00329       else {
00330         state.MX = Teuchos::null;
00331         state.MP = Teuchos::null;
00332         state.MH = Teuchos::null;
00333       }
00334       return state;
00335     }
00336 
00338 
00340 
00341 
00343     int getNumIters() const { return(iter_); };
00344 
00346     void resetNumIters() { iter_=0; };
00347 
00355     Teuchos::RefCountPtr<const MV> getRitzVectors() {return X_;}
00356 
00362     std::vector<Value<ScalarType> > getRitzValues() { 
00363       std::vector<Value<ScalarType> > ret(nevLocal_);
00364       for (int i=0; i<nevLocal_; i++) {
00365         ret[i].realpart = theta_[i];
00366         ret[i].imagpart = ZERO;
00367       }
00368       return ret;
00369     }
00370 
00378     std::vector<int> getRitzIndex() {
00379       std::vector<int> ret(nevLocal_,0);
00380       return ret;
00381     }
00382 
00383 
00384 
00385 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00392 
00393 
00399     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00400 
00401 
00406     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms() {
00407       std::vector<MagnitudeType> ret = ritz2norms_;
00408       ret.resize(nevLocal_);
00409       return ret;
00410     }
00411 
00412 
00421     int getCurSubspaceDim() const {
00422       if (!initialized_) return 0;
00423       return nevLocal_;
00424     }
00425 
00429     int getMaxSubspaceDim() const {return 3*blockSize_;}
00430 
00432 
00434 
00435 
00436 
00438     const Eigenproblem<ScalarType,MV,OP>& getProblem() const { return(*problem_); };
00439 
00440 
00449     void setBlockSize(int blockSize);
00450 
00451 
00453     int getBlockSize() const { return(blockSize_); }
00454 
00455 
00467     void setAuxVecs(const Teuchos::Array<Teuchos::RefCountPtr<const MV> > &auxvecs);
00468 
00470     Teuchos::Array<Teuchos::RefCountPtr<const MV> > getAuxVecs() const {return auxVecs_;}
00471 
00473 
00475 
00476 
00482     void setFullOrtho(bool fullOrtho);
00483 
00485     bool getFullOrtho() const { return(fullOrtho_); }
00486     
00488     bool hasP() {return hasP_;}
00489 
00491     
00493 
00494 
00496     void currentStatus(ostream &os);
00497 
00499 
00500   private:
00501     //
00502     // Convenience typedefs
00503     //
00504     typedef MultiVecTraits<ScalarType,MV> MVT;
00505     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00506     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00507     typedef typename SCT::magnitudeType MagnitudeType;
00508     const MagnitudeType ONE;  
00509     const MagnitudeType ZERO; 
00510     const MagnitudeType NANVAL;
00511     //
00512     // Internal structs
00513     //
00514     struct CheckList {
00515       bool checkX, checkMX, checkKX;
00516       bool checkH, checkMH;
00517       bool checkP, checkMP, checkKP;
00518       bool checkR, checkQ;
00519       CheckList() : checkX(false),checkMX(false),checkKX(false),
00520                     checkH(false),checkMH(false),
00521                     checkP(false),checkMP(false),checkKP(false),
00522                     checkR(false),checkQ(false) {};
00523     };
00524     //
00525     // Internal methods
00526     //
00527     string accuracyCheck(const CheckList &chk, const string &where) const;
00528     //
00529     // Classes inputed through constructor that define the eigenproblem to be solved.
00530     //
00531     const Teuchos::RefCountPtr<Eigenproblem<ScalarType,MV,OP> >     problem_;
00532     const Teuchos::RefCountPtr<SortManager<ScalarType,MV,OP> >      sm_;
00533     const Teuchos::RefCountPtr<OutputManager<ScalarType> >          om_;
00534     const Teuchos::RefCountPtr<StatusTest<ScalarType,MV,OP> >       tester_;
00535     const Teuchos::RefCountPtr<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00536     //
00537     // Information obtained from the eigenproblem
00538     //
00539     Teuchos::RefCountPtr<OP> Op_;
00540     Teuchos::RefCountPtr<OP> MOp_;
00541     Teuchos::RefCountPtr<OP> Prec_;
00542     bool hasM_;
00543     //
00544     // Internal utilities class required by eigensolver.
00545     //
00546     ModalSolverUtils<ScalarType,MV,OP> MSUtils_;
00547     //
00548     // Internal timers
00549     //
00550     Teuchos::RefCountPtr<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00551                                         timerSort_, 
00552                                         timerLocalProj_, timerDS_,
00553                                         timerLocalUpdate_, timerCompRes_,
00554                                         timerOrtho_, timerInit_;
00555     //
00556     // Counters
00557     //
00558     // Number of operator applications
00559     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00560 
00561     //
00562     // Algorithmic parameters.
00563     //
00564     // blockSize_ is the solver block size
00565     int blockSize_;
00566     //
00567     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00568     // be activated (see citations at the top of this file)
00569     bool fullOrtho_;
00570 
00571     //
00572     // Current solver state
00573     //
00574     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00575     // is capable of running; _initialize is controlled  by the initialize() member method
00576     // For the implications of the state of initialized_, please see documentation for initialize()
00577     bool initialized_;
00578     //
00579     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00580     // this tells us how many of the values in theta_ are valid Ritz values
00581     int nevLocal_;
00582     //
00583     // hasP_ tells us whether there is valid data in P (and KP,MP)
00584     bool hasP_;
00585     //
00586     // State Multivecs
00587     Teuchos::RefCountPtr<MV> X_, KX_, MX_, R_,
00588                              H_, KH_, MH_,
00589                              P_, KP_, MP_;
00590     // tmpMV is needed only if fullOrtho_ == true
00591     // because is depends on fullOrtho_, which is easily toggled by the user, we will allocate it 
00592     // and deallocate it inside of iterate()
00593     Teuchos::RefCountPtr<MV> tmpMV_;        
00594     // 
00595     // auxiliary vectors
00596     Teuchos::Array<Teuchos::RefCountPtr<const MV> > auxVecs_;
00597     int numAuxVecs_;
00598     //
00599     // Number of iterations that have been performed.
00600     int iter_;
00601     // 
00602     // Current eigenvalues, residual norms
00603     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_, ritz2norms_;
00604     // 
00605     // are the residual norms current with the residual?
00606     bool Rnorms_current_, R2norms_current_;
00607 
00608   };
00609 
00610 
00611 
00612 
00614   // Constructor
00615   template <class ScalarType, class MV, class OP>
00616   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00617         const Teuchos::RefCountPtr<Eigenproblem<ScalarType,MV,OP> > &problem, 
00618         const Teuchos::RefCountPtr<SortManager<ScalarType,MV,OP> > &sorter,
00619         const Teuchos::RefCountPtr<OutputManager<ScalarType> > &printer,
00620         const Teuchos::RefCountPtr<StatusTest<ScalarType,MV,OP> > &tester,
00621         const Teuchos::RefCountPtr<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00622         Teuchos::ParameterList &params
00623         ) :
00624     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00625     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00626     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00627     // problem, tools
00628     problem_(problem), 
00629     sm_(sorter),
00630     om_(printer),
00631     tester_(tester),
00632     orthman_(ortho),
00633     MSUtils_(om_),
00634     // timers, counters
00635     timerOp_(Teuchos::TimeMonitor::getNewTimer("Operation Op*x")),
00636     timerMOp_(Teuchos::TimeMonitor::getNewTimer("Operation M*x")),
00637     timerPrec_(Teuchos::TimeMonitor::getNewTimer("Operation Prec*x")),
00638     timerSort_(Teuchos::TimeMonitor::getNewTimer("Sorting eigenvalues")),
00639     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Local projection")),
00640     timerDS_(Teuchos::TimeMonitor::getNewTimer("Direct solve")),
00641     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Local update")),
00642     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Computing residuals")),
00643     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Orthogonalization")),
00644     timerInit_(Teuchos::TimeMonitor::getNewTimer("Initialization")),
00645     count_ApplyOp_(0),
00646     count_ApplyM_(0),
00647     count_ApplyPrec_(0),
00648     // internal data
00649     blockSize_(0),
00650     fullOrtho_(params.get("Full Ortho", true)),
00651     initialized_(false),
00652     nevLocal_(0),
00653     hasP_(false),
00654     auxVecs_( Teuchos::Array<Teuchos::RefCountPtr<const MV> >(0) ), 
00655     numAuxVecs_(0),
00656     iter_(0),
00657     Rnorms_current_(false),
00658     R2norms_current_(false)
00659   {     
00660     TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00661                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00662     TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00663                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00664     TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00665                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00666     TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00667                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00668     TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00669                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00670     TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00671                        "Anasazi::LOBPCG::constructor: problem is not set.");
00672     TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00673                        "Anasazi::LOBPCG::constructor: problem is not hermitian.");
00674 
00675     // get the problem operators
00676     Op_   = problem_->getOperator();
00677     TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00678                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00679     MOp_  = problem_->getM();
00680     Prec_ = problem_->getPrec();
00681     hasM_ = (MOp_ != Teuchos::null);
00682 
00683     // set the block size and allocate data
00684     int bs = params.get("Block Size", problem_->getNEV());
00685     setBlockSize(bs);
00686   }
00687 
00688 
00690   // Set the block size and make necessary adjustments.
00691   template <class ScalarType, class MV, class OP>
00692   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int blockSize) 
00693   {
00694     // time spent here counts towards timerInit_
00695     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00696 
00697 
00698     // This routine only allocates space; it doesn't not perform any computation
00699     // if size is decreased, take the first blockSize vectors of all and leave state as is
00700     // otherwise, grow/allocate space and set solver to unitialized
00701 
00702     TEST_FOR_EXCEPTION(blockSize <= 0, std::invalid_argument, "Anasazi::LOBPCG::setBlockSize was passed a non-positive block size");
00703     if (blockSize == blockSize_) {
00704       // do nothing
00705       return;
00706     }
00707     else if (blockSize < blockSize_) {
00708       // shrink vectors
00709       blockSize_ = blockSize;
00710 
00711       theta_.resize(3*blockSize_);
00712       ritz2norms_.resize(3*blockSize_);
00713       Rnorms_.resize(blockSize_);
00714       R2norms_.resize(blockSize_);
00715 
00716       if (initialized_) {
00717         // shrink multivectors with copy
00718         // create ind = {0, 1, ..., blockSize-1}
00719         std::vector<int> ind(blockSize_);
00720         for (int i=0; i<blockSize_; i++) ind[i] = i;
00721         
00722         X_  = MVT::CloneCopy(*X_,ind);
00723         KX_ = MVT::CloneCopy(*KX_,ind);
00724         if (hasM_) {
00725           MX_ = MVT::CloneCopy(*MX_,ind);
00726         }
00727         else {
00728           MX_ = X_;
00729         }
00730         R_  = MVT::CloneCopy(*R_,ind);
00731         P_  = MVT::CloneCopy(*P_,ind);
00732         KP_ = MVT::CloneCopy(*KP_,ind);
00733         if (hasM_) {
00734           MP_ = MVT::CloneCopy(*MP_,ind);
00735         }
00736         else {
00737           MP_ = P_;
00738         }
00739       }
00740       else {
00741         // shrink multivectors without copying
00742         X_ = MVT::Clone(*X_,blockSize_);
00743         KX_ = MVT::Clone(*KX_,blockSize_);
00744         if (hasM_) {
00745           MX_ = MVT::Clone(*MX_,blockSize_);
00746         }
00747         else {
00748           MX_ = X_;
00749         }
00750         R_ = MVT::Clone(*R_,blockSize_);
00751         P_ = MVT::Clone(*P_,blockSize_);
00752         KP_ = MVT::Clone(*KP_,blockSize_);
00753         if (hasM_) {
00754           MP_ = MVT::Clone(*MP_,blockSize_);
00755         }
00756         else {
00757           MP_ = P_;
00758         }
00759       }
00760       // shrink H
00761       H_ = MVT::Clone(*H_,blockSize_);
00762       KH_ = MVT::Clone(*KH_,blockSize_);
00763       if (hasM_) {
00764         MH_ = MVT::Clone(*MH_,blockSize_);
00765       }
00766       else {
00767         MH_ = H_;
00768       }
00769     } 
00770     else {  // blockSize > blockSize_
00771       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00772       initialized_ = false;
00773 
00774       Teuchos::RefCountPtr<const MV> tmp;
00775       // grab some Multivector to Clone
00776       // in practice, getInitVec() should always provide this, but it is possible to use a 
00777       // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00778       // in case of that strange scenario, we will try to Clone from X_
00779       if (blockSize_ > 0) {
00780         tmp = X_;
00781       }
00782       else {
00783         tmp = problem_->getInitVec();
00784         TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00785                            "Anasazi::LOBPCG::setBlockSize(): Eigenproblem did not specify initial vectors to clone from");
00786       }
00787       // grow/allocate vectors
00788       theta_.resize(3*blockSize,NANVAL);
00789       ritz2norms_.resize(3*blockSize_,NANVAL);
00790       Rnorms_.resize(blockSize,NANVAL);
00791       R2norms_.resize(blockSize,NANVAL);
00792       
00793       // clone multivectors off of tmp
00794       X_ = MVT::Clone(*tmp,blockSize);
00795       KX_ = MVT::Clone(*tmp,blockSize);
00796       if (hasM_) {
00797         MX_ = MVT::Clone(*tmp,blockSize);
00798       }
00799       else {
00800         MX_ = X_;
00801       }
00802       R_ = MVT::Clone(*tmp,blockSize);
00803       H_ = MVT::Clone(*tmp,blockSize);
00804       KH_ = MVT::Clone(*tmp,blockSize);
00805       if (hasM_) {
00806         MH_ = MVT::Clone(*tmp,blockSize);
00807       }
00808       else {
00809         MH_ = H_;
00810       }
00811       hasP_ = false;
00812       P_ = MVT::Clone(*tmp,blockSize);
00813       KP_ = MVT::Clone(*tmp,blockSize);
00814       if (hasM_) {
00815         MP_ = MVT::Clone(*tmp,blockSize);
00816       }
00817       else {
00818         MP_ = P_;
00819       }
00820       blockSize_ = blockSize;
00821     }
00822   }
00823 
00824 
00826   // Set the auxiliary vectors
00827   template <class ScalarType, class MV, class OP>
00828   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RefCountPtr<const MV> > &auxvecs) {
00829     typedef typename Teuchos::Array<Teuchos::RefCountPtr<const MV> >::iterator tarcpmv;
00830 
00831     // set new auxiliary vectors
00832     auxVecs_ = auxvecs;
00833     
00834     numAuxVecs_ = 0;
00835     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00836       numAuxVecs_ += MVT::GetNumberVecs(**i);
00837     }
00838     
00839     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00840     if (numAuxVecs_ > 0 && initialized_) {
00841       initialized_ = false;
00842       hasP_ = false;
00843     }
00844 
00845     if (om_->isVerbosity( Debug ) ) {
00846       // Check almost everything here
00847       CheckList chk;
00848       chk.checkQ = true;
00849       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00850     }
00851   }
00852 
00853 
00855   /* Initialize the state of the solver
00856    * 
00857    * POST-CONDITIONS:
00858    *
00859    * initialized_ == true
00860    * X is orthonormal, orthogonal to auxVecs_
00861    * KX = Op*X
00862    * MX = M*X if hasM_
00863    * theta_ contains Ritz values of X
00864    * R = KX - MX*diag(theta_)
00865    * if hasP() == true,
00866    *   P orthogonal to auxVecs_
00867    *   if fullOrtho_ == true,
00868    *     P orthonormal and orthogonal to X
00869    *   KP = Op*P
00870    *   MP = M*P
00871    */
00872   template <class ScalarType, class MV, class OP>
00873   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00874   {
00875     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00876     // NOTE: Time spent in this routine is allotted to timerInit_, in addition to the respective sections.
00877 
00878     hasP_ = false;  // this will be set to true below if appropriate
00879 
00880     std::vector<int> bsind(blockSize_);
00881     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00882 
00883     // set up X: if the user doesn't specify X, ignore the rest
00884     if (newstate.X != Teuchos::null && MVT::GetNumberVecs(*newstate.X) >= blockSize_ && MVT::GetVecLength(*newstate.X) == MVT::GetVecLength(*X_) ) {
00885 
00886       Teuchos::TimeMonitor lcltimer( *timerInit_ );
00887 
00888       // put data in X,MX,KX
00889       MVT::SetBlock(*newstate.X,bsind,*X_);
00890       if (hasM_) {
00891         if (newstate.MX != Teuchos::null && MVT::GetNumberVecs(*newstate.MX) >= blockSize_ && MVT::GetVecLength(*newstate.MX) == MVT::GetVecLength(*MX_) ) {
00892           MVT::SetBlock(*newstate.MX,bsind,*MX_);
00893         }
00894         else {
00895           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
00896           OPT::Apply(*MOp_,*X_,*MX_);
00897           count_ApplyM_ += blockSize_;
00898         }
00899       }
00900       else {
00901         // an assignment would be redundant; take advantage of this opportunity to debug a little
00902         TEST_FOR_EXCEPTION(MX_ != X_, std::logic_error, "Anasazi::LOBPCG::initialize(): solver invariant not satisfied");
00903       }
00904       if (newstate.KX != Teuchos::null && MVT::GetNumberVecs(*newstate.KX) >= blockSize_ && MVT::GetVecLength(*newstate.KX) == MVT::GetVecLength(*KX_) ) {
00905         MVT::SetBlock(*newstate.KX,bsind,*KX_);
00906       }
00907       else {
00908         Teuchos::TimeMonitor lcltimer( *timerOp_ );
00909         OPT::Apply(*Op_,*X_,*KX_);
00910         count_ApplyOp_ += blockSize_;
00911       }
00912 
00913       // set up Ritz values
00914       theta_.resize(3*blockSize_,NANVAL);
00915       ritz2norms_.resize(3*blockSize_,NANVAL);
00916       if (newstate.T != Teuchos::null && (signed int)(newstate.T->size()) >= blockSize_) {
00917         for (int i=0; i<blockSize_; i++) {
00918           theta_[i] = (*newstate.T)[i];
00919         }
00920       }
00921       else {
00922         // get ritz vecs/vals
00923         Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
00924                                                    MM(blockSize_,blockSize_),
00925                                                     S(blockSize_,blockSize_);
00926         {
00927           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
00928           // project K
00929           MVT::MvTransMv(ONE,*X_,*KX_,KK);
00930           // project M
00931           MVT::MvTransMv(ONE,*X_,*MX_,MM);
00932           nevLocal_ = blockSize_;
00933         }
00934 
00935         // solve the projected problem
00936         {
00937           Teuchos::TimeMonitor lcltimer( *timerDS_ );
00938           MSUtils_.directSolver(blockSize_, KK, &MM, &S, &theta_, &nevLocal_, 1);
00939           TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
00940                              "Anasazi::LOBPCG::initialize(): Not enough Ritz vectors to initialize algorithm.");
00941         }
00942 
00943         // We only have blockSize_ ritz pairs, but we still want them in the correct order
00944         {
00945           Teuchos::TimeMonitor lcltimer( *timerSort_ );
00946 
00947           std::vector<int> order(blockSize_);
00948           // 
00949           // sort the first blockSize_ values in theta_
00950           sm_->sort( this, blockSize_, theta_, &order );   // don't catch exception
00951           //
00952           // apply the same ordering to the primitive ritz vectors
00953           MSUtils_.permuteVectors(order,S);
00954         }
00955 
00956         // compute ritz residual norms
00957         {
00958           Teuchos::BLAS<int,ScalarType> blas;
00959           Teuchos::SerialDenseMatrix<int,ScalarType> R(blockSize_,blockSize_);
00960           // R = MM*S*diag(theta) - KK*S
00961           R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,S,ZERO);
00962           for (int i=0; i<blockSize_; i++) {
00963             blas.SCAL(blockSize_,theta_[i],R[i],1);
00964           }
00965           R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,-ONE,KK,S,ONE);
00966           for (int i=0; i<blockSize_; i++) {
00967             ritz2norms_[i] = blas.NRM2(blockSize_,R[i],1);
00968           }
00969         }
00970 
00971         // update the solution
00972         {
00973           Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
00974           // X <- X*S
00975           MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
00976           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
00977           // KX <- KX*S
00978           MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
00979           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
00980           if (hasM_) {
00981             // MX <- MX*S
00982             MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
00983             MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
00984           }
00985         }
00986       }
00987 
00988   
00989       // set up R
00990       if (newstate.R != Teuchos::null && MVT::GetNumberVecs(*newstate.R) >= blockSize_ && MVT::GetVecLength(*newstate.R) == MVT::GetVecLength(*R_) ) {
00991         MVT::SetBlock(*newstate.R,bsind,*R_);
00992       }
00993       else {
00994         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
00995         // form R <- KX - MX*T
00996         MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
00997         Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
00998         T.putScalar(ZERO);
00999         for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01000         MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01001       }
01002 
01003       // R has been updated; mark the norms as out-of-date
01004       Rnorms_current_ = false;
01005       R2norms_current_ = false;
01006   
01007       // put data in P,KP,MP: P is not used to set theta
01008       if (newstate.P != Teuchos::null && MVT::GetNumberVecs(*newstate.P) >= blockSize_ && MVT::GetVecLength(*newstate.P) == MVT::GetVecLength(*P_) ) {
01009         hasP_ = true;
01010 
01011         MVT::SetBlock(*newstate.P,bsind,*P_);
01012 
01013         if (newstate.KP != Teuchos::null && MVT::GetNumberVecs(*newstate.KP) >= blockSize_ && MVT::GetVecLength(*newstate.KP) == MVT::GetVecLength(*KP_) ) {
01014           MVT::SetBlock(*newstate.KP,bsind,*KP_);
01015         }
01016         else {
01017           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01018           OPT::Apply(*Op_,*P_,*KP_);
01019           count_ApplyOp_ += blockSize_;
01020         }
01021 
01022         if (hasM_) {
01023           if (newstate.MP != Teuchos::null && MVT::GetNumberVecs(*newstate.MP) >= blockSize_ && MVT::GetVecLength(*newstate.MP) == MVT::GetVecLength(*MP_) ) {
01024             MVT::SetBlock(*newstate.MP,bsind,*MP_);
01025           }
01026           else {
01027             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01028             OPT::Apply(*MOp_,*P_,*MP_);
01029             count_ApplyM_ += blockSize_;
01030           }
01031         }
01032       }
01033 
01034       // finally, we are initialized
01035       initialized_ = true;
01036 
01037       if (om_->isVerbosity( Debug ) ) {
01038         // Check almost everything here
01039         CheckList chk;
01040         chk.checkX = true;
01041         chk.checkKX = true;
01042         chk.checkMX = true;
01043         chk.checkP = true;
01044         chk.checkKP = true;
01045         chk.checkMP = true;
01046         chk.checkR = true;
01047         chk.checkQ = true;
01048         om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01049       }
01050 
01051     }
01052     else {
01053 
01054       LOBPCGState<ScalarType,MV> newstate;
01055       { // begin timer scope
01056         Teuchos::TimeMonitor lcltimer( *timerInit_ );
01057 
01058         // generate something, projectAndNormalize, call myself recursively
01059         Teuchos::RefCountPtr<const MV> ivec = problem_->getInitVec();
01060         TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01061                            "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from");
01062 
01063         int initSize = MVT::GetNumberVecs(*ivec);
01064         if (initSize > blockSize_) {
01065           // we need only the first blockSize_ vectors from ivec; get a view of them
01066           initSize = blockSize_;
01067           std::vector<int> ind(blockSize_);
01068           for (int i=0; i<blockSize_; i++) ind[i] = i;
01069           ivec = MVT::CloneView(*ivec,ind);
01070         }
01071 
01072         // alloc newX
01073         Teuchos::RefCountPtr<MV> newMX, newX = MVT::Clone(*ivec,blockSize_);
01074         // assign ivec to first part of newX
01075         std::vector<int> ind(initSize);
01076         if (initSize > 0) {
01077           for (int i=0; i<initSize; i++) ind[i] = i;
01078           MVT::SetBlock(*ivec,ind,*newX);
01079         }
01080         // fill the rest of newX with random
01081         if (blockSize_ > initSize) {
01082           ind.resize(blockSize_ - initSize);
01083           for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01084           Teuchos::RefCountPtr<MV> rX = MVT::CloneView(*newX,ind);
01085           MVT::MvRandom(*rX);
01086           rX = Teuchos::null;
01087         }
01088 
01089         // compute newMX if hasM_
01090         if (hasM_) {
01091           newMX = MVT::Clone(*ivec,blockSize_);
01092           {
01093             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01094             OPT::Apply(*MOp_,*newX,*newMX);
01095             count_ApplyM_ += blockSize_;
01096           }
01097         }
01098         else {
01099           newMX = Teuchos::null;
01100         }
01101 
01102         // remove auxVecs from newX and normalize newX
01103         if (auxVecs_.size() > 0) {
01104           Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01105           Teuchos::Array<Teuchos::RefCountPtr<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01106           int rank = orthman_->projectAndNormalize(*newX,newMX,dummy,Teuchos::null,auxVecs_);
01107           TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGInitFailure,
01108                              "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01109         }
01110         else {
01111           Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01112           int rank = orthman_->normalize(*newX,newMX,Teuchos::null);
01113           TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGInitFailure,
01114                              "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01115         }
01116 
01117         // call myself recursively
01118         newstate.X = newX;
01119         newstate.MX = newMX;
01120       } // end of timer scope; we needed this because the following recursive call to initialize contains its own call to timerInit_
01121       initialize(newstate);
01122     }
01123   }
01124 
01125   template <class ScalarType, class MV, class OP>
01126   void LOBPCG<ScalarType,MV,OP>::initialize()
01127   {
01128     LOBPCGState<ScalarType,MV> empty;
01129     initialize(empty);
01130   }
01131 
01132 
01134   // Instruct the solver to use full orthogonalization
01135   template <class ScalarType, class MV, class OP>
01136   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01137   {
01138     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01139       // state is already orthogonalized or solver is not initialized
01140       fullOrtho_ = fullOrtho;
01141       return;
01142     }
01143 
01144     // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01145     fullOrtho_ = true;
01146     // throw away data in P
01147     hasP_ = false;
01148   }
01149 
01150 
01152   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01153   template <class ScalarType, class MV, class OP>
01154   void LOBPCG<ScalarType,MV,OP>::iterate () 
01155   {
01156     //
01157     // Allocate/initialize data structures
01158     //
01159     if (initialized_ == false) {
01160       initialize();
01161     }
01162 
01163     // if fullOrtho_ == true, then we must produce the following on every iteration:
01164     // [newX newP] = [X H P] [CX;CP]
01165     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
01166     // do this in place, and R_ does not have enough storage for newX and newP. therefore, 
01167     // we must allocate additional storage for this.
01168     // otherwise, when not using full orthogonalization, the structure
01169     // [newX newP] = [X H P] [CX1  0 ]
01170     //                       [CX2 CP2]  allows us to work using only R as work space
01171     //                       [CX3 CP3] 
01172     if (fullOrtho_) {
01173       if (tmpMV_ == Teuchos::null || MVT::GetNumberVecs(*tmpMV_) != blockSize_) {
01174         tmpMV_ = MVT::Clone(*X_,blockSize_);
01175       }
01176     }
01177     else {
01178       tmpMV_ = Teuchos::null;
01179     }
01180 
01181     //
01182     // Miscellaneous definitions
01183     const int oneBlock    =   blockSize_;
01184     const int twoBlocks   = 2*blockSize_;
01185     const int threeBlocks = 3*blockSize_;
01186     
01187     //
01188     // Define dense projected/local matrices
01189     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01190     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01191     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01192     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01193                                                MM( threeBlocks, threeBlocks ),
01194                                                 S( threeBlocks, threeBlocks );
01195 
01196     while (tester_->checkStatus(this) != Passed) {
01197 
01198       // Print information on current status
01199       if (om_->isVerbosity(Debug)) {
01200         currentStatus( om_->stream(Debug) );
01201       }
01202       else if (om_->isVerbosity(IterationDetails)) {
01203         currentStatus( om_->stream(IterationDetails) );
01204       }
01205 
01206       iter_++;
01207       
01208       // Apply the preconditioner on the residuals: H <- Prec*R
01209       if (Prec_ != Teuchos::null) {
01210         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01211         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01212         count_ApplyPrec_ += blockSize_;
01213       }
01214       else {
01215         std::vector<int> ind(blockSize_);
01216         for (int i=0; i<blockSize_; i++) { ind[i] = i; }
01217         MVT::SetBlock(*R_,ind,*H_);
01218       }
01219 
01220       // Apply the mass matrix on H
01221       if (hasM_) {
01222         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01223         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01224         count_ApplyM_ += blockSize_;
01225       }
01226 
01227       // orthogonalize H against the auxiliary vectors
01228       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01229       Teuchos::Array<Teuchos::RefCountPtr<const MV> > Q;
01230       Teuchos::Array<Teuchos::RefCountPtr<Teuchos::SerialDenseMatrix<int,ScalarType> > > C = 
01231         Teuchos::tuple<Teuchos::RefCountPtr<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01232       Q = auxVecs_;
01233       if (fullOrtho_) {
01234         Q.push_back(X_);
01235         if (hasP_) {
01236           Q.push_back(P_);
01237         }
01238       }
01239       {
01240         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01241         int rank = orthman_->projectAndNormalize(*H_,MH_,C,Teuchos::null,Q);
01242         TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01243                            "Anasazi::LOBPCG::iterate(): unable to compute full basis for H");
01244       }
01245 
01246       if (om_->isVerbosity( Debug ) ) {
01247         CheckList chk;
01248         chk.checkH = true;
01249         chk.checkMH = true;
01250         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01251       }
01252       else if (om_->isVerbosity( OrthoDetails ) ) {
01253         CheckList chk;
01254         chk.checkH = true;
01255         chk.checkMH = true;
01256         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01257       }
01258 
01259       // Apply the stiffness matrix to H
01260       {
01261         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01262         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01263         count_ApplyOp_ += blockSize_;
01264       }
01265 
01266       int localSize;
01267       if (hasP_) {
01268         localSize = threeBlocks;
01269       }
01270       else {
01271         localSize = twoBlocks;
01272       }
01273 
01274       // Form "local" mass and stiffness matrices
01275       {
01276         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01277         /* We will construct (block) upper triangular parts only.
01278                  (X^H)             (KK11 KK12 KK13)
01279             KK = (H^H) K [X H P] = ( --  KK22 KK23)
01280                  (P^H)             ( --   --  KK33)
01281                  (X^H)             (MM11 MM12 MM13)
01282             MM = (H^H) M [X H P] = ( --  MM22 MM23) 
01283                  (P^H)             ( --   --  MM33)
01284         */
01285         Teuchos::SerialDenseMatrix<int,ScalarType> KK11( Teuchos::View, KK, blockSize_, blockSize_ );
01286         MVT::MvTransMv( ONE, *X_, *KX_, KK11 );
01287         Teuchos::SerialDenseMatrix<int,ScalarType> KK12( Teuchos::View, KK, blockSize_, blockSize_, 0, blockSize_ );
01288         MVT::MvTransMv( ONE, *X_, *KH_, KK12 );
01289         Teuchos::SerialDenseMatrix<int,ScalarType> KK22( Teuchos::View, KK, blockSize_, blockSize_, blockSize_, blockSize_ );
01290         MVT::MvTransMv( ONE, *H_, *KH_, KK22 );
01291         
01292         Teuchos::SerialDenseMatrix<int,ScalarType> MM11( Teuchos::View, MM, blockSize_, blockSize_ );
01293         MVT::MvTransMv( ONE, *X_, *MX_, MM11 );
01294         Teuchos::SerialDenseMatrix<int,ScalarType> MM12( Teuchos::View, MM, blockSize_, blockSize_, 0, blockSize_ );
01295         MVT::MvTransMv( ONE, *X_, *MH_, MM12 );
01296         Teuchos::SerialDenseMatrix<int,ScalarType> MM22( Teuchos::View, MM, blockSize_, blockSize_, blockSize_, blockSize_ );
01297         MVT::MvTransMv( ONE, *H_, *MH_, MM22 );
01298 
01299         if (hasP_) {
01300           Teuchos::SerialDenseMatrix<int,ScalarType> KK13( Teuchos::View, KK, blockSize_, blockSize_, 0, twoBlocks );
01301           MVT::MvTransMv( ONE, *X_, *KP_, KK13 );
01302           Teuchos::SerialDenseMatrix<int,ScalarType> KK23( Teuchos::View, KK, blockSize_, blockSize_, blockSize_, twoBlocks );
01303           MVT::MvTransMv( ONE, *H_, *KP_, KK23 );
01304           Teuchos::SerialDenseMatrix<int,ScalarType> KK33( Teuchos::View, KK, blockSize_, blockSize_, twoBlocks, twoBlocks );
01305           MVT::MvTransMv( ONE, *P_, *KP_, KK33 );
01306           
01307           Teuchos::SerialDenseMatrix<int,ScalarType> MM13( Teuchos::View, MM, blockSize_, blockSize_, 0, twoBlocks );
01308           MVT::MvTransMv( ONE, *X_, *MP_, MM13 );
01309           Teuchos::SerialDenseMatrix<int,ScalarType> MM23( Teuchos::View, MM, blockSize_, blockSize_, blockSize_, twoBlocks );
01310           MVT::MvTransMv( ONE, *H_, *MP_, MM23 );
01311           Teuchos::SerialDenseMatrix<int,ScalarType> MM33( Teuchos::View, MM, blockSize_, blockSize_, twoBlocks, twoBlocks );
01312           MVT::MvTransMv( ONE, *P_, *MP_, MM33 );
01313         }
01314 
01315         // make MM and KK symmetric in memory: we need this so we can form ritz residual vectors and use MM for inner product below (with ease)
01316         for (int j=0; j<localSize; j++) {
01317           for (int i=j+1; i<localSize; i++) {
01318             KK(i,j) = KK(j,i);
01319             MM(i,j) = MM(j,i);
01320           }
01321         }
01322       } // end timing block
01323 
01324       // Perform a spectral decomposition
01325       {
01326         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01327         nevLocal_ = localSize;
01328         MSUtils_.directSolver(localSize, KK, &MM, &S, &theta_, &nevLocal_, 0);    // don't catch the exception
01329         // localSize tells directSolver() how big KK,MM are
01330         // however, directSolver() may choose to use only the principle submatrices of KK,MM because of loss of 
01331         // MM-orthogonality in the projected eigenvectors
01332         // nevLocal_ tells us how much it used, in effect dictating back to us how big localSize is, 
01333         // and therefore telling us which of [X H P] to use in computing the new iterates below.
01334         // we will not tolerate this ill-conditioning, and will throw an exception.
01335       }
01336       om_->stream(Debug) << " After directSolve: localSize == " << localSize << " \tnevLocal == " << nevLocal_ << endl;
01337       TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, "Indefiniteness detected in projecteded mass matrix." );
01338 
01339       Teuchos::LAPACK<int,ScalarType> lapack;
01340       Teuchos::BLAS<int,ScalarType> blas;
01341       //
01342       //---------------------------------------------------
01343       // Sort the ritz values using the sort manager
01344       //---------------------------------------------------
01345       {
01346         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01347 
01348         std::vector<int> order(nevLocal_);
01349         // 
01350         // sort the first nevLocal_ values in theta_
01351         sm_->sort( this, nevLocal_, theta_, &order );   // don't catch exception
01352         //
01353         // Sort the primitive ritz vectors
01354         Teuchos::SerialDenseMatrix<int,ScalarType> curS(Teuchos::View,S,nevLocal_,nevLocal_);
01355         MSUtils_.permuteVectors(order,curS);
01356       }
01357 
01358       // compute ritz residual norms
01359       {
01360         Teuchos::SerialDenseMatrix<int,ScalarType> R(nevLocal_,nevLocal_);
01361         Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_),
01362                                                    lclMM(Teuchos::View,MM,nevLocal_,nevLocal_);
01363         // R = MM*S*diag(theta) - KK*S
01364         R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,S,ZERO);
01365         for (int i=0; i<nevLocal_; i++) {
01366           blas.SCAL(nevLocal_,theta_[i],R[i],1);
01367         }
01368         R.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,-ONE,lclKK,S,ONE);
01369         for (int i=0; i<nevLocal_; i++) {
01370           ritz2norms_[i] = blas.NRM2(nevLocal_,R[i],1);
01371         }
01372       }
01373 
01374 
01375       // before computing X,P: perform second orthogonalization per Ulrich,Rich paper
01376       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01377       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01378       // Here, we will also orthonormalize CX.
01379       // This is accomplished using the Cholesky factorization of [CX CP]^H MM [CX CP]
01380       Teuchos::SerialDenseMatrix<int,ScalarType> CX(threeBlocks,oneBlock), CP(threeBlocks,oneBlock);
01381       if (fullOrtho_ && localSize >= twoBlocks) {
01382         // build orthonormal basis for (  0  ) that is orthogonal to ( S11 )
01383         //                             ( S21 )                       ( S21 )
01384         //                             ( S31 )                       ( S31 )
01385         // Do this using Cholesky factorization of ( S11  0  )
01386         //                                         ( S21 S21 )
01387         //                                         ( S31 S31 )
01388         //           ( S11  0  )
01389         // Build C = ( S21 S21 )
01390         //           ( S31 S31 )
01391         Teuchos::SerialDenseMatrix<int,ScalarType> C(threeBlocks,twoBlocks),
01392                                                 tmp1(threeBlocks,twoBlocks),
01393                                                 tmp2(twoBlocks  ,twoBlocks);
01394 
01395         // first block of rows
01396         for (int i=0; i<oneBlock; i++) {
01397           // CX
01398           for (int j=0; j<oneBlock; j++) {
01399             C(i,j) = S(i,j);
01400           }
01401           // CP
01402           for (int j=oneBlock; j<twoBlocks; j++) {
01403             C(i,j) = ZERO;
01404           }
01405         }
01406         // second block of rows
01407         for (int j=0; j<oneBlock; j++) {
01408           for (int i=oneBlock; i<twoBlocks; i++) {
01409             // CX
01410             C(i,j)          = S(i,j);
01411             // CP
01412             C(i,j+oneBlock) = S(i,j);
01413           }
01414         }
01415         // third block of rows
01416         if (localSize == threeBlocks) {
01417           for (int j=0; j<oneBlock; j++) {
01418             for (int i=twoBlocks; i<threeBlocks; i++) {
01419               // CX
01420               C(i,j)          = S(i,j);
01421               // CP
01422               C(i,j+oneBlock) = S(i,j);
01423             }
01424           }
01425         }
01426         else {
01427           for (int j=0; j<twoBlocks; j++) {
01428             for (int i=twoBlocks; i<threeBlocks; i++) {
01429               C(i,j) = ZERO;
01430             }
01431           }
01432         }
01433 
01434         // compute tmp1 = MM*C
01435         int teuchosret;
01436         teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,C,ZERO);
01437         TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01438 
01439         // compute tmp2 = C^H*tmp1 == C^H*MM*C
01440         teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,tmp1,ZERO);
01441         TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01442 
01443         // compute R (cholesky) of tmp2
01444         int info;
01445         lapack.POTRF('U',twoBlocks,tmp2.values(),tmp2.stride(),&info);
01446         TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, 
01447                            "Anasazi::LOBPCG::iterate(): Could not perform full orthogonalization.");
01448         // compute C = C inv(R)
01449         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01450                   threeBlocks,twoBlocks,ONE,tmp2.values(),tmp2.stride(),C.values(),C.stride());
01451         // put C(:,0:oneBlock-1) into CX
01452         for (int j=0; j<oneBlock; j++) {
01453           for (int i=0; i<threeBlocks; i++) {
01454             CX(i,j) = C(i,j);
01455           }
01456         }
01457         // put C(:,oneBlock:twoBlocks-1) into CP
01458         for (int j=0; j<oneBlock; j++) {
01459           for (int i=0; i<threeBlocks; i++) {
01460             CP(i,j) = C(i,oneBlock+j);
01461           }
01462         }
01463 
01464         // check the results
01465         if (om_->isVerbosity( Debug ) ) {
01466           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(threeBlocks,oneBlock),
01467                                                      tmp2(oneBlock,oneBlock);
01468           MagnitudeType tmp;
01469           int teuchosret;
01470           stringstream os;
01471           os.precision(2);
01472           os.setf(ios::scientific, ios::floatfield);
01473 
01474           os << " Checking Full Ortho: iteration " << iter_ << endl;
01475 
01476           // check CX^T MM CX == I
01477           // compute tmp1 = MM*CX
01478           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,CX,ZERO);
01479           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01480           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01481           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,CX,tmp1,ZERO);
01482           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01483           // subtrace tmp2 - I == CX^H * MM * CX - I
01484           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01485           tmp = tmp2.normFrobenius();          
01486           os << " >> Error in CX^H MM CX == I : " << tmp << endl;
01487 
01488           // check CP^T MM CP == I
01489           // compute tmp1 = MM*CP
01490           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,CP,ZERO);
01491           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01492           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01493           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,CP,tmp1,ZERO);
01494           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01495           // subtrace tmp2 - I == CP^H * MM * CP - I
01496           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01497           tmp = tmp2.normFrobenius();          
01498           os << " >> Error in CP^H MM CP == I : " << tmp << endl;
01499 
01500           // check CX^T MM CP == 0
01501           // compute tmp1 = MM*CP
01502           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,MM,CP,ZERO);
01503           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01504           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01505           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,CX,tmp1,ZERO);
01506           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Logic error calling SerialDenseMatrix::multiply");
01507           // subtrace tmp2 == CX^H * MM * CP
01508           tmp = tmp2.normFrobenius();          
01509           os << " >> Error in CX^H MM CP == 0 : " << tmp << endl;
01510 
01511           os << endl;
01512           om_->print(Debug,os.str());
01513         }
01514       }
01515       else {
01516         //      [S11]
01517         // S1 = [S21]
01518         //      [S31]
01519         //
01520         // CX = S1
01521         //
01522         //      [ 0 ]
01523         // CP = [S21]
01524         //      [S31]
01525         //
01526         Teuchos::SerialDenseMatrix<int,ScalarType> S1(Teuchos::View,S,threeBlocks,oneBlock);
01527         CX.assign(S1);
01528         CP.assign(S1);
01529         // remove first block of rows from CP
01530         for (int j=0; j<oneBlock; j++) {
01531           for (int i=0; i<oneBlock; i++) {
01532             CP(i,j) = ZERO;
01533           }
01534         }
01535       }
01536 
01537       //
01538       // Update the spaces: compute new X and new P
01539       // P is only computed if we have localSize >= twoBlocks
01540       // Note: Use R as a temporary work space and (if full ortho) tmpMV
01541       hasP_ = false;
01542       {
01543         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01544 
01545         TEST_FOR_EXCEPTION(localSize==oneBlock,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error in local update.");
01546         if (localSize == twoBlocks) {
01547           Teuchos::SerialDenseMatrix<int,ScalarType> CX1( Teuchos::View, CX, blockSize_, blockSize_ ),
01548                                                      CX2( Teuchos::View, CX, blockSize_, blockSize_, oneBlock, 0 ),
01549                                                      CP1( Teuchos::View, CP, blockSize_, blockSize_ ),
01550                                                      CP2( Teuchos::View, CP, blockSize_, blockSize_, oneBlock, 0 );
01551 
01552           hasP_ = true;
01553 
01554           // X = [X H][CX1]
01555           //          [CX2]
01556           //
01557           // P = [X H][CP1]
01558           //          [CP2]
01559           //
01560 
01561           // R <- X
01562           MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01563           // X <- R*CX1 + H*CX2 == X*CX1 + H*CX2
01564           MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *X_ );
01565           MVT::MvTimesMatAddMv( ONE, *H_, CX2, ONE , *X_ );
01566           // P <- R*CP1 + H*CP2 == X*CP1 + H*CP2
01567           // however, if fullOrtho_ == false, CP1 == ZERO
01568           if (fullOrtho_) { 
01569             MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *P_ );
01570             MVT::MvTimesMatAddMv( ONE, *H_, CP2, ONE, *P_ );
01571           }
01572           else {
01573             MVT::MvTimesMatAddMv( ONE, *H_, CP2, ZERO, *P_ );
01574           }
01575 
01576           // R  <- KX
01577           MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01578           // KX <- R*CX1 + KH*CX2 == KX*CX1 + KH*CX2
01579           MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *KX_ );
01580           MVT::MvTimesMatAddMv( ONE, *KH_, CX2, ONE , *KX_ );
01581           // KP <- R*CP1 + KH*CP2 == KX*CP1 + KH*CP2
01582           // however, if fullOrtho_ == false, CP1 == ZERO
01583           if (fullOrtho_) { 
01584             MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *KP_ );
01585             MVT::MvTimesMatAddMv( ONE, *KH_, CP2, ONE, *KP_ );
01586           }
01587           else {
01588             MVT::MvTimesMatAddMv( ONE, *KH_, CP2, ZERO, *KP_ );
01589           }
01590 
01591           if (hasM_) {
01592             // R  <- MX
01593             MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01594             // MX <- R*CX1 + MH*CX2 == MX*CX1 + MH*CX2
01595             MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *MX_ );
01596             MVT::MvTimesMatAddMv( ONE, *MH_, CX2, ONE , *MX_ );
01597             // MP <- R*CP1 + MH*CP2 == MX*CP1 + MH*CP2
01598             // however, if fullOrtho_ == false, CP1 == ZERO
01599             if (fullOrtho_) { 
01600               MVT::MvTimesMatAddMv( ONE, *R_, CP1, ZERO, *MP_ );
01601               MVT::MvTimesMatAddMv( ONE, *MH_, CP2, ONE, *MP_ );
01602             }
01603             else {
01604               MVT::MvTimesMatAddMv( ONE, *MH_, CP2, ZERO, *MP_ );
01605             }
01606           }
01607 
01608         } // if (localSize == twoBlocks)
01609         else if (localSize == threeBlocks) {
01610           Teuchos::SerialDenseMatrix<int,ScalarType> CX1( Teuchos::View, CX, blockSize_, blockSize_ ),
01611                                                      CX2( Teuchos::View, CX, blockSize_, blockSize_, oneBlock, 0 ),
01612                                                      CX3( Teuchos::View, CX, blockSize_, blockSize_, twoBlocks, 0 ),
01613                                                      CP1( Teuchos::View, CP, blockSize_, blockSize_ ),
01614                                                      CP2( Teuchos::View, CP, blockSize_, blockSize_, oneBlock, 0 ),
01615                                                      CP3( Teuchos::View, CP, blockSize_, blockSize_, twoBlocks, 0 );
01616 
01617           hasP_ = true;
01618 
01619           // X <- X*CX1 + P*CX3
01620           // P <- X*CP1 + P*CP3 (note, CP1 == ZERO if fullOrtho_==false)
01621           if (fullOrtho_) {
01622             // copy X,P
01623             MVT::MvAddMv(ONE,*X_, ZERO,*X_, *R_);
01624             MVT::MvAddMv(ONE,*P_, ZERO,*P_, *tmpMV_);
01625             // perform [X P][CX1 CP1]
01626             //              [CX3 CP3]
01627             MVT::MvTimesMatAddMv( ONE, *R_,     CX1, ZERO, *X_ );
01628             MVT::MvTimesMatAddMv( ONE, *tmpMV_, CX3,  ONE, *X_ );
01629             MVT::MvTimesMatAddMv( ONE, *R_,     CP1, ZERO, *P_ );
01630             MVT::MvTimesMatAddMv( ONE, *tmpMV_, CP3,  ONE, *P_ );
01631           }
01632           else {
01633             // copy X
01634             MVT::MvAddMv(ONE,*X_, ZERO,*X_, *R_);
01635             // perform [X P][CX1  0 ]
01636             //              [CX3 CP3]
01637             MVT::MvTimesMatAddMv( ONE, *R_, CX1, ZERO, *X_ );
01638             MVT::MvTimesMatAddMv( ONE, *P_, CX3,  ONE, *X_ );
01639             // copy P
01640             MVT::MvAddMv(ONE,*P_, ZERO,*P_, *R_);
01641             MVT::MvTimesMatAddMv( ONE, *R_, CP3, ZERO, *P_ );
01642           }
01643           // X <- X + H*CX2
01644           // P <- P + H*CP2
01645           MVT::MvTimesMatAddMv( ONE, *H_, CX2,  ONE, *X_ );
01646           MVT::MvTimesMatAddMv( ONE, *H_, CP2,  ONE, *P_ );
01647 
01648 
01649           // KX <- KX*CX1 + KP*CX3
01650           // KP <- KX*CP1 + KP*CP3 (note, CP1 == ZERO if fullOrtho_==false)
01651           if (fullOrtho_) {
01652             // copy KX,KP
01653             MVT::MvAddMv(ONE,*KX_, ZERO,*KX_, *R_);
01654             MVT::MvAddMv(ONE,*KP_, ZERO,*KP_, *tmpMV_);
01655             // perform [KX KP][CX1 CP1]
01656             //                [CX3 CP3]
01657             MVT::MvTimesMatAddMv( ONE, *R_,     CX1, ZERO, *KX_ );
01658             MVT::MvTimesMatAddMv( ONE, *tmpMV_, CX3,  ONE, *KX_ );
01659             MVT::MvTimesMatAddMv( ONE, *R_,     CP1, ZERO, *KP_ );
01660             MVT::MvTimesMatAddMv( ONE, *tmpMV_, CP3,  ONE, *KP_ );
01661           }
01662           else {
01663             // copy KX
01664             MVT::MvAddMv(ONE,*KX_, ZERO,*KX_, *R_);
01665             // perform [KX KP][CX1  0 ]
01666             //                [CX3 CP3]
01667             MVT::MvTimesMatAddMv( ONE, *R_ , CX1, ZERO, *KX_ );
01668             MVT::MvTimesMatAddMv( ONE, *KP_, CX3,  ONE, *KX_ );
01669             // copy KP
01670             MVT::MvAddMv(ONE,*KP_, ZERO,*KP_, *R_);
01671             MVT::MvTimesMatAddMv( ONE, *R_, CP3, ZERO, *KP_ );
01672           }
01673           // KX <- KX + KH*CX2
01674           // KP <- KP + KH*CP2
01675           MVT::MvTimesMatAddMv( ONE, *KH_, CX2,  ONE, *KX_ );
01676           MVT::MvTimesMatAddMv( ONE, *KH_, CP2,  ONE, *KP_ );
01677 
01678 
01679           if (hasM_) {
01680             // MX <- MX*CX1 + MP*CX3
01681             // MP <- MX*CP1 + MP*CP3 (note, CP1 == ZERO if fullOrtho_==false)
01682             if (fullOrtho_) {
01683               // copy MX,MP
01684               MVT::MvAddMv(ONE,*MX_, ZERO,*MX_, *R_);
01685               MVT::MvAddMv(ONE,*MP_, ZERO,*MP_, *tmpMV_);
01686               // perform [MX MP][CX1 CP1]
01687               //                [CX3 CP3]
01688               MVT::MvTimesMatAddMv( ONE, *R_,     CX1, ZERO, *MX_ );
01689               MVT::MvTimesMatAddMv( ONE, *tmpMV_, CX3,  ONE, *MX_ );
01690               MVT::MvTimesMatAddMv( ONE, *R_,     CP1, ZERO, *MP_ );
01691               MVT::MvTimesMatAddMv( ONE, *tmpMV_, CP3,  ONE, *MP_ );
01692             }
01693             else {
01694               // copy MX
01695               MVT::MvAddMv(ONE,*MX_, ZERO,*MX_, *R_);
01696               // perform [MX MP][CX1  0 ]
01697               //                [CX3 CP3]
01698               MVT::MvTimesMatAddMv( ONE, *R_ , CX1, ZERO, *MX_ );
01699               MVT::MvTimesMatAddMv( ONE, *MP_, CX3,  ONE, *MX_ );
01700               // copy MP
01701               MVT::MvAddMv(ONE,*MP_, ZERO,*MP_, *R_);
01702               MVT::MvTimesMatAddMv( ONE, *R_, CP3, ZERO, *MP_ );
01703             }
01704             // MX <- MX + MH*CX2
01705             // MP <- MP + MH*CP2
01706             MVT::MvTimesMatAddMv( ONE, *MH_, CX2,  ONE, *MX_ );
01707             MVT::MvTimesMatAddMv( ONE, *MH_, CP2,  ONE, *MP_ );
01708           }
01709 
01710         } // if (localSize == threeBlocks)
01711       } // end timing block
01712 
01713       // Compute the new residuals, explicitly
01714       {
01715         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01716         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01717         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01718         for (int i = 0; i < blockSize_; i++) {
01719           T(i,i) = theta_[i];
01720         }
01721         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01722       }
01723 
01724       // R has been updated; mark the norms as out-of-date
01725       Rnorms_current_ = false;
01726       R2norms_current_ = false;
01727 
01728 
01729       // When required, monitor some orthogonalities
01730       if (om_->isVerbosity( Debug ) ) {
01731         // Check almost everything here
01732         CheckList chk;
01733         chk.checkX = true;
01734         chk.checkKX = true;
01735         chk.checkMX = true;
01736         chk.checkP = true;
01737         chk.checkKP = true;
01738         chk.checkMP = true;
01739         chk.checkR = true;
01740         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01741       }
01742       else if (om_->isVerbosity( OrthoDetails )) {
01743         CheckList chk;
01744         chk.checkX = true;
01745         chk.checkP = true;
01746         chk.checkR = true;
01747         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01748       }
01749     } // end while (statusTest == false)
01750   }
01751 
01752 
01754   // compute/return residual M-norms
01755   template <class ScalarType, class MV, class OP>
01756   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01757   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01758     if (Rnorms_current_ == false) {
01759       // Update the residual norms
01760       orthman_->norm(*R_,&Rnorms_);
01761       Rnorms_current_ = true;
01762     }
01763     return Rnorms_;
01764   }
01765 
01766   
01768   // compute/return residual 2-norms
01769   template <class ScalarType, class MV, class OP>
01770   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01771   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01772     if (R2norms_current_ == false) {
01773       // Update the residual 2-norms 
01774       MVT::MvNorm(*R_,&R2norms_);
01775       R2norms_current_ = true;
01776     }
01777     return R2norms_;
01778   }
01779 
01780 
01781 
01782 
01784   // Check accuracy, orthogonality, and other debugging stuff
01785   // 
01786   // bools specify which tests we want to run (instead of running more than we actually care about)
01787   //
01788   // we don't bother checking the following because they are computed explicitly:
01789   //    H == Prec*R
01790   //   KH == K*H
01791   //
01792   // 
01793   // checkX : X orthonormal
01794   //          orthogonal to auxvecs
01795   // checkMX: check MX == M*X
01796   // checkKX: check KX == K*X
01797   // checkP : if fullortho P orthonormal and orthogonal to X
01798   //          orthogonal to auxvecs
01799   // checkMP: check MP == M*P
01800   // checkKP: check KP == K*P
01801   // checkH : if fullortho H orthonormal and orthogonal to X and P
01802   //          orthogonal to auxvecs
01803   // checkMH: check MH == M*H
01804   // checkR : check R orthogonal to X
01805   // checkQ : check that auxiliary vectors are actually orthonormal
01806   //
01807   // TODO: 
01808   //  add checkTheta 
01809   //
01810   template <class ScalarType, class MV, class OP>
01811   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const string &where ) const 
01812   {
01813     stringstream os;
01814     os.precision(2);
01815     os.setf(ios::scientific, ios::floatfield);
01816     MagnitudeType tmp;
01817 
01818     os << " Debugging checks: iteration " << iter_ << where << endl;
01819 
01820     // X and friends
01821     if (chk.checkX && initialized_) {
01822       tmp = orthman_->orthonormError(*X_);
01823       os << " >> Error in X^H M X == I : " << tmp << endl;
01824       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01825         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
01826         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
01827       }
01828     }
01829     if (chk.checkMX && hasM_ && initialized_) {
01830       tmp = MSUtils_.errorEquality(X_.get(), MX_.get(), MOp_.get());
01831       os << " >> Error in MX == M*X    : " << tmp << endl;
01832     }
01833     if (chk.checkKX && initialized_) {
01834       tmp = MSUtils_.errorEquality(X_.get(), KX_.get(), Op_.get());
01835       os << " >> Error in KX == K*X    : " << tmp << endl;
01836     }
01837 
01838     // P and friends
01839     if (chk.checkP && hasP_ && initialized_) {
01840       if (fullOrtho_) {
01841         tmp = orthman_->orthonormError(*P_);
01842         os << " >> Error in P^H M P == I : " << tmp << endl;
01843         tmp = orthman_->orthogError(*P_,*X_);
01844         os << " >> Error in P^H M X == 0 : " << tmp << endl;
01845       }
01846       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01847         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
01848         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
01849       }
01850     }
01851     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
01852       tmp = MSUtils_.errorEquality(P_.get(), MP_.get(), MOp_.get());
01853       os << " >> Error in MP == M*P    : " << tmp << endl;
01854     }
01855     if (chk.checkKP && hasP_ && initialized_) {
01856       tmp = MSUtils_.errorEquality(P_.get(), KP_.get(), Op_.get());
01857       os << " >> Error in KP == K*P    : " << tmp << endl;
01858     }
01859 
01860     // H and friends
01861     if (chk.checkH && initialized_) {
01862       if (fullOrtho_) {
01863         tmp = orthman_->orthonormError(*H_);
01864         os << " >> Error in H^H M H == I : " << tmp << endl;
01865         tmp = orthman_->orthogError(*H_,*X_);
01866         os << " >> Error in H^H M X == 0 : " << tmp << endl;
01867         if (hasP_) {
01868           tmp = orthman_->orthogError(*H_,*P_);
01869           os << " >> Error in H^H M P == 0 : " << tmp << endl;
01870         }
01871       }
01872       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01873         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
01874         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
01875       }
01876     }
01877     if (chk.checkMH && hasM_ && initialized_) {
01878       tmp = MSUtils_.errorEquality(H_.get(), MH_.get(), MOp_.get());
01879       os << " >> Error in MH == M*H    : " << tmp << endl;
01880     }
01881 
01882     // R: this is not M-orthogonality, but standard euclidean orthogonality
01883     if (chk.checkR && initialized_) {
01884       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
01885       MVT::MvTransMv(ONE,*X_,*R_,xTx);
01886       tmp = xTx.normFrobenius();
01887       os << " >> Error in X^H R == 0   : " << tmp << endl;
01888     }
01889 
01890     // Q
01891     if (chk.checkQ) {
01892       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01893         tmp = orthman_->orthonormError(*auxVecs_[i]);
01894         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
01895         for (unsigned int j=i+1; j<auxVecs_.size(); j++) {
01896           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
01897           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
01898         }
01899       }
01900     }
01901 
01902     os << endl;
01903 
01904     return os.str();
01905   }
01906 
01907 
01909   // Print the current status of the solver
01910   template <class ScalarType, class MV, class OP>
01911   void 
01912   LOBPCG<ScalarType,MV,OP>::currentStatus(ostream &os) 
01913   {
01914     os.setf(ios::scientific, ios::floatfield);  
01915     os.precision(6);
01916     os <<endl;
01917     os <<"================================================================================" << endl;
01918     os << endl;
01919     os <<"                              LOBPCG Solver Status" << endl;
01920     os << endl;
01921     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
01922     os <<"The number of iterations performed is " << iter_       << endl;
01923     os <<"The current block size is             " << blockSize_  << endl;
01924     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
01925     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
01926     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
01927     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
01928 
01929     os.setf(ios_base::right, ios_base::adjustfield);
01930 
01931     if (initialized_) {
01932       os << endl;
01933       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
01934       os << std::setw(20) << "Eigenvalue" 
01935          << std::setw(20) << "Residual(M)"
01936          << std::setw(20) << "Residual(2)"
01937          << endl;
01938       os <<"--------------------------------------------------------------------------------"<<endl;
01939       for (int i=0; i<blockSize_; i++) {
01940         os << std::setw(20) << theta_[i];
01941         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
01942         else os << std::setw(20) << "not current";
01943         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
01944         else os << std::setw(20) << "not current";
01945         os << endl;
01946       }
01947     }
01948     os <<"================================================================================" << endl;
01949     os << endl;
01950   }
01951 
01952   
01953 } // end Anasazi namespace
01954 
01955 #endif // ANASAZI_LOBPCG_HPP