Tpetra_CrsMatrix.hpp

00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2001) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_CRSMATRIX_HPP
00030 #define TPETRA_CRSMATRIX_HPP
00031 
00032 #include <Teuchos_SerialDenseMatrix.hpp>
00033 #include <Teuchos_CommHelpers.hpp>
00034 #include <Teuchos_ScalarTraits.hpp>
00035 #include <Teuchos_OrdinalTraits.hpp>
00036 #include <Teuchos_VerboseObject.hpp>
00037 #include "Tpetra_Operator.hpp"
00038 #include "Tpetra_Map.hpp"
00039 #include "Tpetra_Import.hpp"
00040 #include "Tpetra_Export.hpp"
00041 
00042 namespace Tpetra {
00043   // struct for i,j,v triplets
00044   template <class Ordinal, class Scalar>
00045   struct CrsIJV {
00046     CrsIJV() {}
00047     CrsIJV(Ordinal row, Ordinal col, const Scalar &val) {
00048       i = row;
00049       j = col;
00050       v = val;
00051     }
00052     Ordinal i,j;
00053     Scalar  v;
00054   };
00055 }
00056 
00057 namespace Teuchos {
00058   // SerializationTraits specialization for CrsIJV, using DirectSerialization
00059   template<typename Ordinal, typename Scalar>
00060   class SerializationTraits<Ordinal,Tpetra::CrsIJV<Ordinal,Scalar> >
00061   : public DirectSerializationTraits<Ordinal,Tpetra::CrsIJV<Ordinal,Scalar> >
00062   {};
00063 }
00064 
00065 namespace Tpetra 
00066 {
00068 
00079   template<class Ordinal, class Scalar>
00080   class CrsMatrix : public Operator<Ordinal,Scalar>
00081   {
00082     public:
00084 
00085 
00087       CrsMatrix(const Map<Ordinal> &rowMap);
00088 
00089       // !Destructor.
00090       virtual ~CrsMatrix();
00091 
00093 
00094 
00095       
00097       inline bool isFillCompleted() const;
00098 
00100       Teuchos::RCP<const Teuchos::Comm<Ordinal> > getComm() const;
00101 
00103       inline Ordinal getNumGlobalNonzeros() const;
00104 
00106       inline Ordinal getNumMyNonzeros() const;
00107 
00109       inline Ordinal getNumGlobalRows() const;
00110 
00112       inline Ordinal getNumGlobalCols() const;
00113 
00115       inline Ordinal getNumMyRows() const;
00116 
00118       inline Ordinal getNumMyCols() const;
00119 
00121       inline Ordinal getNumGlobalDiagonals() const;
00122 
00124       inline Ordinal getNumMyDiagonals() const;
00125 
00127       inline Ordinal getGlobalMaxNumEntries() const;
00128 
00130       inline Ordinal getMyMaxNumEntries() const;
00131 
00133       inline Ordinal getIndexBase() const;
00134       
00136       const Map<Ordinal> & getRowMap() const;
00137 
00139       const Map<Ordinal> & getColMap() const;
00140 
00142 
00144 
00145       
00147       const Map<Ordinal> & getDomainMap() const;
00148 
00150       const Map<Ordinal> & getRangeMap() const;
00151 
00153       void apply(const MultiVector<Ordinal,Scalar>& X, MultiVector<Ordinal,Scalar> &Y, Teuchos::ETransp mode = Teuchos::NO_TRANS) const;
00154 
00156 
00158 
00159 
00161       void fillComplete();
00162 
00164       void submitEntry(Ordinal globalRow, Ordinal globalCol,
00165                        const Scalar &value);
00166 
00168 
00169       void submitEntries(Ordinal globalRow, 
00170                          const Teuchos::ArrayView<const Ordinal> &cols,
00171                          const Teuchos::ArrayView<const Scalar> &vals);
00172 
00174       void setAllToScalar(const Scalar &alpha);
00175 
00177       void scale(const Scalar &alpha);
00178 
00179       // @}
00180 
00182       // @{ 
00183 
00185       inline Ordinal getNumRowEntries(Ordinal globalRow) const;
00186 
00188       void getMyRowCopy(Ordinal myRow, const Teuchos::ArrayView<Ordinal> &indices, 
00189                                        const Teuchos::ArrayView<Scalar> &values) const;
00190 
00192       void getGlobalRowCopy(Ordinal globalRow, 
00193                             const Teuchos::ArrayView<Ordinal> &indices,
00194                             const Teuchos::ArrayView<Scalar> &values) const;
00195 
00197 
00199 
00200       
00202       void print(std::ostream& os) const;
00203 
00204       // @}
00205 
00206     private:
00207       // copy constructor disabled
00208       CrsMatrix(const CrsMatrix<Ordinal,Scalar> &Source);
00209       // operator= disabled
00210       CrsMatrix& operator=(const CrsMatrix<Ordinal, Scalar> &rhs);
00211       // useful typedefs
00212       typedef Teuchos::OrdinalTraits<Ordinal> OT;
00213 
00214       // Performs importing of off-processor elements and adds them to the locally owned elements.
00215       void globalAssemble();
00216       // multiplication routines
00217       void GeneralMV(const Teuchos::ArrayView<const Scalar> &X, const Teuchos::ArrayView<Scalar> &Y) const;
00218       void GeneralMM(const Teuchos::ArrayView<const Teuchos::ArrayView<const Scalar> > &X, const Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > &Y) const;
00219 
00220       const Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm_;
00221       const Map<Ordinal>& rowMap_;
00222       Teuchos::RCP<const Map<Ordinal> > colMap_;
00223 
00224       Ordinal numMyRows_, numGlobalRows_;
00225 
00226       // column indices
00227       Teuchos::Array<Teuchos::Array<Ordinal> > colinds_;
00228       // values
00229       Teuchos::Array<Teuchos::Array<Scalar> > values_;
00230 
00231       // TODO: consider using a contiguous storage
00232       // costs are: insertions are more difficult
00233       mutable Teuchos::RCP<MultiVector<Ordinal,Scalar> > importMV_, exportMV_;
00234 
00235       // importer is needed if DomainMap is not sameas ColumnMap
00236       // FINISH: currently, domain map == range map == row map which is typically != column map
00237       //         ergo, we will usually have an importer
00238       Teuchos::RCP<Import<Ordinal> > importer_;
00239       // exporter is needed if RowMap is not sameas DomainMap
00240       // FINISH: currently, domain map == range map == row map, so that we never have an exporter
00241       Teuchos::RCP<Export<Ordinal> > exporter_;
00242 
00243       // a map between a (non-local) row and a list of (col,val)
00244       // TODO: switch this to a hash-based map (instead of a sort-based map) as soon as one is available
00245       std::map<Ordinal, std::list<std::pair<Ordinal,Scalar> > > nonlocals_;
00246 
00247       bool fillCompleted_;
00248 
00249       Ordinal globalNNZ_;
00250       Ordinal myNNZ_;
00251 
00252       Ordinal numGlobalDiags_;
00253       Ordinal numMyDiags_;
00254 
00255       Ordinal globalMaxNumEntries_;
00256       Ordinal myMaxNumEntries_;
00257 
00258   }; // class CrsMatrix
00259 
00260 
00261 
00264 
00265   template<class Ordinal, class Scalar>
00266   CrsMatrix<Ordinal,Scalar>::CrsMatrix(const Map<Ordinal> &rowMap) 
00267   : comm_(rowMap.getComm())
00268   , rowMap_(rowMap)
00269   , numMyRows_(rowMap.getNumMyEntries())
00270   , numGlobalRows_(rowMap.getNumGlobalEntries())
00271   , colinds_(numMyRows_)
00272   ,  values_(numMyRows_)
00273   {
00274     fillCompleted_       = false;
00275     globalNNZ_           = OT::zero();
00276     myNNZ_               = OT::zero();
00277     numGlobalDiags_      = OT::zero();
00278     numMyDiags_          = OT::zero();
00279     globalMaxNumEntries_ = OT::zero();
00280     myMaxNumEntries_     = OT::zero();
00281   }
00282 
00283 
00284   template<class Ordinal, class Scalar>
00285   CrsMatrix<Ordinal,Scalar>::~CrsMatrix()
00286   {}
00287 
00288 
00289   template<class Ordinal, class Scalar>
00290   bool CrsMatrix<Ordinal,Scalar>::isFillCompleted() const
00291   { return fillCompleted_; }
00292 
00293 
00294   template<class Ordinal, class Scalar>
00295   Teuchos::RCP<const Teuchos::Comm<Ordinal> > 
00296   CrsMatrix<Ordinal,Scalar>::getComm() const
00297   { return comm_; }
00298 
00299 
00300   template<class Ordinal, class Scalar>
00301   Ordinal CrsMatrix<Ordinal,Scalar>::getNumGlobalNonzeros() const
00302   {
00303     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00304       "Tpetra::CrsMatrix: cannot call getNumGlobalNonzeros() until fillComplete() has been called.");
00305     return globalNNZ_;
00306   }
00307 
00308 
00309   template<class Ordinal, class Scalar>
00310   Ordinal CrsMatrix<Ordinal,Scalar>::getNumMyNonzeros() const
00311   {
00312     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00313       "Tpetra::CrsMatrix: cannot call getNumMyNonzeros() until fillComplete() has been called.");
00314     return myNNZ_;
00315   }
00316 
00317 
00318   template<class Ordinal, class Scalar>
00319   Ordinal CrsMatrix<Ordinal,Scalar>::getNumGlobalRows() const
00320   { return rowMap_.getNumGlobalEntries(); }
00321 
00322 
00323   template<class Ordinal, class Scalar>
00324   Ordinal CrsMatrix<Ordinal,Scalar>::getNumGlobalCols() const
00325   {
00326     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00327       "Tpetra::CrsMatrix: cannot call getNumGlobalCols() until fillComplete() has been called.");
00328     return colMap_->getNumGlobalEntries();
00329   }
00330 
00331 
00332   template<class Ordinal, class Scalar>
00333   Ordinal CrsMatrix<Ordinal,Scalar>::getNumMyRows() const
00334   { return rowMap_.getNumMyEntries(); }
00335 
00336 
00337   template<class Ordinal, class Scalar>
00338   Ordinal CrsMatrix<Ordinal,Scalar>::getNumMyCols() const
00339   {
00340     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00341       "Tpetra::CrsMatrix: cannot call getNumMyCols() until fillComplete() has been called.");
00342     return colMap_->getNumMyEntries();
00343   }
00344 
00345 
00346   template<class Ordinal, class Scalar>
00347   Ordinal CrsMatrix<Ordinal,Scalar>::getNumGlobalDiagonals() const
00348   {
00349     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00350       "Tpetra::CrsMatrix: cannot call getNumGlobalDiagonals() until fillComplete() has been called.");
00351     return numGlobalDiags_;
00352   }
00353 
00354 
00355   template<class Ordinal, class Scalar>
00356   Ordinal CrsMatrix<Ordinal,Scalar>::getNumMyDiagonals() const
00357   {
00358     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00359       "Tpetra::CrsMatrix: cannot call getNumMyDiagonals() until fillComplete() has been called.");
00360     return numMyDiags_;
00361   }
00362 
00363 
00364   template<class Ordinal, class Scalar>
00365   Ordinal CrsMatrix<Ordinal,Scalar>::getNumRowEntries(Ordinal globalRow) const
00366   {
00367     TEST_FOR_EXCEPTION(!rowMap_.isMyGlobalIndex(globalRow), std::runtime_error,
00368         "Tpetra::CrsMatrix::getNumRowEntries(globalRow): globalRow does not belong to this node.");
00369     return values_[rowMap_.getLocalIndex(globalRow)].size();
00370   }
00371 
00372 
00373   template<class Ordinal, class Scalar>
00374   Ordinal CrsMatrix<Ordinal,Scalar>::getGlobalMaxNumEntries() const
00375   {
00376     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00377       "Tpetra::CrsMatrix: cannot call getGlobalMaxNumEntries() until fillComplete() has been called.");
00378     return globalMaxNumEntries_;
00379   }
00380 
00381 
00382   template<class Ordinal, class Scalar>
00383   Ordinal CrsMatrix<Ordinal,Scalar>::getMyMaxNumEntries() const
00384   {
00385     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00386       "Tpetra::CrsMatrix: cannot call getMyMaxNumEntries() until fillComplete() has been called.");
00387     return myMaxNumEntries_;
00388   }
00389 
00390 
00391   template<class Ordinal, class Scalar>
00392   Ordinal CrsMatrix<Ordinal,Scalar>::getIndexBase() const
00393   { return rowMap_.getIndexBase(); }
00394 
00395 
00396   template<class Ordinal, class Scalar>
00397   const Map<Ordinal> & CrsMatrix<Ordinal,Scalar>::getRowMap() const 
00398   { return rowMap_; }
00399 
00400 
00401   template<class Ordinal, class Scalar>
00402   const Map<Ordinal> & CrsMatrix<Ordinal,Scalar>::getColMap() const 
00403   { 
00404     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00405         "Tpetra::CrsMatrix: cannot call getColMap() until fillComplete() has been called.");
00406     return *colMap_; 
00407   }
00408 
00409   template<class Ordinal, class Scalar>
00410   const Map<Ordinal> & CrsMatrix<Ordinal,Scalar>::getDomainMap() const
00411   {
00412     return getColMap();
00413   }
00414 
00415   template<class Ordinal, class Scalar>
00416   const Map<Ordinal> & CrsMatrix<Ordinal,Scalar>::getRangeMap() const
00417   {
00418     return getRowMap();
00419   }
00420 
00421   template<class Ordinal, class Scalar>
00422   void CrsMatrix<Ordinal,Scalar>::submitEntry(Ordinal globalRow, Ordinal globalCol, const Scalar &value)
00423   {
00424     TEST_FOR_EXCEPTION(isFillCompleted() == true, std::runtime_error,
00425       "Tpetra::CrsMatrix::submitEntry(): fillComplete() has already been called.");
00426     if (rowMap_.isMyGlobalIndex(globalRow)) {
00427       Ordinal myRow = rowMap_.getLocalIndex(globalRow);
00428       colinds_[myRow].push_back(globalCol);
00429       values_[myRow].push_back(value);
00430     }
00431     else {
00432       nonlocals_[globalRow].push_back(std::make_pair(globalCol, value));
00433     }
00434   }
00435 
00436 
00437   template<class Ordinal, class Scalar>
00438   void CrsMatrix<Ordinal,Scalar>::submitEntries(Ordinal globalRow, 
00439                          const Teuchos::ArrayView<const Ordinal> &indices,
00440                          const Teuchos::ArrayView<const Scalar>  &values)
00441   {
00442     TEST_FOR_EXCEPTION(values.size() != indices.size(), std::runtime_error,
00443         "Tpetra::CrsMatrix::submitEntries(): values.size() must equal indices.size().");
00444     typename  Teuchos::ArrayView<const Scalar>::iterator val =  values.begin();
00445     typename Teuchos::ArrayView<const Ordinal>::iterator ind = indices.begin();
00446     for (; val != values.end(); ++val, ++ind) 
00447     {
00448       submitEntry(globalRow, *ind, *val);
00449     }
00450   }
00451 
00452 
00453   template<class Ordinal, class Scalar>
00454   void CrsMatrix<Ordinal,Scalar>::setAllToScalar(const Scalar &alpha)
00455   { 
00456     using Teuchos::Array;
00457     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00458         "Tpetra::CrsMatrix: cannot call setAllToScalar() until fillComplete() has been called.");
00459     for (typename Array<Array<Scalar> >::const_iterator row = values_.begin(); row != values_.end(); ++row) 
00460     {
00461       std::fill(row->begin(),row->end(),alpha);
00462     }
00463   }
00464 
00465 
00466   template<class Ordinal, class Scalar>
00467   void CrsMatrix<Ordinal,Scalar>::scale(const Scalar &alpha)
00468   { 
00469     using Teuchos::Array;
00470     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00471         "Tpetra::CrsMatrix: cannot call scale() until fillComplete() has been called.");
00472     for (typename Array<Array<Scalar> >::const_iterator row = values_.begin(); row != values_.end(); ++row) 
00473     {
00474       for (typename Array<Scalar>::const_iterator val = row->begin(); val != row->end(); ++val)
00475       {
00476         *val *= alpha;
00477       }
00478     }
00479   }
00480 
00481 
00482   template<class Ordinal, class Scalar>
00483   void CrsMatrix<Ordinal,Scalar>::getMyRowCopy(Ordinal myRow, 
00484                                                const Teuchos::ArrayView<Ordinal> &indices, 
00485                                                const Teuchos::ArrayView<Scalar>  &values) const
00486   {
00487     TEST_FOR_EXCEPTION(isFillCompleted() == false, std::runtime_error,
00488         "Tpetra::CrsMatrix: cannot call getMyRowCopy() until fillComplete() has been called.");
00489     TEST_FOR_EXCEPTION(indices.size() != colinds_[myRow].size() || values.size() != values_[myRow].size(), std::runtime_error, 
00490         "Tpetra::CrsMatrix::getMyRowCopy(indices,values): size of indices,values must be sufficient to store the values for the specified row.");
00491     std::copy( values_[myRow].begin(), values_[myRow].end(), values.begin());
00492     std::copy(colinds_[myRow].begin(),colinds_[myRow].end(),indices.begin());
00493   }
00494 
00495 
00496   template<class Ordinal, class Scalar>
00497   void CrsMatrix<Ordinal,Scalar>::getGlobalRowCopy(Ordinal globalRow, 
00498                                                    const Teuchos::ArrayView<Ordinal> &indices,
00499                                                    const Teuchos::ArrayView<Scalar>  &values) const
00500   {
00501     // TODO: is it possible to even call this one? we can't preallocate for the output buffer without knowing the size, 
00502     //       which (above) seems to require fillCompleted()
00503     // 
00504     // Only locally owned rows can be queried, otherwise complain
00505     TEST_FOR_EXCEPTION(!rowMap_.isMyGlobalIndex(globalRow), std::runtime_error,
00506         "Tpetra::CrsMatrix::getGlobalRowCOpy(globalRow): globalRow does not belong to this node.");
00507     Ordinal myRow = rowMap_.getLocalIndex(globalRow);
00508     TEST_FOR_EXCEPTION(
00509         Teuchos::as<typename Teuchos::Array<Ordinal>::size_type>(indices.size()) != colinds_[myRow].size() 
00510           || Teuchos::as<typename Teuchos::Array<Scalar>::size_type>(values.size()) != values_[myRow].size(), std::runtime_error, 
00511         "Tpetra::CrsMatrix::getGlobalRowCopy(indices,values): size of indices,values must be sufficient to store the values for the specified row.");
00512     std::copy(values_[myRow].begin(),values_[myRow].end(),values.begin());
00513     if (isFillCompleted())
00514     {
00515       // translate local IDs back to global IDs
00516       typename Teuchos::ArrayView<Ordinal>::iterator gind = indices.begin();
00517       for (typename Teuchos::Array<Ordinal>::const_iterator lind = colinds_[myRow].begin();
00518            lind != colinds_[myRow].end(); ++lind, ++gind) 
00519       {
00520         (*gind) = colMap_->getGlobalIndex(*lind);
00521       }
00522     }
00523     else
00524     {
00525       std::copy(colinds_[myRow].begin(),colinds_[myRow].end(),indices.begin());
00526     }
00527   }
00528 
00529 
00530   template<class Ordinal, class Scalar>
00531   void CrsMatrix<Ordinal,Scalar>::apply(const MultiVector<Ordinal,Scalar> &X, MultiVector<Ordinal,Scalar> &Y, Teuchos::ETransp mode) const
00532   {
00533     // TODO: add support for alpha,beta term coefficients: Y = alpha*A*X + beta*Y
00534     using Teuchos::null;
00535     using Teuchos::ArrayView;
00536     TEST_FOR_EXCEPTION(!isFillCompleted(), std::runtime_error, 
00537         "Tpetra::CrsMatrix: cannot call apply() until fillComplete() has been called.");
00538     TEST_FOR_EXCEPTION(X.numVectors() != Y.numVectors(), std::runtime_error,
00539         "Tpetra::CrsMatrix::apply(X,Y): X and Y must have the same number of vectors.");
00540 
00541     // FINISH: in debug mode, we need to establish that X and Y are compatible with (sameas?) DomainMap and RangeMap (respectively)
00542     // FINISH: or maybe not; I guess this should happen below in the calls to import(), export()
00543 
00544 #   ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00545     int myImageID = Teuchos::rank(*comm_);
00546     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
00547     if (myImageID == 0) {
00548       *out << "Entering CrsMatrix::apply()" << std::endl
00549                 << "Column Map: " << std::endl;
00550     }
00551     *out << this->getColMap() << std::endl;
00552     if (myImageID == 0) {
00553       *out << "Initial input: " << std::endl;
00554     }
00555     X.print(*out); X.printValues(*out);
00556 #   endif
00557 
00558     Ordinal numVectors = X.numVectors();
00559     // because of Views, it is difficult to determine if X and Y point to the same data. 
00560     // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
00561     // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
00562     Teuchos::RCP<const MultiVector<Ordinal,Scalar> > Xcopy;
00563     ArrayView<const ArrayView<const Scalar> > Xdata = X.extractConstView();
00564     ArrayView<const ArrayView<      Scalar> > Ydata = Y.extractView();
00565     if (&X==&Y && importer_==null && exporter_==null) {
00566 #     if defined(THROW_TPETRA_EFFICIENCY_WARNINGS) || defined(PRINT_TPETRA_EFFICIENCY_WARNINGS)
00567       std::string err;
00568       err += "Tpetra::CrsMatrix<" + Teuchos::TypeNameTraits<Ordinal>::name() + "," + Teuchos::TypeNameTraits<Ordinal>::name()
00569         + ">::apply(X,Y): If X and Y are the same, it necessitates a temporary copy of X, which is inefficient.";
00570 #     if defined(THROW_TPETRA_EFFICIENCY_WARNINGS)
00571       TEST_FOR_EXCEPTION(true, std::runtime_error, err);
00572 #     else
00573       std::cerr << err << std::endl;
00574 #     endif
00575 #     endif
00576       // generate a copy of X 
00577       Xcopy = Teuchos::rcp(new MultiVector<Ordinal,Scalar>(X));
00578       Xdata = Xcopy->extractConstView();
00579 #   ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00580       if (myImageID == 0) *out << "X and Y are co-located, duplicating X results in a stride copy" << std::endl;
00581       *out << this->getColMap() << std::endl;
00582       Xcopy->print(*out); Xcopy->printValues(*out);
00583 #   endif
00584     }
00585     if (importer_ != null) {
00586       if (importMV_ != null && importMV_->numVectors() != numVectors) importMV_ = null;
00587       if (importMV_ == null) {
00588         importMV_ = Teuchos::rcp( new MultiVector<Ordinal,Scalar>(*colMap_,numVectors) );
00589       }
00590     }
00591     if (exporter_ != null) {
00592       if (exportMV_ != null && exportMV_->numVectors() != numVectors) exportMV_ = null;
00593       if (exportMV_ == null) {
00594         exportMV_ = Teuchos::rcp( new MultiVector<Ordinal,Scalar>(rowMap_,numVectors) );
00595       }
00596     }
00597     // only support NO_TRANS currently
00598     TEST_FOR_EXCEPTION(mode != Teuchos::NO_TRANS, std::logic_error,
00599         "Tpetra::CrsMatrix::apply() does not currently support transposed multiplications.");
00600     if (mode == Teuchos::NO_TRANS) {
00601       // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
00602       if (importer_ != null) {
00603         importMV_->doImport(X, *importer_, INSERT);
00604         Xdata = importMV_->extractConstView();
00605 #   ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00606         if (myImageID == 0) {
00607           *out << "Performed import of X..." << std::endl;
00608         }
00609         importMV_->print(*out); importMV_->printValues(*out);
00610 #   endif
00611       }
00612       // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
00613       // We will compute solution into the to-be-exported MV; get a view
00614       if (exporter_ != null) {
00615         Ydata = exportMV_->extractView();
00616       }
00617       // Do actual computation
00618       if (numVectors==Teuchos::OrdinalTraits<Ordinal>::one()) {
00619         GeneralMV(Xdata[0],Ydata[0]);
00620       }
00621       else {
00622         GeneralMM(Xdata,Ydata);
00623       }
00624 #   ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00625       if (myImageID == 0) *out << "Matrix-MV product..." << std::endl;
00626       if (exportMV_ != null) {
00627         exportMV_->print(*out); exportMV_->printValues(*out);
00628       } else {
00629         Y.print(*out); Y.printValues(*out);
00630       }
00631 #   endif
00632       // do the export
00633       if (exporter_ != null) {
00634         Y.putScalar(0.0);  // Make sure target is zero: necessary because we are adding. may need adjusting for alpha,beta apply()
00635         Y.doExport(*exportMV_, *exporter_, ADD); // Fill Y with Values from export vector
00636 #   ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00637         if (myImageID == 0) *out << "Output vector after export()..." << std::endl;
00638         Y.print(*out); Y.printValues(*out);
00639 #   endif
00640       }
00641       // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
00642       if (Y.isDistributed() == false) {
00643         Y.reduce();
00644 #   ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00645         if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
00646         Y.print(*out); Y.printValues(*out);
00647 #   endif
00648       }
00649     }
00650   }
00651 
00652 
00653   template<class Ordinal, class Scalar>
00654   void CrsMatrix<Ordinal,Scalar>::print(std::ostream& os) const 
00655   {
00656     using std::endl;
00657     int myImageID = Teuchos::rank(*comm_);
00658     if (myImageID == 0)
00659     {
00660       os << "Tpetra::CrsMatrix, label = " << this->label() << endl;
00661       os << "Number of global rows    = " << getNumGlobalRows() << endl;
00662       if (isFillCompleted())
00663       {
00664         os << "Number of global columns    = " << getNumGlobalCols() << endl;
00665         os << "Status = fillCompleted" << endl;
00666         os << "Number of global nonzeros   = " << getNumGlobalNonzeros() << endl;
00667         os << "Global max nonzeros per row = " << getGlobalMaxNumEntries() << endl;
00668       }
00669       else
00670       {
00671         os << "Status = not fillCompleted" << endl;
00672       }
00673     }
00674     if (isFillCompleted())
00675     {
00676       for (int pid=0; pid < Teuchos::size(*comm_); ++pid)
00677       {
00678         if (pid == myImageID)
00679         {
00680           Teuchos::Array<Ordinal> indices(getMyMaxNumEntries());
00681           Teuchos::Array<Scalar>   values(getMyMaxNumEntries());
00682           os << "% Number of rows on image " << myImageID << " = " << getNumMyRows() << endl;
00683           for (Ordinal i=OT::zero(); i < getNumMyRows(); ++i)
00684           {
00685             Ordinal globalRow = rowMap_.getGlobalIndex(i);
00686             Ordinal rowSize = getNumRowEntries(globalRow);
00687             if (rowSize > Teuchos::OrdinalTraits<Ordinal>::zero()) {
00688               getGlobalRowCopy(globalRow, indices(0,rowSize), values(0,rowSize));
00689               for (Ordinal j=OT::zero(); j < rowSize; ++j) {
00690                 os << "Matrix(" << globalRow << ", " << indices[j] << ") = " << values[j] << endl;
00691               }
00692             }
00693           }
00694         }
00695         Teuchos::barrier(*comm_);
00696         Teuchos::barrier(*comm_);
00697         Teuchos::barrier(*comm_);
00698       }
00699     }
00700   }
00701 
00702 
00703 
00706   template<class Ordinal, class Scalar>
00707   void CrsMatrix<Ordinal,Scalar>::fillComplete()
00708   {
00709     using Teuchos::Array;
00710     TEST_FOR_EXCEPTION(isFillCompleted() == true, std::runtime_error,
00711       "Tpetra::CrsMatrix::fillComplete(): fillComplete() has already been called.");
00712 
00713     Teuchos::ArrayView<const Ordinal> myGlobalEntries = rowMap_.getMyGlobalEntries();
00714 
00715     // =============================== //
00716     // Part 0: send off-image elements //
00717     // =============================== //
00718     if (comm_->getSize() > 1) {
00719       globalAssemble();
00720     }
00721     else {
00722       TEST_FOR_EXCEPTION(nonlocals_.size() > 0, std::runtime_error,
00723           "Tpetra::CrsMatrix::fillComplete(): cannot have non-local entries on a serial run. Invalid entry was submitted to the CrsMatrix.");
00724     }
00725 
00726     // =============================== //
00727     // Part I: remove repeated indices //
00728     // =============================== //
00729     //
00730     // I load all matrix entries in a hash table, then I re-fill
00731     // the row with the last inserted value.
00732     // This also has the effect of sorting the indices.
00733     for (Ordinal r = OT::zero(); r < numMyRows_ ; ++r)
00734     {
00735       // put them in the map from colinds_,values_
00736       std::map<Ordinal,Scalar> row;
00737       {
00738         typename Array<Ordinal>::const_iterator cind = colinds_[r].begin();
00739         typename  Array<Scalar>::const_iterator val  = values_[r].begin();
00740         for (; cind != colinds_[r].end(); ++cind, ++val)
00741         {
00742           row[*cind] += *val;
00743         }
00744       }
00745       // get them out of the map, back to colinds_,values_
00746       typename std::map<Ordinal,Scalar>::size_type count = 0;
00747       for (typename std::map<Ordinal,Scalar>::iterator iter = row.begin(); 
00748            iter != row.end(); ++iter)
00749       {
00750         colinds_[r][count] = iter->first;
00751         values_[r][count] = iter->second;
00752         ++count;
00753       }
00754       colinds_[r].resize(count);
00755       values_[r].resize(count);
00756     }
00757 
00758     // =============================== //
00759     // Part II: build the column space //
00760     // =============================== //
00761     // construct a list of columns for which we have a non-zero, but are not locally owned by our row map
00762     std::set<Ordinal> nnzcols;
00763     for (Ordinal r=OT::zero(); r < numMyRows_ ; ++r)
00764     {
00765       for (typename Array<Ordinal>::const_iterator cind = colinds_[r].begin();
00766            cind != colinds_[r].end(); ++cind)
00767       {
00768         if (!rowMap_.isMyGlobalIndex(*cind))
00769         {
00770           nnzcols.insert(*cind);
00771         }
00772       }
00773     }
00774     Array<Ordinal> myPaddedGlobalEntries(myGlobalEntries.begin(),myGlobalEntries.end());
00775     for (typename std::set<Ordinal>::iterator iter = nnzcols.begin(); iter != nnzcols.end() ; ++iter)
00776     {
00777       myPaddedGlobalEntries.push_back(*iter);
00778     }
00779     colMap_ = Teuchos::rcp(new Map<Ordinal>(Teuchos::as<Ordinal>(-1), myPaddedGlobalEntries(),
00780                                             rowMap_.getIndexBase(), *rowMap_.getPlatform()) );
00781     importer_ = Teuchos::rcp( new Import<Ordinal>(rowMap_,*colMap_) );
00782 
00783     // ============================== //
00784     // Part IV: move to local indices //
00785     // ============================== //
00786     for (Ordinal r=OT::zero(); r < numMyRows_ ; ++r)
00787     {
00788       for (typename Array<Ordinal>::iterator cind = colinds_[r].begin();
00789            cind != colinds_[r].end(); ++cind) 
00790       {
00791         (*cind) = colMap_->getLocalIndex(*cind);
00792       }
00793     }
00794 
00795     // ================================================ //
00796     // Part V: compute some local and global quantities //
00797     // ================================================ //
00798     for (Ordinal r=OT::zero(); r < numMyRows_; ++r)
00799     {
00800       Ordinal numEntries = colinds_[r].size();
00801       myNNZ_ += numEntries;
00802       if ( std::find(colinds_[r].begin(),colinds_[r].end(),r) != colinds_[r].end() )
00803       {
00804         ++numMyDiags_;
00805       }
00806       myMaxNumEntries_ = std::max(myMaxNumEntries_,numEntries);
00807     }
00808     // TODO: consider eliminating one of these comms in favor of a pair
00809     Teuchos::reduceAll<Ordinal>(*comm_,Teuchos::REDUCE_SUM,myNNZ_,&globalNNZ_);
00810     Teuchos::reduceAll<Ordinal>(*comm_,Teuchos::REDUCE_SUM,numMyDiags_,&numGlobalDiags_);
00811     Teuchos::reduceAll<Ordinal>(*comm_,Teuchos::REDUCE_MAX,myMaxNumEntries_,&globalMaxNumEntries_);
00812 
00813     // mark transformation as successfully completed
00814     fillCompleted_ = true;
00815   }
00816 
00817 
00818 
00821   template<class Ordinal, class Scalar>
00822   void CrsMatrix<Ordinal,Scalar>::globalAssemble()
00823   {
00824     using Teuchos::arcp;
00825     using Teuchos::OrdinalTraits;
00826     using Teuchos::Array;
00827     using Teuchos::SerialDenseMatrix;
00828     using Teuchos::ArrayView;
00829     using Teuchos::ArrayRCP;
00830     using Teuchos::rcp;
00831     using std::list;
00832     using std::pair;
00833     using std::make_pair;
00834     typedef typename std::map<Ordinal,std::list<pair<Ordinal,Scalar> > >::const_iterator NLITER;
00835     int numImages = comm_->getSize();
00836     int myImageID = comm_->getRank();
00837     // Determine if any nodes have global entries to share
00838     Ordinal MyNonlocals = nonlocals_.size(),
00839             MaxGlobalNonlocals;
00840     Teuchos::reduceAll<Ordinal>(*comm_,Teuchos::REDUCE_MAX,MyNonlocals,&MaxGlobalNonlocals);
00841     if (MaxGlobalNonlocals == OT::zero()) return;  // no entries to share
00842 
00843     // compute a list of NLRs from nonlocals_ and use it to compute:
00844     //      IdsAndRows: a vector of (id,row) pairs
00845     //          NLR2Id: a map from NLR to the Id that owns it
00846     // globalNeighbors: a global graph of connectivity between images: globalNeighbors(i,j) indicates that j sends to i
00847     //         sendIDs: a list of all images I send to
00848     //         recvIDs: a list of all images I receive from (constructed later)
00849     Array<pair<Ordinal,Ordinal> > IdsAndRows;
00850     std::map<Ordinal,Ordinal> NLR2Id;
00851     SerialDenseMatrix<Ordinal,char> globalNeighbors;
00852     Array<Ordinal> sendIDs, recvIDs;
00853     {
00854       // nonlocals_ contains the entries we are holding for all non-local rows
00855       // we want a list of the rows for which we have data
00856       Array<Ordinal> NLRs;
00857       std::set<Ordinal> setOfRows;
00858       for (NLITER iter = nonlocals_.begin(); iter != nonlocals_.end(); ++iter)
00859       {
00860         setOfRows.insert(iter->first);
00861       }
00862       // copy the elements in the set into an Array
00863       NLRs.resize(setOfRows.size());
00864       std::copy(setOfRows.begin(), setOfRows.end(), NLRs.begin());
00865 
00866       // get a list of ImageIDs for the non-local rows (NLRs)
00867       Array<Ordinal> NLRIds(NLRs.size());
00868       {
00869         bool invalidGIDs = rowMap_.getRemoteIndexList(NLRs(),NLRIds());
00870         char lclerror = ( invalidGIDs ? OrdinalTraits<char>::one() : OrdinalTraits<char>::zero() );
00871         char gblerror;
00872         Teuchos::reduceAll(*comm_,Teuchos::REDUCE_MAX,lclerror,&gblerror);
00873         TEST_FOR_EXCEPTION(gblerror, std::runtime_error,
00874             "Tpetra::CrsMatrix::globalAssemble(): non-local entries correspond to invalid rows.");
00875       }
00876 
00877       // build up a list of neighbors, as well as a map between NLRs and Ids
00878       // localNeighbors[i] != 0 iff I have data to send to image i
00879       // put NLRs,Ids into an array of pairs
00880       IdsAndRows.reserve(NLRs.size());
00881       Array<char> localNeighbors(numImages,OrdinalTraits<char>::zero());
00882       for (typename Array<Ordinal>::const_iterator nlr = NLRs.begin(), id = NLRIds.begin();
00883           nlr != NLRs.end(); ++nlr, ++id) 
00884       {
00885         NLR2Id[*nlr] = *id;
00886         localNeighbors[*id] = OrdinalTraits<char>::one();
00887         IdsAndRows.push_back(make_pair(*id,*nlr));
00888       }
00889       for (Ordinal j=OT::zero(); j<numImages; ++j)
00890       {
00891         if (localNeighbors[j]) {
00892           sendIDs.push_back(j);
00893         }
00894       }
00895       // sort IdsAndRows, by Ids first, then rows
00896       std::sort(IdsAndRows.begin(),IdsAndRows.end());
00897       // gather from other nodes to form the full graph
00898       globalNeighbors.shapeUninitialized(numImages,numImages);
00899       Teuchos::gatherAll<Ordinal>(*comm_,numImages,localNeighbors.getRawPtr(),numImages*numImages,globalNeighbors.values());
00900       // globalNeighbors at this point contains (on all images) the
00901       // connectivity between the images. 
00902       // globalNeighbors(i,j) != 0 means that j sends to i/that i receives from j
00903     }
00904 
00906     // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH
00907     // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID
00909 
00910     // loop over all columns to know from which images I can expect to receive something
00911     for (Ordinal j=OT::zero(); j<numImages; ++j)
00912     {
00913       if (globalNeighbors(myImageID,j)) {
00914         recvIDs.push_back(j);
00915       }
00916     }
00917     Ordinal numRecvs = recvIDs.size();
00918 
00919     // we know how many we're sending to already
00920     // form a contiguous list of all data to be sent
00921     // track the number of entries for each ID
00922     Array<CrsIJV<Ordinal,Scalar> > IJVSendBuffer;
00923     Array<Ordinal> sendSizes(sendIDs.size(), 0);
00924     Ordinal numSends = 0;
00925     for (typename Array<pair<Ordinal,Ordinal> >::const_iterator IdAndRow = IdsAndRows.begin();
00926          IdAndRow != IdsAndRows.end(); ++IdAndRow) 
00927     {
00928       Ordinal  id = IdAndRow->first;
00929       Ordinal row = IdAndRow->second;
00930       // have we advanced to a new send?
00931       if (sendIDs[numSends] != id) {
00932         numSends++;
00933         TEST_FOR_EXCEPTION(sendIDs[numSends] != id, std::logic_error, "Tpetra::CrsMatrix::globalAssemble(): internal logic error. Contact Tpetra team.");
00934       }
00935       // copy data for row into contiguous storage
00936       for (typename list<pair<Ordinal,Scalar> >::const_iterator jv = nonlocals_[row].begin(); jv != nonlocals_[row].end(); ++jv)
00937       {
00938         IJVSendBuffer.push_back( CrsIJV<Ordinal,Scalar>(row,jv->first,jv->second) );
00939         sendSizes[numSends]++;
00940       }
00941     }
00942     if (IdsAndRows.size() > 0) {
00943       numSends++; // one last increment, to make it a count instead of an index
00944     }
00945     TEST_FOR_EXCEPTION(Teuchos::as<typename Array<Ordinal>::size_type>(numSends) != sendIDs.size(), std::logic_error, "Tpetra::CrsMatrix::globalAssemble(): internal logic error. Contact Tpetra team.");
00946 
00947     // don't need this data anymore
00948     nonlocals_.clear();
00949 
00951     // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS
00953     // perform non-blocking sends: send sizes to our recipients
00954     Array<Teuchos::RCP<Teuchos::CommRequest> > sendRequests;
00955     for (int s=0; s < numSends ; ++s)
00956     {
00957       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
00958       sendRequests.push_back( Teuchos::isend<Ordinal,Ordinal>(*comm_,rcp(&sendSizes[s],false),sendIDs[s]) );
00959     }
00960     // perform non-blocking receives: receive sizes from our senders
00961     Array<Teuchos::RCP<Teuchos::CommRequest> > recvRequests;
00962     Array<Ordinal> recvSizes(numRecvs);
00963     for (int r=0; r < numRecvs; ++r) {
00964       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
00965       recvRequests.push_back( Teuchos::ireceive(*comm_,rcp(&recvSizes[r],false),recvIDs[r]) );
00966     }
00967     // wait on all 
00968     if (!sendRequests.empty()) {
00969       Teuchos::waitAll(*comm_,sendRequests());
00970     }
00971     if (!recvRequests.empty()) {
00972       Teuchos::waitAll(*comm_,recvRequests());
00973     }
00974     Teuchos::barrier(*comm_);
00975     sendRequests.clear();
00976     recvRequests.clear();
00977 
00979     // NOW SEND/RECEIVE ALL ROW DATA
00981     // from the size info, build the ArrayViews into IJVSendBuffer
00982     Array<ArrayView<CrsIJV<Ordinal,Scalar> > > sendBuffers(numSends,Teuchos::null);
00983     {
00984       Ordinal cur = 0;
00985       for (int s=0; s<numSends; ++s) {
00986         sendBuffers[s] = IJVSendBuffer(cur,sendSizes[s]);
00987         cur += sendSizes[s];
00988       }
00989     }
00990     // perform non-blocking sends
00991     for (int s=0; s < numSends ; ++s)
00992     {
00993       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
00994       ArrayRCP<CrsIJV<Ordinal,Scalar> > tmparcp = arcp(sendBuffers[s].getRawPtr(),OT::zero(),sendBuffers[s].size(),false);
00995       sendRequests.push_back( Teuchos::isend<Ordinal,CrsIJV<Ordinal,Scalar> >(*comm_,tmparcp,sendIDs[s]) );
00996     }
00997     // calculate amount of storage needed for receives
00998     // setup pointers for the receives as well
00999     Ordinal totalRecvSize = std::accumulate(recvSizes.begin(),recvSizes.end(),OT::zero());
01000     Array<CrsIJV<Ordinal,Scalar> > IJVRecvBuffer(totalRecvSize);
01001     // from the size info, build the ArrayViews into IJVRecvBuffer
01002     Array<ArrayView<CrsIJV<Ordinal,Scalar> > > recvBuffers(numRecvs,Teuchos::null);
01003     {
01004       Ordinal cur = 0;
01005       for (int r=0; r<numRecvs; ++r) {
01006         recvBuffers[r] = IJVRecvBuffer(cur,recvSizes[r]);
01007         cur += recvSizes[r];
01008       }
01009     }
01010     // perform non-blocking recvs
01011     for (int r=0; r < numRecvs ; ++r)
01012     {
01013       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
01014       ArrayRCP<CrsIJV<Ordinal,Scalar> > tmparcp = arcp(recvBuffers[r].getRawPtr(),OT::zero(),recvBuffers[r].size(),false);
01015       recvRequests.push_back( Teuchos::ireceive<Ordinal,CrsIJV<Ordinal,Scalar> >(*comm_,tmparcp,recvIDs[r]) );
01016     }
01017     // perform waits
01018     if (!sendRequests.empty()) {
01019       Teuchos::waitAll(*comm_,sendRequests());
01020     }
01021     if (!recvRequests.empty()) {
01022       Teuchos::waitAll(*comm_,recvRequests());
01023     }
01024     Teuchos::barrier(*comm_);
01025     sendRequests.clear();
01026     recvRequests.clear();
01027 
01028 
01030     // NOW PROCESS THE RECEIVED ROW DATA
01032     for (typename Array<CrsIJV<Ordinal,Scalar> >::const_iterator ijv = IJVRecvBuffer.begin(); ijv != IJVRecvBuffer.end(); ++ijv)
01033     {
01034       submitEntry(ijv->i, ijv->j, ijv->v);
01035     }
01036 
01037     // WHEW! THAT WAS TIRING!
01038   }
01039 
01040 
01041   template<class Ordinal, class Scalar>
01042   void CrsMatrix<Ordinal,Scalar>::GeneralMV(const Teuchos::ArrayView<const Scalar> &x, const Teuchos::ArrayView<Scalar> &y) const
01043   {
01044     typedef Teuchos::ScalarTraits<Scalar> ST;
01045     typename Teuchos::Array<Ordinal>::const_iterator col;
01046     typename Teuchos::Array<Scalar >::const_iterator val;
01047     for (Ordinal r=0; r < numMyRows_; ++r) {
01048       col = colinds_[r].begin(); 
01049       val =  values_[r].begin(); 
01050       Scalar sum = ST::zero();
01051       for (; col != colinds_[r].end(); ++col, ++val) {
01052         sum += (*val) * x[*col];
01053       }
01054       y[r] = sum;
01055     }
01056   }
01057 
01058 
01059   template<class Ordinal, class Scalar>
01060   void CrsMatrix<Ordinal,Scalar>::GeneralMM(const Teuchos::ArrayView<const Teuchos::ArrayView<const Scalar> > &X, const Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > &Y) const
01061   {
01062     typedef Teuchos::ScalarTraits<Scalar> ST;
01063     Ordinal numVectors = X.size();
01064     typename Teuchos::Array<Ordinal>::const_iterator col;
01065     typename Teuchos::Array<Scalar >::const_iterator val;
01066     for (Ordinal r=0; r < numMyRows_; ++r) {
01067       for (int j=0; j<numVectors; ++j) {
01068         const Teuchos::ArrayView<const Scalar> &xvals = X[j]; 
01069         col = colinds_[r].begin(); 
01070         val = values_[r].begin(); 
01071         Scalar sum = ST::zero();
01072         for (; col != colinds_[r].end(); ++col, ++val) {
01073           sum += (*val) * xvals[*col];
01074         }
01075         Y[j][r] = sum;
01076       }
01077     }
01078   }
01079 
01080 
01081 } // namespace Tpetra
01082 
01083 #endif

Generated on Sat Mar 14 12:22:22 2009 for Tpetra Matrix/Vector Services by  doxygen 1.3.9.1