Actual source code: mpibdiag.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    The basic matrix operations for the block diagonal parallel
  5:    matrices.
  6: */
 7:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"

 11: PetscErrorCode MatSetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 12: {
 13:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 15:   PetscInt       i,j,row,rstart = mbd->rstart,rend = mbd->rend;
 16:   PetscTruth     roworiented = mbd->roworiented;

 19:   for (i=0; i<m; i++) {
 20:     if (idxm[i] < 0) continue;
 21:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 22:     if (idxm[i] >= rstart && idxm[i] < rend) {
 23:       row = idxm[i] - rstart;
 24:       for (j=0; j<n; j++) {
 25:         if (idxn[j] < 0) continue;
 26:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 27:         if (roworiented) {
 28:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 29:         } else {
 30:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 31:         }
 32:       }
 33:     } else {
 34:       if (!mbd->donotstash) {
 35:         if (roworiented) {
 36:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 37:         } else {
 38:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 39:         }
 40:       }
 41:     }
 42:   }
 43:   return(0);
 44: }

 48: PetscErrorCode MatGetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
 49: {
 50:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 52:   PetscInt       i,j,row,rstart = mbd->rstart,rend = mbd->rend;

 55:   for (i=0; i<m; i++) {
 56:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 57:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 58:     if (idxm[i] >= rstart && idxm[i] < rend) {
 59:       row = idxm[i] - rstart;
 60:       for (j=0; j<n; j++) {
 61:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 62:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 63:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 64:       }
 65:     } else {
 66:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 67:     }
 68:   }
 69:   return(0);
 70: }

 74: PetscErrorCode MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 75: {
 76:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 77:   MPI_Comm       comm = mat->comm;
 79:   PetscInt       nstash,reallocs;
 80:   InsertMode     addv;

 83:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 84:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 85:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 86:   }
 87:   mat->insertmode = addv; /* in case this processor had no cache */
 88:   MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
 89:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 90:   PetscLogInfo((0,"MatAssemblyBegin_MPIBDiag:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));
 91:   return(0);
 92: }

 96: PetscErrorCode MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 97: {
 98:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
 99:   Mat_SeqBDiag   *mlocal;
101:   PetscMPIInt    n;
102:   PetscInt       i,*row,*col;
103:   PetscInt       *tmp1,*tmp2,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
104:   PetscScalar    *val;
105:   InsertMode     addv = mat->insertmode;


109:   while (1) {
110:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
111:     if (!flg) break;
112: 
113:     for (i=0; i<n;) {
114:       /* Now identify the consecutive vals belonging to the same row */
115:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
116:       if (j < n) ncols = j-i;
117:       else       ncols = n-i;
118:       /* Now assemble all these values with a single function call */
119:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
120:       i = j;
121:     }
122:   }
123:   MatStashScatterEnd_Private(&mat->stash);

125:   MatAssemblyBegin(mbd->A,mode);
126:   MatAssemblyEnd(mbd->A,mode);

128:   /* Fix main diagonal location and determine global diagonals */
129:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
130:   Mblock         = mat->M/mat->bs; Nblock = mat->N/mat->bs;
131:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
132:   PetscMalloc(2*len*sizeof(PetscInt),&tmp1);
133:   tmp2           = tmp1 + len;
134:   PetscMemzero(tmp1,2*len*sizeof(PetscInt));
135:   mlocal->mainbd = -1;
136:   for (i=0; i<mlocal->nd; i++) {
137:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
138:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
139:   }
140:   MPI_Allreduce(tmp1,tmp2,len,MPIU_INT,MPI_SUM,mat->comm);
141:   ict  = 0;
142:   for (i=0; i<len; i++) {
143:     if (tmp2[i]) {
144:       mbd->gdiag[ict] = i - Mblock;
145:       ict++;
146:     }
147:   }
148:   mbd->gnd = ict;
149:   PetscFree(tmp1);

151:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
152:     MatSetUpMultiply_MPIBDiag(mat);
153:   }
154:   return(0);
155: }

159: PetscErrorCode MatZeroEntries_MPIBDiag(Mat A)
160: {
161:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;

165:   MatZeroEntries(l->A);
166:   return(0);
167: }

169: /* again this uses the same basic strategy as in the assembly and
170:    scatter create routines; we should try to do it systematically
171:    if we can figure out the proper level of generality. */

173: /* the code does not do the diagonal entries correctly unless the
174:    matrix is square and the column and row ownerships are identical.
175:    This is a BUG. The only way to fix it seems to be to access
176:    aij->A and aij->B directly and not through the MatZeroRows()
177:    routine.
178: */

182: PetscErrorCode MatZeroRows_MPIBDiag(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
183: {
184:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
186:   PetscMPIInt    n,imdex,size = l->size,rank = l->rank,tag = A->tag;
187:   PetscInt       i,*owners = l->rowners;
188:   PetscInt       *nprocs,j,idx,nsends;
189:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
190:   PetscInt       *rvalues,count,base,slen,*source;
191:   PetscInt       *lens,*lrows,*values;
192:   MPI_Comm       comm = A->comm;
193:   MPI_Request    *send_waits,*recv_waits;
194:   MPI_Status     recv_status,*send_status;
195:   PetscTruth     found;

198:   /*  first count number of contributors to each processor */
199:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
200:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
201:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
202:   for (i=0; i<N; i++) {
203:     idx = rows[i];
204:     found = PETSC_FALSE;
205:     for (j=0; j<size; j++) {
206:       if (idx >= owners[j] && idx < owners[j+1]) {
207:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
208:       }
209:     }
210:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
211:   }
212:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

214:   /* inform other processors of number of messages and max length*/
215:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

217:   /* post receives:   */
218:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
219:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
220:   for (i=0; i<nrecvs; i++) {
221:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
222:   }

224:   /* do sends:
225:       1) starts[i] gives the starting index in svalues for stuff going to 
226:          the ith processor
227:   */
228:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
229:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
230:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
231:   starts[0] = 0;
232:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
233:   for (i=0; i<N; i++) {
234:     svalues[starts[owner[i]]++] = rows[i];
235:   }

237:   starts[0] = 0;
238:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
239:   count = 0;
240:   for (i=0; i<size; i++) {
241:     if (nprocs[2*i+1]) {
242:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
243:     }
244:   }
245:   PetscFree(starts);

247:   base = owners[rank];

249:   /*  wait on receives */
250:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
251:   source = lens + nrecvs;
252:   count  = nrecvs;
253:   slen   = 0;
254:   while (count) {
255:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
256:     /* unpack receives into our local space */
257:     MPI_Get_count(&recv_status,MPIU_INT,&n);
258:     source[imdex]  = recv_status.MPI_SOURCE;
259:     lens[imdex]  = n;
260:     slen += n;
261:     count--;
262:   }
263:   PetscFree(recv_waits);
264: 
265:   /* move the data into the send scatter */
266:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
267:   count = 0;
268:   for (i=0; i<nrecvs; i++) {
269:     values = rvalues + i*nmax;
270:     for (j=0; j<lens[i]; j++) {
271:       lrows[count++] = values[j] - base;
272:     }
273:   }
274:   PetscFree(rvalues);
275:   PetscFree(lens);
276:   PetscFree(owner);
277:   PetscFree(nprocs);
278: 
279:   /* actually zap the local rows */
280:   MatZeroRows(l->A,slen,lrows,diag);
281:   PetscFree(lrows);

283:   /* wait on sends */
284:   if (nsends) {
285:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
286:     MPI_Waitall(nsends,send_waits,send_status);
287:     PetscFree(send_status);
288:   }
289:   PetscFree(send_waits);
290:   PetscFree(svalues);

292:   return(0);
293: }

297: PetscErrorCode MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
298: {
299:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;

303:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
304:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
305:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
306:   return(0);
307: }

311: PetscErrorCode MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
312: {
313:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;

317:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
320:   return(0);
321: }

325: PetscErrorCode MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
326: {
327:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;
329:   PetscScalar    zero = 0.0;

332:   VecSet(yy,zero);
333:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
334:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
336:   return(0);
337: }

341: PetscErrorCode MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
342: {
343:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;

347:   VecCopy(yy,zz);
348:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
349:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
350:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
351:   return(0);
352: }

356: PetscErrorCode MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
357: {
358:   Mat_MPIBDiag   *mat = (Mat_MPIBDiag*)matin->data;
360:   PetscReal      isend[5],irecv[5];

363:   info->block_size     = (PetscReal)mat->A->bs;
364:   MatGetInfo(mat->A,MAT_LOCAL,info);
365:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
366:   isend[3] = info->memory;  isend[4] = info->mallocs;
367:   if (flag == MAT_LOCAL) {
368:     info->nz_used      = isend[0];
369:     info->nz_allocated = isend[1];
370:     info->nz_unneeded  = isend[2];
371:     info->memory       = isend[3];
372:     info->mallocs      = isend[4];
373:   } else if (flag == MAT_GLOBAL_MAX) {
374:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
375:     info->nz_used      = irecv[0];
376:     info->nz_allocated = irecv[1];
377:     info->nz_unneeded  = irecv[2];
378:     info->memory       = irecv[3];
379:     info->mallocs      = irecv[4];
380:   } else if (flag == MAT_GLOBAL_SUM) {
381:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
382:     info->nz_used      = irecv[0];
383:     info->nz_allocated = irecv[1];
384:     info->nz_unneeded  = irecv[2];
385:     info->memory       = irecv[3];
386:     info->mallocs      = irecv[4];
387:   }
388:   info->rows_global    = (double)matin->M;
389:   info->columns_global = (double)matin->N;
390:   info->rows_local     = (double)matin->m;
391:   info->columns_local  = (double)matin->N;
392:   return(0);
393: }

397: PetscErrorCode MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
398: {
400:   Mat_MPIBDiag   *A = (Mat_MPIBDiag*)mat->data;

403:   MatGetDiagonal(A->A,v);
404:   return(0);
405: }

409: PetscErrorCode MatDestroy_MPIBDiag(Mat mat)
410: {
411:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;
413: #if defined(PETSC_USE_LOG)
414:   Mat_SeqBDiag   *ms = (Mat_SeqBDiag*)mbd->A->data;

417:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D, BSize=%D, NDiag=%D",mat->M,mat->N,mat->bs,ms->nd);
418: #else
420: #endif
421:   MatStashDestroy_Private(&mat->stash);
422:   PetscFree(mbd->rowners);
423:   PetscFree(mbd->gdiag);
424:   MatDestroy(mbd->A);
425:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
426:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
427:   PetscFree(mbd);
428:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
429:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIBDiagSetPreallocation_C","",PETSC_NULL);
430:   return(0);
431: }


436: static PetscErrorCode MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
437: {
438:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)mat->data;

442:   if (mbd->size == 1) {
443:     MatView(mbd->A,viewer);
444:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
445:   return(0);
446: }

450: static PetscErrorCode MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
451: {
452:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
453:   PetscErrorCode    ierr;
454:   PetscMPIInt       size = mbd->size,rank = mbd->rank;
455:   PetscInt          i;
456:   PetscTruth        iascii,isdraw;
457:   PetscViewer       sviewer;
458:   PetscViewerFormat format;

461:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
462:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
463:   if (iascii) {
464:     PetscViewerGetFormat(viewer,&format);
465:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
466:       PetscInt nline = PetscMin(10,mbd->gnd),k,nk,np;
467:       PetscViewerASCIIPrintf(viewer,"  block size=%D, total number of diagonals=%D\n",mat->bs,mbd->gnd);
468:       nk = (mbd->gnd-1)/nline + 1;
469:       for (k=0; k<nk; k++) {
470:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
471:         np = PetscMin(nline,mbd->gnd - nline*k);
472:         for (i=0; i<np; i++) {
473:           PetscViewerASCIIPrintf(viewer,"  %D",mbd->gdiag[i+nline*k]);
474:         }
475:         PetscViewerASCIIPrintf(viewer,"\n");
476:       }
477:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
478:         MatInfo info;
479:         MPI_Comm_rank(mat->comm,&rank);
480:         MatGetInfo(mat,MAT_LOCAL,&info);
481:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
482:             (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
483:         PetscViewerFlush(viewer);
484:         VecScatterView(mbd->Mvctx,viewer);
485:       }
486:       return(0);
487:     }
488:   }

490:   if (isdraw) {
491:     PetscDraw       draw;
492:     PetscTruth isnull;
493:     PetscViewerDrawGetDraw(viewer,0,&draw);
494:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
495:   }

497:   if (size == 1) {
498:     MatView(mbd->A,viewer);
499:   } else {
500:     /* assemble the entire matrix onto first processor. */
501:     Mat          A;
502:     PetscInt     M = mat->M,N = mat->N,m,row,nz,*cols;
503:     PetscScalar  *vals;

505:     /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
506:     MatCreate(mat->comm,&A);
507:     if (!rank) {
508:       MatSetSizes(A,M,N,M,N);
509:       MatSetType(A,MATMPIBDIAG);
510:       MatMPIBDiagSetPreallocation(A,mbd->gnd,mbd->A->bs,mbd->gdiag,PETSC_NULL);
511:     } else {
512:       MatSetSizes(A,0,0,M,N);
513:       MatSetType(A,MATMPIBDIAG);
514:       MatMPIBDiagSetPreallocation(A,0,mbd->A->bs,PETSC_NULL,PETSC_NULL);
515:     }
516:     PetscLogObjectParent(mat,A);

518:     /* Copy the matrix ... This isn't the most efficient means,
519:        but it's quick for now */
520:     row = mbd->rstart;
521:     m = mbd->A->m;
522:     for (i=0; i<m; i++) {
523:       MatGetRow_MPIBDiag(mat,row,&nz,&cols,&vals);
524:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
525:       MatRestoreRow_MPIBDiag(mat,row,&nz,&cols,&vals);
526:       row++;
527:     }
528:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
529:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
530:     PetscViewerGetSingleton(viewer,&sviewer);
531:     if (!rank) {
532:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
533:     }
534:     PetscViewerRestoreSingleton(viewer,&sviewer);
535:     PetscViewerFlush(viewer);
536:     MatDestroy(A);
537:   }
538:   return(0);
539: }

543: PetscErrorCode MatView_MPIBDiag(Mat mat,PetscViewer viewer)
544: {
546:   PetscTruth     iascii,isdraw,isbinary;

549:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
550:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
551:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
552:   if (iascii || isdraw) {
553:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
554:   } else if (isbinary) {
555:     MatView_MPIBDiag_Binary(mat,viewer);
556:   } else {
557:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
558:   }
559:   return(0);
560: }

564: PetscErrorCode MatSetOption_MPIBDiag(Mat A,MatOption op)
565: {
566:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)A->data;

569:   switch (op) {
570:   case MAT_NO_NEW_NONZERO_LOCATIONS:
571:   case MAT_YES_NEW_NONZERO_LOCATIONS:
572:   case MAT_NEW_NONZERO_LOCATION_ERR:
573:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
574:   case MAT_NO_NEW_DIAGONALS:
575:   case MAT_YES_NEW_DIAGONALS:
576:     MatSetOption(mbd->A,op);
577:     break;
578:   case MAT_ROW_ORIENTED:
579:     mbd->roworiented = PETSC_TRUE;
580:     MatSetOption(mbd->A,op);
581:     break;
582:   case MAT_COLUMN_ORIENTED:
583:     mbd->roworiented = PETSC_FALSE;
584:     MatSetOption(mbd->A,op);
585:     break;
586:   case MAT_IGNORE_OFF_PROC_ENTRIES:
587:     mbd->donotstash = PETSC_TRUE;
588:     break;
589:   case MAT_ROWS_SORTED:
590:   case MAT_ROWS_UNSORTED:
591:   case MAT_COLUMNS_SORTED:
592:   case MAT_COLUMNS_UNSORTED:
593:     PetscLogInfo((A,"MatSetOption_MPIBDiag:Option ignored\n"));
594:     break;
595:   case MAT_SYMMETRIC:
596:   case MAT_STRUCTURALLY_SYMMETRIC:
597:   case MAT_NOT_SYMMETRIC:
598:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
599:   case MAT_HERMITIAN:
600:   case MAT_NOT_HERMITIAN:
601:   case MAT_SYMMETRY_ETERNAL:
602:   case MAT_NOT_SYMMETRY_ETERNAL:
603:     break;
604:   default:
605:     SETERRQ(PETSC_ERR_SUP,"unknown option");
606:   }
607:   return(0);
608: }


613: PetscErrorCode MatGetRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
614: {
615:   Mat_MPIBDiag   *mat = (Mat_MPIBDiag*)matin->data;
617:   PetscInt       lrow;

620:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
621:   lrow = row - mat->rstart;
622:   MatGetRow_SeqBDiag(mat->A,lrow,nz,idx,v);
623:   return(0);
624: }

628: PetscErrorCode MatRestoreRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
629: {
630:   Mat_MPIBDiag   *mat = (Mat_MPIBDiag*)matin->data;
632:   PetscInt       lrow;

635:   lrow = row - mat->rstart;
636:   MatRestoreRow_SeqBDiag(mat->A,lrow,nz,idx,v);
637:   return(0);
638: }


643: PetscErrorCode MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
644: {
645:   Mat_MPIBDiag   *mbd = (Mat_MPIBDiag*)A->data;
646:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)mbd->A->data;
647:   PetscReal      sum = 0.0;
649:   PetscInt       d,i,nd = a->nd,bs = A->bs,len;
650:   PetscScalar    *dv;

653:   if (type == NORM_FROBENIUS) {
654:     for (d=0; d<nd; d++) {
655:       dv   = a->diagv[d];
656:       len  = a->bdlen[d]*bs*bs;
657:       for (i=0; i<len; i++) {
658: #if defined(PETSC_USE_COMPLEX)
659:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
660: #else
661:         sum += dv[i]*dv[i];
662: #endif
663:       }
664:     }
665:     MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
666:     *nrm = sqrt(*nrm);
667:     PetscLogFlops(2*A->n*A->m);
668:   } else if (type == NORM_1) { /* max column norm */
669:     PetscReal *tmp,*tmp2;
670:     PetscInt    j;
671:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
672:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
673:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
674:     *nrm = 0.0;
675:     MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
676:     for (j=0; j<mbd->A->n; j++) {
677:       if (tmp2[j] > *nrm) *nrm = tmp2[j];
678:     }
679:     PetscFree(tmp);
680:     PetscFree(tmp2);
681:   } else if (type == NORM_INFINITY) { /* max row norm */
682:     PetscReal normtemp;
683:     MatNorm(mbd->A,type,&normtemp);
684:     MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
685:   }
686:   return(0);
687: }

691: PetscErrorCode MatPrintHelp_MPIBDiag(Mat A)
692: {
693:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;

697:   if (!a->rank) {
698:     MatPrintHelp_SeqBDiag(a->A);
699:   }
700:   return(0);
701: }

705: PetscErrorCode MatScale_MPIBDiag(Mat A,PetscScalar alpha)
706: {
708:   Mat_MPIBDiag   *a = (Mat_MPIBDiag*)A->data;

711:   MatScale_SeqBDiag(a->A,alpha);
712:   return(0);
713: }

717: PetscErrorCode MatSetUpPreallocation_MPIBDiag(Mat A)
718: {

722:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
723:   return(0);
724: }

726: /* -------------------------------------------------------------------*/

728: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
729:        MatGetRow_MPIBDiag,
730:        MatRestoreRow_MPIBDiag,
731:        MatMult_MPIBDiag,
732: /* 4*/ MatMultAdd_MPIBDiag,
733:        MatMultTranspose_MPIBDiag,
734:        MatMultTransposeAdd_MPIBDiag,
735:        0,
736:        0,
737:        0,
738: /*10*/ 0,
739:        0,
740:        0,
741:        0,
742:        0,
743: /*15*/ MatGetInfo_MPIBDiag,
744:        0,
745:        MatGetDiagonal_MPIBDiag,
746:        0,
747:        MatNorm_MPIBDiag,
748: /*20*/ MatAssemblyBegin_MPIBDiag,
749:        MatAssemblyEnd_MPIBDiag,
750:        0,
751:        MatSetOption_MPIBDiag,
752:        MatZeroEntries_MPIBDiag,
753: /*25*/ MatZeroRows_MPIBDiag,
754:        0,
755:        0,
756:        0,
757:        0,
758: /*30*/ MatSetUpPreallocation_MPIBDiag,
759:        0,
760:        0,
761:        0,
762:        0,
763: /*35*/ 0,
764:        0,
765:        0,
766:        0,
767:        0,
768: /*40*/ 0,
769:        0,
770:        0,
771:        MatGetValues_MPIBDiag,
772:        0,
773: /*45*/ MatPrintHelp_MPIBDiag,
774:        MatScale_MPIBDiag,
775:        0,
776:        0,
777:        0,
778: /*50*/ 0,
779:        0,
780:        0,
781:        0,
782:        0,
783: /*55*/ 0,
784:        0,
785:        0,
786:        0,
787:        0,
788: /*60*/ 0,
789:        MatDestroy_MPIBDiag,
790:        MatView_MPIBDiag,
791:        MatGetPetscMaps_Petsc,
792:        0,
793: /*65*/ 0,
794:        0,
795:        0,
796:        0,
797:        0,
798: /*70*/ 0,
799:        0,
800:        0,
801:        0,
802:        0,
803: /*75*/ 0,
804:        0,
805:        0,
806:        0,
807:        0,
808: /*80*/ 0,
809:        0,
810:        0,
811:        0,
812:        MatLoad_MPIBDiag,
813: /*85*/ 0,
814:        0,
815:        0,
816:        0,
817:        0,
818: /*90*/ 0,
819:        0,
820:        0,
821:        0,
822:        0,
823: /*95*/ 0,
824:        0,
825:        0,
826:        0};

831: PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
832: {
833:   Mat_MPIBDiag   *matin = (Mat_MPIBDiag *)A->data;
835:   PetscInt       lrows,lcols,rstart,rend;
836:   IS             localc,localr;

839:   MatGetLocalSize(A,&lrows,&lcols);
840:   MatGetOwnershipRange(A,&rstart,&rend);
841:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
842:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
843:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
844:   ISDestroy(localr);
845:   ISDestroy(localc);

847:   *iscopy = PETSC_TRUE;
848:   return(0);
849: }

855: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,PetscInt nd,PetscInt bs,PetscInt *diag,PetscScalar **diagv)
856: {
857:   Mat_MPIBDiag   *b;
859:   PetscInt       i,k,*ldiag,len,nd2;
860:   PetscScalar    **ldiagv = 0;
861:   PetscTruth     flg2;

864:   B->preallocated = PETSC_TRUE;
865:   if (bs == PETSC_DEFAULT) bs = 1;
866:   if (nd == PETSC_DEFAULT) nd = 0;
867:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
868:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
869:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
870:   if (nd && !diag) {
871:     PetscMalloc(nd*sizeof(PetscInt),&diag);
872:     nd2  = nd;
873:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
874:     if (nd2 != nd) {
875:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
876:     }
877:   } else if (flg2) {
878:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
879:   }

881:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

883:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
884:   PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);

886:   if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
887:   if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
888:   if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
889:   B->bs = bs;

891:   /* the information in the maps duplicates the information computed below; eventually
892:      we should remove the duplicate information that is not contained in the maps */
893:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
894:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);


897:   b          = (Mat_MPIBDiag*)B->data;
898:   b->gnd     = nd;

900:   MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);
901:   b->rowners[0] = 0;
902:   for (i=2; i<=b->size; i++) {
903:     b->rowners[i] += b->rowners[i-1];
904:   }
905:   b->rstart  = b->rowners[b->rank];
906:   b->rend    = b->rowners[b->rank+1];
907:   b->brstart = (b->rstart)/bs;
908:   b->brend   = (b->rend)/bs;


911:   /* Determine local diagonals; for now, assume global rows = global cols */
912:   /* These are sorted in MatCreateSeqBDiag */
913:   PetscMalloc((nd+1)*sizeof(PetscInt),&ldiag);
914:   len  = B->M/bs + B->N/bs + 1;
915:   PetscMalloc(len*sizeof(PetscInt),&b->gdiag);
916:   k    = 0;
917:   PetscLogObjectMemory(B,(nd+1)*sizeof(PetscInt) + (b->size+2)*sizeof(PetscInt) + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
918:   if (diagv) {
919:     PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
920:   }
921:   for (i=0; i<nd; i++) {
922:     b->gdiag[i] = diag[i];
923:     if (diag[i] > 0) { /* lower triangular */
924:       if (diag[i] < b->brend) {
925:         ldiag[k] = diag[i] - b->brstart;
926:         if (diagv) ldiagv[k] = diagv[i];
927:         k++;
928:       }
929:     } else { /* upper triangular */
930:       if (B->M/bs - diag[i] > B->N/bs) {
931:         if (B->M/bs + diag[i] > b->brstart) {
932:           ldiag[k] = diag[i] - b->brstart;
933:           if (diagv) ldiagv[k] = diagv[i];
934:           k++;
935:         }
936:       } else {
937:         if (B->M/bs > b->brstart) {
938:           ldiag[k] = diag[i] - b->brstart;
939:           if (diagv) ldiagv[k] = diagv[i];
940:           k++;
941:         }
942:       }
943:     }
944:   }

946:   /* Form local matrix */
947:   MatCreate(PETSC_COMM_SELF,&b->A);
948:   MatSetSizes(b->A,B->m,B->N,B->m,B->N);
949:   MatSetType(b->A,MATSEQBDIAG);
950:   MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
951:   PetscLogObjectParent(B,b->A);
952:   PetscFree(ldiag);
953:   if (ldiagv) {PetscFree(ldiagv);}

955:   return(0);
956: }

959: /*MC
960:    MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.

962:    Options Database Keys:
963: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()

965:   Level: beginner

967: .seealso: MatCreateMPIBDiag
968: M*/
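
A minimal usage sketch (an editor's illustration, not part of mpibdiag.c): one way a matrix of this type is typically obtained, assuming the program is run with -mat_type mpibdiag; the 100 x 100 global size, the default preallocation, and the helper name are placeholders.

   PetscErrorCode ExampleCreateMPIBDiagFromOptions(MPI_Comm comm,Mat *A)
   {
     MatCreate(comm,A);
     MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);    /* placeholder global size */
     MatSetFromOptions(*A);                                /* picks up -mat_type mpibdiag */
     MatMPIBDiagSetPreallocation(*A,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_NULL,PETSC_NULL); /* no-op if another type was chosen */
     return(0);
   }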

973: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBDiag(Mat B)
974: {
975:   Mat_MPIBDiag   *b;

979:   PetscNew(Mat_MPIBDiag,&b);
980:   B->data         = (void*)b;
981:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
982:   B->factor       = 0;
983:   B->mapping      = 0;

985:   B->insertmode = NOT_SET_VALUES;
986:   MPI_Comm_rank(B->comm,&b->rank);
987:   MPI_Comm_size(B->comm,&b->size);

989:   /* build local table of row ownerships */
990:   PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rowners);

992:   /* build cache for off array entries formed */
993:   MatStashCreate_Private(B->comm,1,&B->stash);
994:   b->donotstash = PETSC_FALSE;

996:   /* stuff used for matrix-vector multiply */
997:   b->lvec        = 0;
998:   b->Mvctx       = 0;

1000:   /* used for MatSetValues() input */
1001:   b->roworiented = PETSC_TRUE;

1003:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1004:                                      "MatGetDiagonalBlock_MPIBDiag",
1005:                                       MatGetDiagonalBlock_MPIBDiag);
1006:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
1007:                                      "MatMPIBDiagSetPreallocation_MPIBDiag",
1008:                                       MatMPIBDiagSetPreallocation_MPIBDiag);
1009:   return(0);
1010: }

1013: /*MC
1014:    MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.

1016:    This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
1017:    and MATMPIBDIAG otherwise.

1019:    Options Database Keys:
1020: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()

1022:   Level: beginner

1024: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1025: M*/
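
A brief sketch (illustration only, not part of this file): requesting the generic "bdiag" type, which the constructor below resolves to MATSEQBDIAG on one process and MATMPIBDIAG otherwise; the helper name and the square size M are placeholders.

   PetscErrorCode ExampleCreateBDiag(MPI_Comm comm,PetscInt M,Mat *A)
   {
     MatCreate(comm,A);
     MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,M);
     MatSetType(*A,MATBDIAG);   /* becomes MATSEQBDIAG on 1 process, MATMPIBDIAG on several */
     return(0);
   }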

1030: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BDiag(Mat A)
1031: {
1033:   PetscMPIInt   size;

1036:   PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1037:   MPI_Comm_size(A->comm,&size);
1038:   if (size == 1) {
1039:     MatSetType(A,MATSEQBDIAG);
1040:   } else {
1041:     MatSetType(A,MATMPIBDIAG);
1042:   }
1043:   return(0);
1044: }

1049: /*@C
1050:    MatMPIBDiagSetPreallocation - Preallocates memory for a sparse parallel matrix in block diagonal (MPIBDiag) format.

1052:    Collective on Mat

1054:    Input Parameters:
1055: +  A - the matrix 
1056: .  nd - number of block diagonals (global) (optional)
1057: .  bs - each element of a diagonal is a bs x bs dense matrix
1058: .  diag - optional array of block diagonal numbers (length nd).
1059:    For a matrix element A[i,j], where i=row and j=column, the
1060:    diagonal number is
1061: $     diag = i/bs - j/bs  (integer division)
1062:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1063:    needed (expensive).
1064: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1065:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1066:    to control memory allocation.


1069:    Options Database Keys:
1070: .  -mat_block_size <bs> - Sets blocksize
1071: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1073:    Notes:
1074:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1075:    then it must be used on all processors that share the object for that argument.

1077:    The parallel matrix is partitioned across the processors by rows, where
1078:    each local rectangular matrix is stored in the uniprocessor block 
1079:    diagonal format.  See the users manual for further details.

1081:    The user MUST specify either the local or global numbers of rows
1082:    (possibly both).

1084:    The case bs=1 (conventional diagonal storage) is implemented as
1085:    a special case.

1087:    Fortran Notes:
1088:    Fortran programmers cannot set diagv; this variable is ignored.

1090:    Level: intermediate

1092: .keywords: matrix, block, diagonal, parallel, sparse

1094: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1095: @*/
1096: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBDiagSetPreallocation(Mat B,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[])
1097: {
1098:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscScalar*[]);

1101:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1102:   if (f) {
1103:     (*f)(B,nd,bs,diag,diagv);
1104:   }
1105:   return(0);
1106: }
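
A hedged usage sketch for the routine documented above (an added illustration, not part of this file): preallocating a tridiagonal matrix with bs = 1. With diag = i/bs - j/bs, the main, sub- and super-diagonals carry the diagonal numbers 0, 1 and -1; passing PETSC_NULL for diagv lets PETSc allocate the diagonal storage. The helper name is a placeholder.

   PetscErrorCode ExampleTridiagonalPreallocation(MPI_Comm comm,PetscInt M,Mat *A)
   {
     PetscInt diag[3] = {0,1,-1};   /* main, sub- and super-diagonal */

     MatCreate(comm,A);
     MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,M);
     MatSetType(*A,MATMPIBDIAG);
     MatMPIBDiagSetPreallocation(*A,3,1,diag,PETSC_NULL);   /* 3 diagonals, bs = 1 */
     return(0);
   }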

1110: /*@C
1111:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

1113:    Collective on MPI_Comm

1115:    Input Parameters:
1116: +  comm - MPI communicator
1117: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1118: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1119: .  N - number of columns (local and global)
1120: .  nd - number of block diagonals (global) (optional)
1121: .  bs - each element of a diagonal is a bs x bs dense matrix
1122: .  diag - optional array of block diagonal numbers (length nd).
1123:    For a matrix element A[i,j], where i=row and j=column, the
1124:    diagonal number is
1125: $     diag = i/bs - j/bs  (integer division)
1126:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1127:    needed (expensive).
1128: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1129:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1130:    to control memory allocation.

1132:    Output Parameter:
1133: .  A - the matrix 

1135:    Options Database Keys:
1136: .  -mat_block_size <bs> - Sets blocksize
1137: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1139:    Notes:
1140:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1141:    then it must be used on all processors that share the object for that argument.

1143:    The parallel matrix is partitioned across the processors by rows, where
1144:    each local rectangular matrix is stored in the uniprocessor block 
1145:    diagonal format.  See the users manual for further details.

1147:    The user MUST specify either the local or global numbers of rows
1148:    (possibly both).

1150:    The case bs=1 (conventional diagonal storage) is implemented as
1151:    a special case.

1153:    Fortran Notes:
1154:    Fortran programmers cannot set diagv; this variable is ignored.

1156:    Level: intermediate

1158: .keywords: matrix, block, diagonal, parallel, sparse

1160: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1161: @*/
1162: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBDiag(MPI_Comm comm,PetscInt m,PetscInt M,PetscInt N,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[],Mat *A)
1163: {
1165:   PetscMPIInt    size;

1168:   MatCreate(comm,A);
1169:   MatSetSizes(*A,m,m,M,N);
1170:   MPI_Comm_size(comm,&size);
1171:   if (size > 1) {
1172:     MatSetType(*A,MATMPIBDIAG);
1173:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1174:   } else {
1175:     MatSetType(*A,MATSEQBDIAG);
1176:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1177:   }
1178:   return(0);
1179: }
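
For comparison, a sketch of the one-call route through MatCreateMPIBDiag() (again an illustration, not part of this file); the single main diagonal, bs = 1, and the helper name are placeholder choices.

   PetscErrorCode ExampleCreateDiagonalMatrix(MPI_Comm comm,PetscInt M,Mat *A)
   {
     PetscInt diag[1] = {0};   /* only the main (block) diagonal */

     /* PETSC_DECIDE local rows, M global rows, M columns, 1 diagonal, bs = 1 */
     MatCreateMPIBDiag(comm,PETSC_DECIDE,M,M,1,1,diag,PETSC_NULL,A);
     return(0);
   }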

1183: /*@C
1184:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1185:    For the parallel case, this returns information for the local submatrix.

1187:    Input Parameters:
1188: .  mat - the matrix, stored in block diagonal format.

1190:    Not Collective

1192:    Output Parameters:
1193: +  nd - number of block diagonals
1194: .  bs - each element of a diagonal is a bs x bs dense matrix
1195: .  bdlen - array of total block lengths of the block diagonals
1196: .  diag - array of block diagonal numbers (length nd).
1197:    For a matrix element A[i,j], where i=row and j=column, the
1198:    diagonal number is
1199: $     diag = i/bs - j/bs  (integer division)
1200: -  diagv - pointer to the actual diagonals (in same order as the diag array)

1206:    Level: advanced

1208:    Notes:
1209:    See the users manual for further details regarding this storage format.

1211: .keywords: matrix, block, diagonal, get, data

1213: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1214: @*/
1215: PetscErrorCode PETSCMAT_DLLEXPORT MatBDiagGetData(Mat mat,PetscInt *nd,PetscInt *bs,PetscInt *diag[],PetscInt *bdlen[],PetscScalar ***diagv)
1216: {
1217:   Mat_MPIBDiag   *pdmat;
1218:   Mat_SeqBDiag   *dmat = 0;
1219:   PetscTruth     isseq,ismpi;

1224:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1225:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1226:   if (isseq) {
1227:     dmat = (Mat_SeqBDiag*)mat->data;
1228:   } else if (ismpi) {
1229:     pdmat = (Mat_MPIBDiag*)mat->data;
1230:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1231:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1232:   *nd    = dmat->nd;
1233:   *bs    = mat->bs;
1234:   *diag  = dmat->diag;
1235:   *bdlen = dmat->bdlen;
1236:   *diagv = dmat->diagv;
1237:   return(0);
1238: }
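
A short illustrative sketch (not part of this file) of inspecting the storage with MatBDiagGetData(); the helper name and the printed summary are arbitrary.

   PetscErrorCode ExamplePrintBDiagLayout(Mat mat)
   {
     PetscInt    nd,bs,*diag,*bdlen,i;
     PetscScalar **diagv;

     MatBDiagGetData(mat,&nd,&bs,&diag,&bdlen,&diagv);
     for (i=0; i<nd; i++) {
       PetscPrintf(PETSC_COMM_SELF,"diagonal %D: number %D, block length %D\n",i,diag[i],bdlen[i]);
     }
     return(0);
   }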

1240:  #include "petscsys.h"

1244: PetscErrorCode MatLoad_MPIBDiag(PetscViewer viewer, MatType type,Mat *newmat)
1245: {
1246:   Mat            A;
1247:   PetscScalar    *vals,*svals;
1248:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1249:   MPI_Status     status;
1251:   int            fd;
1252:   PetscMPIInt    tag = ((PetscObject)viewer)->tag,rank,size,*sndcounts = 0,*rowners,maxnz,mm;
1253:   PetscInt       bs,i,nz,j,rstart,rend,*cols;
1254:   PetscInt       header[4],*rowlengths = 0,M,N,m,Mbs;
1255:   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1256:   PetscInt       extra_rows;

1259:   MPI_Comm_size(comm,&size);
1260:   MPI_Comm_rank(comm,&rank);
1261:   if (!rank) {
1262:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1263:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1264:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1265:     if (header[3] < 0) {
1266:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1267:     }
1268:   }
1269:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1270:   M = header[1]; N = header[2];

1272:   bs = 1;   /* uses a block size of 1 by default; */
1273:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1275:   /* 
1276:      This code adds extra rows to make sure the number of rows is 
1277:      divisible by the blocksize
1278:   */
1279:   Mbs        = M/bs;
1280:   extra_rows = bs - M + bs*(Mbs);
1281:   if (extra_rows == bs) extra_rows = 0;
1282:   else                  Mbs++;
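  /* e.g. with bs=4 and M=10: Mbs=2, extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and the
     matrix is padded to 12 rows; when M is already a multiple of bs the formula gives
     extra_rows = bs, which the test above resets to 0 */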
1283:   if (extra_rows && !rank) {
1284:     PetscLogInfo((0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n"));
1285:   }

1287:   /* determine ownership of all rows */
1288:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1289:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1290:   mm         = (PetscMPIInt)m;
1291:   MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1292:   rowners[0] = 0;
1293:   for (i=2; i<=size; i++) {
1294:     rowners[i] += rowners[i-1];
1295:   }
1296:   rstart = rowners[rank];
1297:   rend   = rowners[rank+1];

1299:   /* distribute row lengths to all processors */
1300:   PetscMalloc((rend-rstart)*sizeof(PetscInt),&ourlens);
1301:   if (!rank) {
1302:     PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1303:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1304:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1305:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1306:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1307:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1308:     PetscFree(sndcounts);
1309:   } else {
1310:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1311:   }

1313:   if (!rank) {
1314:     /* calculate the number of nonzeros on each processor */
1315:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1316:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1317:     for (i=0; i<size; i++) {
1318:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1319:         procsnz[i] += rowlengths[j];
1320:       }
1321:     }
1322:     PetscFree(rowlengths);

1324:     /* determine max buffer needed and allocate it */
1325:     maxnz = 0;
1326:     for (i=0; i<size; i++) {
1327:       maxnz = PetscMax(maxnz,procsnz[i]);
1328:     }
1329:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1331:     /* read in my part of the matrix column indices  */
1332:     nz   = procsnz[0];
1333:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1334:     if (size == 1)  nz -= extra_rows;
1335:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1336:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

1338:     /* read in everyone else's portion and ship it off */
1339:     for (i=1; i<size-1; i++) {
1340:       nz   = procsnz[i];
1341:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1342:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1343:     }
1344:     /* read in the stuff for the last proc */
1345:     if (size != 1) {
1346:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1347:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1348:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1349:       MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
1350:     }
1351:     PetscFree(cols);
1352:   } else {
1353:     /* determine buffer space needed for message */
1354:     nz = 0;
1355:     for (i=0; i<m; i++) {
1356:       nz += ourlens[i];
1357:     }
1358:     PetscMalloc(nz*sizeof(PetscInt),&mycols);

1360:     /* receive message of column indices*/
1361:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1362:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1363:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1364:   }

1366:   MatCreate(comm,newmat);
1367:   MatSetSizes(*newmat,m,m,M+extra_rows,N+extra_rows);
1368:   MatSetType(*newmat,type);
1369:   MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1370:   A = *newmat;

1372:   if (!rank) {
1373:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1375:     /* read in my part of the matrix numerical values  */
1376:     nz = procsnz[0];
1377:     if (size == 1)  nz -= extra_rows;
1378:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1379:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1381:     /* insert into matrix */
1382:     jj      = rstart;
1383:     smycols = mycols;
1384:     svals   = vals;
1385:     for (i=0; i<m; i++) {
1386:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1387:       smycols += ourlens[i];
1388:       svals   += ourlens[i];
1389:       jj++;
1390:     }

1392:     /* read in the other processors' portions (except the last one) and ship them out */
1393:     for (i=1; i<size-1; i++) {
1394:       nz   = procsnz[i];
1395:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1396:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1397:     }
1398:     /* the last proc */
1399:     if (size != 1){
1400:       nz   = procsnz[i] - extra_rows;
1401:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1402:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1403:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1404:     }
1405:     PetscFree(procsnz);
1406:   } else {
1407:     /* receive numeric values */
1408:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1410:     /* receive message of values*/
1411:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1412:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1413:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1415:     /* insert into matrix */
1416:     jj      = rstart;
1417:     smycols = mycols;
1418:     svals   = vals;
1419:     for (i=0; i<m; i++) {
1420:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1421:       smycols += ourlens[i];
1422:       svals   += ourlens[i];
1423:       jj++;
1424:     }
1425:   }
1426:   PetscFree(ourlens);
1427:   PetscFree(vals);
1428:   PetscFree(mycols);
1429:   PetscFree(rowners);

1431:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1432:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1433:   return(0);
1434: }