Actual source code: mpibdiag.c
1: #define PETSCMAT_DLL
3: /*
4: The basic matrix operations for the Block diagonal parallel
5: matrices.
6: */
7: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
11: PetscErrorCode MatSetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
12: {
13: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
15: PetscInt i,j,row,rstart = mbd->rstart,rend = mbd->rend;
16: PetscTruth roworiented = mbd->roworiented;
19: for (i=0; i<m; i++) {
20: if (idxm[i] < 0) continue;
21: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
22: if (idxm[i] >= rstart && idxm[i] < rend) {
23: row = idxm[i] - rstart;
24: for (j=0; j<n; j++) {
25: if (idxn[j] < 0) continue;
26: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
27: if (roworiented) {
28: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
29: } else {
30: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
31: }
32: }
33: } else {
34: if (!mbd->donotstash) {
35: if (roworiented) {
36: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
37: } else {
38: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
39: }
40: }
41: }
42: }
43: return(0);
44: }
48: PetscErrorCode MatGetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
49: {
50: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
52: PetscInt i,j,row,rstart = mbd->rstart,rend = mbd->rend;
55: for (i=0; i<m; i++) {
56: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
57: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
58: if (idxm[i] >= rstart && idxm[i] < rend) {
59: row = idxm[i] - rstart;
60: for (j=0; j<n; j++) {
61: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
62: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
63: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
64: }
65: } else {
66: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
67: }
68: }
69: return(0);
70: }
74: PetscErrorCode MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
75: {
76: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
77: MPI_Comm comm = mat->comm;
79: PetscInt nstash,reallocs;
80: InsertMode addv;
83: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
84: if (addv == (ADD_VALUES|INSERT_VALUES)) {
85: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
86: }
87: mat->insertmode = addv; /* in case this processor had no cache */
88: MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
89: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
90: PetscLogInfo((0,"MatAssemblyBegin_MPIBDiag:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));
91: return(0);
92: }
96: PetscErrorCode MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
97: {
98: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
99: Mat_SeqBDiag *mlocal;
101: PetscMPIInt n;
102: PetscInt i,*row,*col;
103: PetscInt *tmp1,*tmp2,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
104: PetscScalar *val;
105: InsertMode addv = mat->insertmode;
109: while (1) {
110: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
111: if (!flg) break;
112:
113: for (i=0; i<n;) {
114: /* Now identify the consecutive vals belonging to the same row */
115: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
116: if (j < n) ncols = j-i;
117: else ncols = n-i;
118: /* Now assemble all these values with a single function call */
119: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
120: i = j;
121: }
122: }
123: MatStashScatterEnd_Private(&mat->stash);
125: MatAssemblyBegin(mbd->A,mode);
126: MatAssemblyEnd(mbd->A,mode);
128: /* Fix main diagonal location and determine global diagonals */
129: mlocal = (Mat_SeqBDiag*)mbd->A->data;
130: Mblock = mat->M/mat->bs; Nblock = mat->N/mat->bs;
131: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
132: PetscMalloc(2*len*sizeof(PetscInt),&tmp1);
133: tmp2 = tmp1 + len;
134: PetscMemzero(tmp1,2*len*sizeof(PetscInt));
135: mlocal->mainbd = -1;
136: for (i=0; i<mlocal->nd; i++) {
137: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
138: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
139: }
140: MPI_Allreduce(tmp1,tmp2,len,MPIU_INT,MPI_SUM,mat->comm);
141: ict = 0;
142: for (i=0; i<len; i++) {
143: if (tmp2[i]) {
144: mbd->gdiag[ict] = i - Mblock;
145: ict++;
146: }
147: }
148: mbd->gnd = ict;
149: PetscFree(tmp1);
151: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
152: MatSetUpMultiply_MPIBDiag(mat);
153: }
154: return(0);
155: }
159: PetscErrorCode MatZeroEntries_MPIBDiag(Mat A)
160: {
161: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
165: MatZeroEntries(l->A);
166: return(0);
167: }
169: /* again this uses the same basic strategy as in the assembly and
170: scatter create routines; we should try to do it systematically
171: if we can figure out the proper level of generality. */
173: /* the code does not do the diagonal entries correctly unless the
174: matrix is square and the column and row ownerships are identical.
175: This is a BUG. The only way to fix it seems to be to access
176: aij->A and aij->B directly and not through the MatZeroRows()
177: routine.
178: */
182: PetscErrorCode MatZeroRows_MPIBDiag(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
183: {
184: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
186: PetscMPIInt n,imdex,size = l->size,rank = l->rank,tag = A->tag;
187: PetscInt i,*owners = l->rowners;
188: PetscInt *nprocs,j,idx,nsends;
189: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
190: PetscInt *rvalues,count,base,slen,*source;
191: PetscInt *lens,*lrows,*values;
192: MPI_Comm comm = A->comm;
193: MPI_Request *send_waits,*recv_waits;
194: MPI_Status recv_status,*send_status;
195: PetscTruth found;
198: /* first count number of contributors to each processor */
199: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
200: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
201: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
202: for (i=0; i<N; i++) {
203: idx = rows[i];
204: found = PETSC_FALSE;
205: for (j=0; j<size; j++) {
206: if (idx >= owners[j] && idx < owners[j+1]) {
207: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
208: }
209: }
210: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
211: }
212: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
214: /* inform other processors of number of messages and max length*/
215: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
217: /* post receives: */
218: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
219: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
220: for (i=0; i<nrecvs; i++) {
221: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
222: }
224: /* do sends:
225: 1) starts[i] gives the starting index in svalues for stuff going to
226: the ith processor
227: */
228: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
229: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
230: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
231: starts[0] = 0;
232: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
233: for (i=0; i<N; i++) {
234: svalues[starts[owner[i]]++] = rows[i];
235: }
237: starts[0] = 0;
238: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
239: count = 0;
240: for (i=0; i<size; i++) {
241: if (nprocs[2*i+1]) {
242: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
243: }
244: }
245: PetscFree(starts);
247: base = owners[rank];
249: /* wait on receives */
250: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
251: source = lens + nrecvs;
252: count = nrecvs;
253: slen = 0;
254: while (count) {
255: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
256: /* unpack receives into our local space */
257: MPI_Get_count(&recv_status,MPIU_INT,&n);
258: source[imdex] = recv_status.MPI_SOURCE;
259: lens[imdex] = n;
260: slen += n;
261: count--;
262: }
263: PetscFree(recv_waits);
264:
265: /* move the data into the send scatter */
266: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
267: count = 0;
268: for (i=0; i<nrecvs; i++) {
269: values = rvalues + i*nmax;
270: for (j=0; j<lens[i]; j++) {
271: lrows[count++] = values[j] - base;
272: }
273: }
274: PetscFree(rvalues);
275: PetscFree(lens);
276: PetscFree(owner);
277: PetscFree(nprocs);
278:
279: /* actually zap the local rows */
280: MatZeroRows(l->A,slen,lrows,diag);
281: PetscFree(lrows);
283: /* wait on sends */
284: if (nsends) {
285: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
286: MPI_Waitall(nsends,send_waits,send_status);
287: PetscFree(send_status);
288: }
289: PetscFree(send_waits);
290: PetscFree(svalues);
292: return(0);
293: }
297: PetscErrorCode MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
298: {
299: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
303: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
304: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
305: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
306: return(0);
307: }
311: PetscErrorCode MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
312: {
313: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
317: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
320: return(0);
321: }
325: PetscErrorCode MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
326: {
327: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
329: PetscScalar zero = 0.0;
332: VecSet(yy,zero);
333: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
334: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
336: return(0);
337: }
341: PetscErrorCode MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
342: {
343: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
347: VecCopy(yy,zz);
348: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
349: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
350: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
351: return(0);
352: }
356: PetscErrorCode MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
357: {
358: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
360: PetscReal isend[5],irecv[5];
363: info->block_size = (PetscReal)mat->A->bs;
364: MatGetInfo(mat->A,MAT_LOCAL,info);
365: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
366: isend[3] = info->memory; isend[4] = info->mallocs;
367: if (flag == MAT_LOCAL) {
368: info->nz_used = isend[0];
369: info->nz_allocated = isend[1];
370: info->nz_unneeded = isend[2];
371: info->memory = isend[3];
372: info->mallocs = isend[4];
373: } else if (flag == MAT_GLOBAL_MAX) {
374: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
375: info->nz_used = irecv[0];
376: info->nz_allocated = irecv[1];
377: info->nz_unneeded = irecv[2];
378: info->memory = irecv[3];
379: info->mallocs = irecv[4];
380: } else if (flag == MAT_GLOBAL_SUM) {
381: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
382: info->nz_used = irecv[0];
383: info->nz_allocated = irecv[1];
384: info->nz_unneeded = irecv[2];
385: info->memory = irecv[3];
386: info->mallocs = irecv[4];
387: }
388: info->rows_global = (double)matin->M;
389: info->columns_global = (double)matin->N;
390: info->rows_local = (double)matin->m;
391: info->columns_local = (double)matin->N;
392: return(0);
393: }
397: PetscErrorCode MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
398: {
400: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
403: MatGetDiagonal(A->A,v);
404: return(0);
405: }
409: PetscErrorCode MatDestroy_MPIBDiag(Mat mat)
410: {
411: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
413: #if defined(PETSC_USE_LOG)
414: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
417: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D, BSize=%D, NDiag=%D",mat->M,mat->N,mat->bs,ms->nd);
418: #else
420: #endif
421: MatStashDestroy_Private(&mat->stash);
422: PetscFree(mbd->rowners);
423: PetscFree(mbd->gdiag);
424: MatDestroy(mbd->A);
425: if (mbd->lvec) {VecDestroy(mbd->lvec);}
426: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
427: PetscFree(mbd);
428: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
429: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBDiagSetPreallocation_C","",PETSC_NULL);
430: return(0);
431: }
436: static PetscErrorCode MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
437: {
438: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
442: if (mbd->size == 1) {
443: MatView(mbd->A,viewer);
444: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
445: return(0);
446: }
450: static PetscErrorCode MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
451: {
452: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
453: PetscErrorCode ierr;
454: PetscMPIInt size = mbd->size,rank = mbd->rank;
455: PetscInt i;
456: PetscTruth iascii,isdraw;
457: PetscViewer sviewer;
458: PetscViewerFormat format;
461: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
462: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
463: if (iascii) {
464: PetscViewerGetFormat(viewer,&format);
465: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
466: PetscInt nline = PetscMin(10,mbd->gnd),k,nk,np;
467: PetscViewerASCIIPrintf(viewer," block size=%D, total number of diagonals=%D\n",mat->bs,mbd->gnd);
468: nk = (mbd->gnd-1)/nline + 1;
469: for (k=0; k<nk; k++) {
470: PetscViewerASCIIPrintf(viewer," global diag numbers:");
471: np = PetscMin(nline,mbd->gnd - nline*k);
472: for (i=0; i<np; i++) {
473: PetscViewerASCIIPrintf(viewer," %D",mbd->gdiag[i+nline*k]);
474: }
475: PetscViewerASCIIPrintf(viewer,"\n");
476: }
477: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
478: MatInfo info;
479: MPI_Comm_rank(mat->comm,&rank);
480: MatGetInfo(mat,MAT_LOCAL,&info);
481: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
482: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
483: PetscViewerFlush(viewer);
484: VecScatterView(mbd->Mvctx,viewer);
485: }
486: return(0);
487: }
488: }
490: if (isdraw) {
491: PetscDraw draw;
492: PetscTruth isnull;
493: PetscViewerDrawGetDraw(viewer,0,&draw);
494: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
495: }
497: if (size == 1) {
498: MatView(mbd->A,viewer);
499: } else {
500: /* assemble the entire matrix onto first processor. */
501: Mat A;
502: PetscInt M = mat->M,N = mat->N,m,row,nz,*cols;
503: PetscScalar *vals;
505: /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
506: MatCreate(mat->comm,&A);
507: if (!rank) {
508: MatSetSizes(A,M,N,M,N);
509: MatSetType(A,MATMPIBDIAG);
510: MatMPIBDiagSetPreallocation(A,mbd->gnd,mbd->A->bs,mbd->gdiag,PETSC_NULL);
511: } else {
512: MatSetSizes(A,0,0,M,N);
513: MatSetType(A,MATMPIBDIAG);
514: MatMPIBDiagSetPreallocation(A,0,mbd->A->bs,PETSC_NULL,PETSC_NULL);
515: }
516: PetscLogObjectParent(mat,A);
518: /* Copy the matrix ... This isn't the most efficient means,
519: but it's quick for now */
520: row = mbd->rstart;
521: m = mbd->A->m;
522: for (i=0; i<m; i++) {
523: MatGetRow_MPIBDiag(mat,row,&nz,&cols,&vals);
524: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
525: MatRestoreRow_MPIBDiag(mat,row,&nz,&cols,&vals);
526: row++;
527: }
528: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
529: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
530: PetscViewerGetSingleton(viewer,&sviewer);
531: if (!rank) {
532: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
533: }
534: PetscViewerRestoreSingleton(viewer,&sviewer);
535: PetscViewerFlush(viewer);
536: MatDestroy(A);
537: }
538: return(0);
539: }
543: PetscErrorCode MatView_MPIBDiag(Mat mat,PetscViewer viewer)
544: {
546: PetscTruth iascii,isdraw,isbinary;
549: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
550: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
551: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
552: if (iascii || isdraw) {
553: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
554: } else if (isbinary) {
555: MatView_MPIBDiag_Binary(mat,viewer);
556: } else {
557: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
558: }
559: return(0);
560: }
564: PetscErrorCode MatSetOption_MPIBDiag(Mat A,MatOption op)
565: {
566: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
569: switch (op) {
570: case MAT_NO_NEW_NONZERO_LOCATIONS:
571: case MAT_YES_NEW_NONZERO_LOCATIONS:
572: case MAT_NEW_NONZERO_LOCATION_ERR:
573: case MAT_NEW_NONZERO_ALLOCATION_ERR:
574: case MAT_NO_NEW_DIAGONALS:
575: case MAT_YES_NEW_DIAGONALS:
576: MatSetOption(mbd->A,op);
577: break;
578: case MAT_ROW_ORIENTED:
579: mbd->roworiented = PETSC_TRUE;
580: MatSetOption(mbd->A,op);
581: break;
582: case MAT_COLUMN_ORIENTED:
583: mbd->roworiented = PETSC_FALSE;
584: MatSetOption(mbd->A,op);
585: break;
586: case MAT_IGNORE_OFF_PROC_ENTRIES:
587: mbd->donotstash = PETSC_TRUE;
588: break;
589: case MAT_ROWS_SORTED:
590: case MAT_ROWS_UNSORTED:
591: case MAT_COLUMNS_SORTED:
592: case MAT_COLUMNS_UNSORTED:
593: PetscLogInfo((A,"MatSetOption_MPIBDiag:Option ignored\n"));
594: break;
595: case MAT_SYMMETRIC:
596: case MAT_STRUCTURALLY_SYMMETRIC:
597: case MAT_NOT_SYMMETRIC:
598: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
599: case MAT_HERMITIAN:
600: case MAT_NOT_HERMITIAN:
601: case MAT_SYMMETRY_ETERNAL:
602: case MAT_NOT_SYMMETRY_ETERNAL:
603: break;
604: default:
605: SETERRQ(PETSC_ERR_SUP,"unknown option");
606: }
607: return(0);
608: }
613: PetscErrorCode MatGetRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
614: {
615: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
617: PetscInt lrow;
620: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows")
621: lrow = row - mat->rstart;
622: MatGetRow_SeqBDiag(mat->A,lrow,nz,idx,v);
623: return(0);
624: }
628: PetscErrorCode MatRestoreRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
629: {
630: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
632: PetscInt lrow;
635: lrow = row - mat->rstart;
636: MatRestoreRow_SeqBDiag(mat->A,lrow,nz,idx,v);
637: return(0);
638: }
643: PetscErrorCode MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
644: {
645: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
646: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
647: PetscReal sum = 0.0;
649: PetscInt d,i,nd = a->nd,bs = A->bs,len;
650: PetscScalar *dv;
653: if (type == NORM_FROBENIUS) {
654: for (d=0; d<nd; d++) {
655: dv = a->diagv[d];
656: len = a->bdlen[d]*bs*bs;
657: for (i=0; i<len; i++) {
658: #if defined(PETSC_USE_COMPLEX)
659: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
660: #else
661: sum += dv[i]*dv[i];
662: #endif
663: }
664: }
665: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
666: *nrm = sqrt(*nrm);
667: PetscLogFlops(2*A->n*A->m);
668: } else if (type == NORM_1) { /* max column norm */
669: PetscReal *tmp,*tmp2;
670: PetscInt j;
671: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
672: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
673: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
674: *nrm = 0.0;
675: MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
676: for (j=0; j<mbd->A->n; j++) {
677: if (tmp2[j] > *nrm) *nrm = tmp2[j];
678: }
679: PetscFree(tmp);
680: PetscFree(tmp2);
681: } else if (type == NORM_INFINITY) { /* max row norm */
682: PetscReal normtemp;
683: MatNorm(mbd->A,type,&normtemp);
684: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
685: }
686: return(0);
687: }
691: PetscErrorCode MatPrintHelp_MPIBDiag(Mat A)
692: {
693: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
697: if (!a->rank) {
698: MatPrintHelp_SeqBDiag(a->A);
699: }
700: return(0);
701: }
705: PetscErrorCode MatScale_MPIBDiag(Mat A,PetscScalar alpha)
706: {
708: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
711: MatScale_SeqBDiag(a->A,alpha);
712: return(0);
713: }
717: PetscErrorCode MatSetUpPreallocation_MPIBDiag(Mat A)
718: {
722: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
723: return(0);
724: }
726: /* -------------------------------------------------------------------*/
728: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
729: MatGetRow_MPIBDiag,
730: MatRestoreRow_MPIBDiag,
731: MatMult_MPIBDiag,
732: /* 4*/ MatMultAdd_MPIBDiag,
733: MatMultTranspose_MPIBDiag,
734: MatMultTransposeAdd_MPIBDiag,
735: 0,
736: 0,
737: 0,
738: /*10*/ 0,
739: 0,
740: 0,
741: 0,
742: 0,
743: /*15*/ MatGetInfo_MPIBDiag,
744: 0,
745: MatGetDiagonal_MPIBDiag,
746: 0,
747: MatNorm_MPIBDiag,
748: /*20*/ MatAssemblyBegin_MPIBDiag,
749: MatAssemblyEnd_MPIBDiag,
750: 0,
751: MatSetOption_MPIBDiag,
752: MatZeroEntries_MPIBDiag,
753: /*25*/ MatZeroRows_MPIBDiag,
754: 0,
755: 0,
756: 0,
757: 0,
758: /*30*/ MatSetUpPreallocation_MPIBDiag,
759: 0,
760: 0,
761: 0,
762: 0,
763: /*35*/ 0,
764: 0,
765: 0,
766: 0,
767: 0,
768: /*40*/ 0,
769: 0,
770: 0,
771: MatGetValues_MPIBDiag,
772: 0,
773: /*45*/ MatPrintHelp_MPIBDiag,
774: MatScale_MPIBDiag,
775: 0,
776: 0,
777: 0,
778: /*50*/ 0,
779: 0,
780: 0,
781: 0,
782: 0,
783: /*55*/ 0,
784: 0,
785: 0,
786: 0,
787: 0,
788: /*60*/ 0,
789: MatDestroy_MPIBDiag,
790: MatView_MPIBDiag,
791: MatGetPetscMaps_Petsc,
792: 0,
793: /*65*/ 0,
794: 0,
795: 0,
796: 0,
797: 0,
798: /*70*/ 0,
799: 0,
800: 0,
801: 0,
802: 0,
803: /*75*/ 0,
804: 0,
805: 0,
806: 0,
807: 0,
808: /*80*/ 0,
809: 0,
810: 0,
811: 0,
812: MatLoad_MPIBDiag,
813: /*85*/ 0,
814: 0,
815: 0,
816: 0,
817: 0,
818: /*90*/ 0,
819: 0,
820: 0,
821: 0,
822: 0,
823: /*95*/ 0,
824: 0,
825: 0,
826: 0};
831: PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
832: {
833: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
835: PetscInt lrows,lcols,rstart,rend;
836: IS localc,localr;
839: MatGetLocalSize(A,&lrows,&lcols);
840: MatGetOwnershipRange(A,&rstart,&rend);
841: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
842: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
843: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
844: ISDestroy(localr);
845: ISDestroy(localc);
847: *iscopy = PETSC_TRUE;
848: return(0);
849: }
855: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,PetscInt nd,PetscInt bs,PetscInt *diag,PetscScalar **diagv)
856: {
857: Mat_MPIBDiag *b;
859: PetscInt i,k,*ldiag,len,nd2;
860: PetscScalar **ldiagv = 0;
861: PetscTruth flg2;
864: B->preallocated = PETSC_TRUE;
865: if (bs == PETSC_DEFAULT) bs = 1;
866: if (nd == PETSC_DEFAULT) nd = 0;
867: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
868: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
869: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
870: if (nd && !diag) {
871: PetscMalloc(nd*sizeof(PetscInt),&diag);
872: nd2 = nd;
873: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
874: if (nd2 != nd) {
875: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
876: }
877: } else if (flg2) {
878: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
879: }
881: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
883: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
884: PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);
886: if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
887: if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
888: if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
889: B->bs = bs;
891: /* the information in the maps duplicates the information computed below, eventually
892: we should remove the duplicate information that is not contained in the maps */
893: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
894: PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);
897: b = (Mat_MPIBDiag*)B->data;
898: b->gnd = nd;
900: MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);
901: b->rowners[0] = 0;
902: for (i=2; i<=b->size; i++) {
903: b->rowners[i] += b->rowners[i-1];
904: }
905: b->rstart = b->rowners[b->rank];
906: b->rend = b->rowners[b->rank+1];
907: b->brstart = (b->rstart)/bs;
908: b->brend = (b->rend)/bs;
911: /* Determine local diagonals; for now, assume global rows = global cols */
912: /* These are sorted in MatCreateSeqBDiag */
913: PetscMalloc((nd+1)*sizeof(PetscInt),&ldiag);
914: len = B->M/bs + B->N/bs + 1;
915: PetscMalloc(len*sizeof(PetscInt),&b->gdiag);
916: k = 0;
917: PetscLogObjectMemory(B,(nd+1)*sizeof(PetscInt) + (b->size+2)*sizeof(PetscInt) + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
918: if (diagv) {
919: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
920: }
921: for (i=0; i<nd; i++) {
922: b->gdiag[i] = diag[i];
923: if (diag[i] > 0) { /* lower triangular */
924: if (diag[i] < b->brend) {
925: ldiag[k] = diag[i] - b->brstart;
926: if (diagv) ldiagv[k] = diagv[i];
927: k++;
928: }
929: } else { /* upper triangular */
930: if (B->M/bs - diag[i] > B->N/bs) {
931: if (B->M/bs + diag[i] > b->brstart) {
932: ldiag[k] = diag[i] - b->brstart;
933: if (diagv) ldiagv[k] = diagv[i];
934: k++;
935: }
936: } else {
937: if (B->M/bs > b->brstart) {
938: ldiag[k] = diag[i] - b->brstart;
939: if (diagv) ldiagv[k] = diagv[i];
940: k++;
941: }
942: }
943: }
944: }
946: /* Form local matrix */
947: MatCreate(PETSC_COMM_SELF,&b->A);
948: MatSetSizes(b->A,B->m,B->N,B->m,B->N);
949: MatSetType(b->A,MATSEQBDIAG);
950: MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
951: PetscLogObjectParent(B,b->A);
952: PetscFree(ldiag);
953: if (ldiagv) {PetscFree(ldiagv);}
955: return(0);
956: }
959: /*MC
960: MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.
962: Options Database Keys:
963: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()
965: Level: beginner
967: .seealso: MatCreateMPIBDiag
968: M*/
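/*
   Added usage sketch (not part of the original source; the communicator "comm" and the
   sizes below are assumptions): one way to obtain a matrix of this type explicitly is

      Mat      A;
      PetscInt diag[2] = {0,1};                             -- main diagonal and first block diagonal below it
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);     -- 100 x 100 global; PETSc chooses the row split
      MatSetType(A,MATMPIBDIAG);
      MatMPIBDiagSetPreallocation(A,2,1,diag,PETSC_NULL);   -- nd=2, bs=1, PETSc allocates the diagonal storage

   or, equivalently, -mat_type mpibdiag on the command line together with MatSetFromOptions().
*/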
973: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBDiag(Mat B)
974: {
975: Mat_MPIBDiag *b;
979: PetscNew(Mat_MPIBDiag,&b);
980: B->data = (void*)b;
981: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
982: B->factor = 0;
983: B->mapping = 0;
985: B->insertmode = NOT_SET_VALUES;
986: MPI_Comm_rank(B->comm,&b->rank);
987: MPI_Comm_size(B->comm,&b->size);
989: /* build local table of row ownerships */
990: PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rowners);
992: /* build cache for off array entries formed */
993: MatStashCreate_Private(B->comm,1,&B->stash);
994: b->donotstash = PETSC_FALSE;
996: /* stuff used for matrix-vector multiply */
997: b->lvec = 0;
998: b->Mvctx = 0;
1000: /* used for MatSetValues() input */
1001: b->roworiented = PETSC_TRUE;
1003: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1004: "MatGetDiagonalBlock_MPIBDiag",
1005: MatGetDiagonalBlock_MPIBDiag);
1006: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
1007: "MatMPIBDiagSetPreallocation_MPIBDiag",
1008: MatMPIBDiagSetPreallocation_MPIBDiag);
1009: return(0);
1010: }
1013: /*MC
1014: MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.
1016: This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
1017: and MATMPIBDIAG otherwise.
1019: Options Database Keys:
1020: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()
1022: Level: beginner
1024: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1025: M*/
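/*
   Added note (not in the original source): because MATBDIAG simply dispatches on the
   communicator size, it is typically selected at run time; for a hypothetical program
   ./ex1 that calls MatSetFromOptions(), something like

      mpirun -np 4 ./ex1 -mat_type bdiag -mat_block_size 2

   resolves to MATMPIBDIAG on 4 processes and to MATSEQBDIAG on a single process.
*/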
1030: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BDiag(Mat A)
1031: {
1033: PetscMPIInt size;
1036: PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1037: MPI_Comm_size(A->comm,&size);
1038: if (size == 1) {
1039: MatSetType(A,MATSEQBDIAG);
1040: } else {
1041: MatSetType(A,MATMPIBDIAG);
1042: }
1043: return(0);
1044: }
1049: /*@C
1050: MatMPIBDiagSetPreallocation - Sets the block diagonal storage information (number of diagonals, block size, diagonal numbers) for an MPIBDIAG matrix.
1052: Collective on Mat
1054: Input Parameters:
1055: + A - the matrix
1056: . nd - number of block diagonals (global) (optional)
1057: . bs - each element of a diagonal is a bs x bs dense matrix
1058: . diag - optional array of block diagonal numbers (length nd).
1059: For a matrix element A[i,j], where i=row and j=column, the
1060: diagonal number is
1061: $ diag = i/bs - j/bs (integer division)
1062: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1063: needed (expensive).
1064: - diagv - pointer to actual diagonals (in same order as diag array),
1065: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1066: to control memory allocation.
1069: Options Database Keys:
1070: . -mat_block_size <bs> - Sets blocksize
1071: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1073: Notes:
1074: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1075: then it must be used on all processors that share the object for that argument.
1077: The parallel matrix is partitioned across the processors by rows, where
1078: each local rectangular matrix is stored in the uniprocessor block
1079: diagonal format. See the users manual for further details.
1081: The user MUST specify either the local or global numbers of rows
1082: (possibly both).
1084: The case bs=1 (conventional diagonal storage) is implemented as
1085: a special case.
1087: Fortran Notes:
1088: Fortran programmers cannot set diagv; this variable is ignored.
1090: Level: intermediate
1092: .keywords: matrix, block, diagonal, parallel, sparse
1094: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1095: @*/
1096: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBDiagSetPreallocation(Mat B,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[])
1097: {
1098: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscScalar*[]);
1101: PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1102: if (f) {
1103: (*f)(B,nd,bs,diag,diagv);
1104: }
1105: return(0);
1106: }
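/*
   Worked example (added for clarity; "B" and the sizes are assumptions): with block size
   bs = 2 the entry A[5,2] lies on block diagonal 5/2 - 2/2 = 2 - 1 = 1 (integer division),
   i.e. the first block diagonal below the main one, while A[2,5] lies on diagonal
   2/2 - 5/2 = -1. A sketch of preallocating those two diagonals plus the main one:

      PetscInt diag[3] = {-1,0,1};
      MatMPIBDiagSetPreallocation(B,3,2,diag,PETSC_NULL);   -- 3 diagonals, bs=2, PETSc allocates the values

   where B has previously been created with MatCreate()/MatSetSizes()/MatSetType(B,MATMPIBDIAG).
*/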
1110: /*@C
1111: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
1113: Collective on MPI_Comm
1115: Input Parameters:
1116: + comm - MPI communicator
1117: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1118: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1119: . N - number of columns (local and global)
1120: . nd - number of block diagonals (global) (optional)
1121: . bs - each element of a diagonal is a bs x bs dense matrix
1122: . diag - optional array of block diagonal numbers (length nd).
1123: For a matrix element A[i,j], where i=row and j=column, the
1124: diagonal number is
1125: $ diag = i/bs - j/bs (integer division)
1126: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1127: needed (expensive).
1128: - diagv - pointer to actual diagonals (in same order as diag array),
1129: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1130: to control memory allocation.
1132: Output Parameter:
1133: . A - the matrix
1135: Options Database Keys:
1136: . -mat_block_size <bs> - Sets blocksize
1137: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1139: Notes:
1140: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1141: then it must be used on all processors that share the object for that argument.
1143: The parallel matrix is partitioned across the processors by rows, where
1144: each local rectangular matrix is stored in the uniprocessor block
1145: diagonal format. See the users manual for further details.
1147: The user MUST specify either the local or global numbers of rows
1148: (possibly both).
1150: The case bs=1 (conventional diagonal storage) is implemented as
1151: a special case.
1153: Fortran Notes:
1154: Fortran programmers cannot set diagv; this variable is ignored.
1156: Level: intermediate
1158: .keywords: matrix, block, diagonal, parallel, sparse
1160: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1161: @*/
1162: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBDiag(MPI_Comm comm,PetscInt m,PetscInt M,PetscInt N,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[],Mat *A)
1163: {
1165: PetscMPIInt size;
1168: MatCreate(comm,A);
1169: MatSetSizes(*A,m,m,M,N);
1170: MPI_Comm_size(comm,&size);
1171: if (size > 1) {
1172: MatSetType(*A,MATMPIBDIAG);
1173: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1174: } else {
1175: MatSetType(*A,MATSEQBDIAG);
1176: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1177: }
1178: return(0);
1179: }
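/*
   Minimal usage sketch (added; not part of the original file, sizes are assumptions):
   create a 1000 x 1000 tridiagonal matrix (bs = 1, diagonals -1, 0, 1) over
   PETSC_COMM_WORLD, letting PETSc choose the row distribution and the storage:

      Mat      A;
      PetscInt diag[3] = {-1,0,1};
      MatCreateMPIBDiag(PETSC_COMM_WORLD,PETSC_DECIDE,1000,1000,3,1,diag,PETSC_NULL,&A);
      ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() as usual ...
      MatDestroy(A);
*/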
1183: /*@C
1184: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1185: For the parallel case, this returns information for the local submatrix.
1187: Input Parameters:
1188: . mat - the matrix, stored in block diagonal format.
1190: Not Collective
1192: Output Parameters:
1193: + m - number of rows
1194: . n - number of columns
1195: . nd - number of block diagonals
1196: . bs - each element of a diagonal is a bs x bs dense matrix
1197: . bdlen - array of total block lengths of block diagonals
1198: . diag - array of block diagonal numbers (length nd).
1199: For a matrix element A[i,j], where i=row and j=column, the
1200: diagonal number is
1201: $ diag = i/bs - j/bs (integer division)
1204: - diagv - pointer to the actual diagonals (in same order as the diag array)
1206: Level: advanced
1208: Notes:
1209: See the users manual for further details regarding this storage format.
1211: .keywords: matrix, block, diagonal, get, data
1213: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1214: @*/
1215: PetscErrorCode PETSCMAT_DLLEXPORT MatBDiagGetData(Mat mat,PetscInt *nd,PetscInt *bs,PetscInt *diag[],PetscInt *bdlen[],PetscScalar ***diagv)
1216: {
1217: Mat_MPIBDiag *pdmat;
1218: Mat_SeqBDiag *dmat = 0;
1219: PetscTruth isseq,ismpi;
1224: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1225: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1226: if (isseq) {
1227: dmat = (Mat_SeqBDiag*)mat->data;
1228: } else if (ismpi) {
1229: pdmat = (Mat_MPIBDiag*)mat->data;
1230: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1231: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1232: *nd = dmat->nd;
1233: *bs = mat->bs;
1234: *diag = dmat->diag;
1235: *bdlen = dmat->bdlen;
1236: *diagv = dmat->diagv;
1237: return(0);
1238: }
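/*
   Hedged usage sketch (added; "mat" is assumed to be an assembled MATSEQBDIAG or
   MATMPIBDIAG matrix):

      PetscInt    nd,bs,*diag,*bdlen;
      PetscScalar **diagv;
      MatBDiagGetData(mat,&nd,&bs,&diag,&bdlen,&diagv);
      -- nd block diagonals of bs x bs blocks; diag[i] is the diagonal number,
      -- bdlen[i] its block length, and diagv[i] points at its values
*/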
1240: #include "petscsys.h"
1244: PetscErrorCode MatLoad_MPIBDiag(PetscViewer viewer, MatType type,Mat *newmat)
1245: {
1246: Mat A;
1247: PetscScalar *vals,*svals;
1248: MPI_Comm comm = ((PetscObject)viewer)->comm;
1249: MPI_Status status;
1251: int fd;
1252: PetscMPIInt tag = ((PetscObject)viewer)->tag,rank,size,*sndcounts = 0,*rowners,maxnz,mm;
1253: PetscInt bs,i,nz,j,rstart,rend,*cols;
1254: PetscInt header[4],*rowlengths = 0,M,N,m,Mbs;
1255: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1256: PetscInt extra_rows;
1259: MPI_Comm_size(comm,&size);
1260: MPI_Comm_rank(comm,&rank);
1261: if (!rank) {
1262: PetscViewerBinaryGetDescriptor(viewer,&fd);
1263: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1264: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1265: if (header[3] < 0) {
1266: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1267: }
1268: }
1269: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1270: M = header[1]; N = header[2];
1272: bs = 1; /* uses a block size of 1 by default; */
1273: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1275: /*
1276: This code adds extra rows to make sure the number of rows is
1277: divisible by the blocksize
1278: */
1279: Mbs = M/bs;
1280: extra_rows = bs - M + bs*(Mbs);
1281: if (extra_rows == bs) extra_rows = 0;
1282: else Mbs++;
1283: if (extra_rows && !rank) {
1284: PetscLogInfo((0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n"));
1285: }
1287: /* determine ownership of all rows */
1288: m = bs*(Mbs/size + ((Mbs % size) > rank));
1289: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1290: mm = (PetscMPIInt)m;
1291: MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1292: rowners[0] = 0;
1293: for (i=2; i<=size; i++) {
1294: rowners[i] += rowners[i-1];
1295: }
1296: rstart = rowners[rank];
1297: rend = rowners[rank+1];
1299: /* distribute row lengths to all processors */
1300: PetscMalloc((rend-rstart)*sizeof(PetscInt),&ourlens);
1301: if (!rank) {
1302: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1303: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1304: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1305: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1306: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1307: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1308: PetscFree(sndcounts);
1309: } else {
1310: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1311: }
1313: if (!rank) {
1314: /* calculate the number of nonzeros on each processor */
1315: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1316: PetscMemzero(procsnz,size*sizeof(PetscInt));
1317: for (i=0; i<size; i++) {
1318: for (j=rowners[i]; j<rowners[i+1]; j++) {
1319: procsnz[i] += rowlengths[j];
1320: }
1321: }
1322: PetscFree(rowlengths);
1324: /* determine max buffer needed and allocate it */
1325: maxnz = 0;
1326: for (i=0; i<size; i++) {
1327: maxnz = PetscMax(maxnz,procsnz[i]);
1328: }
1329: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1331: /* read in my part of the matrix column indices */
1332: nz = procsnz[0];
1333: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1334: if (size == 1) nz -= extra_rows;
1335: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1336: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1338: /* read in everyone else's column indices and ship them off */
1339: for (i=1; i<size-1; i++) {
1340: nz = procsnz[i];
1341: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1342: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1343: }
1344: /* read in the stuff for the last proc */
1345: if (size != 1) {
1346: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1347: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1348: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1349: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
1350: }
1351: PetscFree(cols);
1352: } else {
1353: /* determine buffer space needed for message */
1354: nz = 0;
1355: for (i=0; i<m; i++) {
1356: nz += ourlens[i];
1357: }
1358: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1360: /* receive message of column indices*/
1361: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1362: MPI_Get_count(&status,MPIU_INT,&maxnz);
1363: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1364: }
1366: MatCreate(comm,newmat);
1367: MatSetSizes(*newmat,m,m,M+extra_rows,N+extra_rows);
1368: MatSetType(*newmat,type);
1369: MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1370: A = *newmat;
1372: if (!rank) {
1373: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1375: /* read in my part of the matrix numerical values */
1376: nz = procsnz[0];
1377: if (size == 1) nz -= extra_rows;
1378: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1379: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1381: /* insert into matrix */
1382: jj = rstart;
1383: smycols = mycols;
1384: svals = vals;
1385: for (i=0; i<m; i++) {
1386: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1387: smycols += ourlens[i];
1388: svals += ourlens[i];
1389: jj++;
1390: }
1392: /* read in the other processors' values (except the last one) and ship them out */
1393: for (i=1; i<size-1; i++) {
1394: nz = procsnz[i];
1395: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1396: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1397: }
1398: /* the last proc */
1399: if (size != 1){
1400: nz = procsnz[i] - extra_rows;
1401: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1402: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1403: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1404: }
1405: PetscFree(procsnz);
1406: } else {
1407: /* receive numeric values */
1408: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1410: /* receive message of values*/
1411: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1412: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1413: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1415: /* insert into matrix */
1416: jj = rstart;
1417: smycols = mycols;
1418: svals = vals;
1419: for (i=0; i<m; i++) {
1420: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1421: smycols += ourlens[i];
1422: svals += ourlens[i];
1423: jj++;
1424: }
1425: }
1426: PetscFree(ourlens);
1427: PetscFree(vals);
1428: PetscFree(mycols);
1429: PetscFree(rowners);
1431: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1432: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1433: return(0);
1434: }