Actual source code: mpibdiag.c
1: #define PETSCMAT_DLL
3: /*
4: The basic matrix operations for block diagonal parallel
5: matrices.
6: */
7: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
11: PetscErrorCode MatSetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
12: {
13: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
15: PetscInt i,j,row,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
16: PetscTruth roworiented = mbd->roworiented;
19: for (i=0; i<m; i++) {
20: if (idxm[i] < 0) continue;
21: if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
22: if (idxm[i] >= rstart && idxm[i] < rend) {
23: row = idxm[i] - rstart;
24: for (j=0; j<n; j++) {
25: if (idxn[j] < 0) continue;
26: if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
27: if (roworiented) {
28: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
29: } else {
30: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
31: }
32: }
33: } else {
34: if (!mbd->donotstash) {
35: if (roworiented) {
36: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
37: } else {
38: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
39: }
40: }
41: }
42: }
43: return(0);
44: }
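/*
   Illustrative sketch, not part of the original source: how a caller typically drives the
   routine above.  Entries whose rows belong to another process are cached in mat->stash by
   MatSetValues_MPIBDiag() and are only delivered during assembly.  The matrix A is assumed
   to be an already preallocated MATMPIBDIAG matrix; the indices and value are hypothetical.
*/
static PetscErrorCode ExampleSetValuesMPIBDiag(Mat A)
{
  PetscInt    row = 0,col = 5;                   /* global indices; the row may be off-process */
  PetscScalar v   = 2.0;

  MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);   /* stashed if the row is not locally owned */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);        /* ships stashed entries (MatAssemblyBegin_MPIBDiag) */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);          /* receives them and inserts locally (MatAssemblyEnd_MPIBDiag) */
  return(0);
}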
48: PetscErrorCode MatGetValues_MPIBDiag(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
49: {
50: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
52: PetscInt i,j,row,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
55: for (i=0; i<m; i++) {
56: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
57: if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
58: if (idxm[i] >= rstart && idxm[i] < rend) {
59: row = idxm[i] - rstart;
60: for (j=0; j<n; j++) {
61: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
62: if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
63: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
64: }
65: } else {
66: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
67: }
68: }
69: return(0);
70: }
74: PetscErrorCode MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
75: {
76: MPI_Comm comm = mat->comm;
78: PetscInt nstash,reallocs;
79: InsertMode addv;
82: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
83: if (addv == (ADD_VALUES|INSERT_VALUES)) {
84: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
85: }
86: mat->insertmode = addv; /* in case this processor had no cache */
87: MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
88: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
89: PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
90: return(0);
91: }
95: PetscErrorCode MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
96: {
97: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
98: Mat_SeqBDiag *mlocal;
100: PetscMPIInt n;
101: PetscInt i,*row,*col;
102: PetscInt *tmp1,*tmp2,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
103: PetscScalar *val;
104: InsertMode addv = mat->insertmode;
108: while (1) {
109: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
110: if (!flg) break;
111:
112: for (i=0; i<n;) {
113: /* Now identify the consecutive vals belonging to the same row */
114: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
115: if (j < n) ncols = j-i;
116: else ncols = n-i;
117: /* Now assemble all these values with a single function call */
118: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
119: i = j;
120: }
121: }
122: MatStashScatterEnd_Private(&mat->stash);
124: MatAssemblyBegin(mbd->A,mode);
125: MatAssemblyEnd(mbd->A,mode);
127: /* Fix main diagonal location and determine global diagonals */
128: mlocal = (Mat_SeqBDiag*)mbd->A->data;
129: Mblock = mat->rmap.N/mat->rmap.bs; Nblock = mat->cmap.N/mat->rmap.bs;
130: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
131: PetscMalloc(2*len*sizeof(PetscInt),&tmp1);
132: tmp2 = tmp1 + len;
133: PetscMemzero(tmp1,2*len*sizeof(PetscInt));
134: mlocal->mainbd = -1;
135: for (i=0; i<mlocal->nd; i++) {
136: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
137: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
138: }
139: MPI_Allreduce(tmp1,tmp2,len,MPIU_INT,MPI_SUM,mat->comm);
140: ict = 0;
141: for (i=0; i<len; i++) {
142: if (tmp2[i]) {
143: mbd->gdiag[ict] = i - Mblock;
144: ict++;
145: }
146: }
147: mbd->gnd = ict;
148: PetscFree(tmp1);
150: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
151: MatSetUpMultiply_MPIBDiag(mat);
152: }
153: return(0);
154: }
158: PetscErrorCode MatZeroEntries_MPIBDiag(Mat A)
159: {
160: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
164: MatZeroEntries(l->A);
165: return(0);
166: }
168: /* again this uses the same basic strategy as in the assembly and
169: scatter create routines; we should try to do it systematically
170: if we can figure out the proper level of generality. */
172: /* the code does not do the diagonal entries correctly unless the
173: matrix is square and the column and row ownerships are identical.
174: This is a BUG. The only way to fix it seems to be to access
175: aij->A and aij->B directly and not through the MatZeroRows()
176: routine.
177: */
181: PetscErrorCode MatZeroRows_MPIBDiag(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
182: {
183: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
185: PetscMPIInt n,imdex,size = l->size,rank = l->rank,tag = A->tag;
186: PetscInt i,*owners = A->rmap.range;
187: PetscInt *nprocs,j,idx,nsends;
188: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
189: PetscInt *rvalues,count,base,slen,*source;
190: PetscInt *lens,*lrows,*values;
191: MPI_Comm comm = A->comm;
192: MPI_Request *send_waits,*recv_waits;
193: MPI_Status recv_status,*send_status;
194: PetscTruth found;
197: /* first count number of contributors to each processor */
198: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
199: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
200: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
201: for (i=0; i<N; i++) {
202: idx = rows[i];
203: found = PETSC_FALSE;
204: for (j=0; j<size; j++) {
205: if (idx >= owners[j] && idx < owners[j+1]) {
206: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
207: }
208: }
209: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
210: }
211: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
213: /* inform other processors of number of messages and max length*/
214: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
216: /* post receives: */
217: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
218: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
219: for (i=0; i<nrecvs; i++) {
220: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
221: }
223: /* do sends:
224: 1) starts[i] gives the starting index in svalues for stuff going to
225: the ith processor
226: */
227: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
228: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
229: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
230: starts[0] = 0;
231: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
232: for (i=0; i<N; i++) {
233: svalues[starts[owner[i]]++] = rows[i];
234: }
236: starts[0] = 0;
237: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
238: count = 0;
239: for (i=0; i<size; i++) {
240: if (nprocs[2*i+1]) {
241: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
242: }
243: }
244: PetscFree(starts);
246: base = owners[rank];
248: /* wait on receives */
249: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
250: source = lens + nrecvs;
251: count = nrecvs;
252: slen = 0;
253: while (count) {
254: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
255: /* unpack receives into our local space */
256: MPI_Get_count(&recv_status,MPIU_INT,&n);
257: source[imdex] = recv_status.MPI_SOURCE;
258: lens[imdex] = n;
259: slen += n;
260: count--;
261: }
262: PetscFree(recv_waits);
263:
264: /* move the data into the send scatter */
265: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
266: count = 0;
267: for (i=0; i<nrecvs; i++) {
268: values = rvalues + i*nmax;
269: for (j=0; j<lens[i]; j++) {
270: lrows[count++] = values[j] - base;
271: }
272: }
273: PetscFree(rvalues);
274: PetscFree(lens);
275: PetscFree(owner);
276: PetscFree(nprocs);
277:
278: /* actually zap the local rows */
279: MatZeroRows(l->A,slen,lrows,diag);
280: PetscFree(lrows);
282: /* wait on sends */
283: if (nsends) {
284: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
285: MPI_Waitall(nsends,send_waits,send_status);
286: PetscFree(send_status);
287: }
288: PetscFree(send_waits);
289: PetscFree(svalues);
291: return(0);
292: }
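/*
   Illustrative sketch, not part of the original source: calling MatZeroRows() on an assembled
   parallel matrix.  Rows are given by global index and may be owned by any process; the routine
   above routes each index to its owner before zeroing.  As noted in the comment preceding it,
   the diagonal value is only placed correctly for square matrices with matching row and column
   ownership.  The matrix A and the row list are hypothetical.
*/
static PetscErrorCode ExampleZeroRowsMPIBDiag(Mat A)
{
  PetscInt    rows[2] = {0,7};   /* global row numbers, possibly off-process */
  PetscScalar diag    = 1.0;     /* value to leave on the diagonal of the zeroed rows */

  MatZeroRows(A,2,rows,diag);
  return(0);
}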
296: PetscErrorCode MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
297: {
298: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
302: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
303: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
304: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
305: return(0);
306: }
310: PetscErrorCode MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
311: {
312: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
316: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
317: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
319: return(0);
320: }
324: PetscErrorCode MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
325: {
326: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
328: PetscScalar zero = 0.0;
331: VecSet(yy,zero);
332: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
333: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
334: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335: return(0);
336: }
340: PetscErrorCode MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
341: {
342: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
346: VecCopy(yy,zz);
347: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
348: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
349: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
350: return(0);
351: }
355: PetscErrorCode MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
356: {
357: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
359: PetscReal isend[5],irecv[5];
362: info->block_size = (PetscReal)mat->A->rmap.bs;
363: MatGetInfo(mat->A,MAT_LOCAL,info);
364: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
365: isend[3] = info->memory; isend[4] = info->mallocs;
366: if (flag == MAT_LOCAL) {
367: info->nz_used = isend[0];
368: info->nz_allocated = isend[1];
369: info->nz_unneeded = isend[2];
370: info->memory = isend[3];
371: info->mallocs = isend[4];
372: } else if (flag == MAT_GLOBAL_MAX) {
373: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
374: info->nz_used = irecv[0];
375: info->nz_allocated = irecv[1];
376: info->nz_unneeded = irecv[2];
377: info->memory = irecv[3];
378: info->mallocs = irecv[4];
379: } else if (flag == MAT_GLOBAL_SUM) {
380: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
381: info->nz_used = irecv[0];
382: info->nz_allocated = irecv[1];
383: info->nz_unneeded = irecv[2];
384: info->memory = irecv[3];
385: info->mallocs = irecv[4];
386: }
387: info->rows_global = (double)matin->rmap.N;
388: info->columns_global = (double)matin->cmap.N;
389: info->rows_local = (double)matin->rmap.n;
390: info->columns_local = (double)matin->cmap.N;
391: return(0);
392: }
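/*
   Illustrative sketch, not part of the original source: querying storage statistics through the
   routine above.  MAT_LOCAL reports this process only, while MAT_GLOBAL_SUM and MAT_GLOBAL_MAX
   reduce the five statistics over the matrix communicator.  The matrix A is hypothetical.
*/
static PetscErrorCode ExampleGetInfoMPIBDiag(Mat A)
{
  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, mallocs %g\n",info.nz_used,info.mallocs);
  return(0);
}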
396: PetscErrorCode MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
397: {
399: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
402: MatGetDiagonal(A->A,v);
403: return(0);
404: }
408: PetscErrorCode MatDestroy_MPIBDiag(Mat mat)
409: {
410: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
412: #if defined(PETSC_USE_LOG)
413: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
416: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D, BSize=%D, NDiag=%D",mat->rmap.N,mat->cmap.N,mat->rmap.bs,ms->nd);
417: #else
419: #endif
420: MatStashDestroy_Private(&mat->stash);
421: PetscFree(mbd->gdiag);
422: MatDestroy(mbd->A);
423: if (mbd->lvec) {VecDestroy(mbd->lvec);}
424: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
425: PetscFree(mbd);
426: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
427: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBDiagSetPreallocation_C","",PETSC_NULL);
428: return(0);
429: }
434: static PetscErrorCode MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
435: {
436: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
440: if (mbd->size == 1) {
441: MatView(mbd->A,viewer);
442: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
443: return(0);
444: }
448: static PetscErrorCode MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
449: {
450: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
451: PetscErrorCode ierr;
452: PetscMPIInt size = mbd->size,rank = mbd->rank;
453: PetscInt i;
454: PetscTruth iascii,isdraw;
455: PetscViewer sviewer;
456: PetscViewerFormat format;
459: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
460: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
461: if (iascii) {
462: PetscViewerGetFormat(viewer,&format);
463: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
464: PetscInt nline = PetscMin(10,mbd->gnd),k,nk,np;
465: PetscViewerASCIIPrintf(viewer," block size=%D, total number of diagonals=%D\n",mat->rmap.bs,mbd->gnd);
466: nk = (mbd->gnd-1)/nline + 1;
467: for (k=0; k<nk; k++) {
468: PetscViewerASCIIPrintf(viewer," global diag numbers:");
469: np = PetscMin(nline,mbd->gnd - nline*k);
470: for (i=0; i<np; i++) {
471: PetscViewerASCIIPrintf(viewer," %D",mbd->gdiag[i+nline*k]);
472: }
473: PetscViewerASCIIPrintf(viewer,"\n");
474: }
475: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
476: MatInfo info;
477: MPI_Comm_rank(mat->comm,&rank);
478: MatGetInfo(mat,MAT_LOCAL,&info);
479: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
480: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
481: PetscViewerFlush(viewer);
482: VecScatterView(mbd->Mvctx,viewer);
483: }
484: return(0);
485: }
486: }
488: if (isdraw) {
489: PetscDraw draw;
490: PetscTruth isnull;
491: PetscViewerDrawGetDraw(viewer,0,&draw);
492: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
493: }
495: if (size == 1) {
496: MatView(mbd->A,viewer);
497: } else {
498: /* assemble the entire matrix onto first processor. */
499: Mat A;
500: PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,nz,*cols;
501: PetscScalar *vals;
503: /* Here we are constructing a temporary matrix, so we will explicitly set the type to MPIBDiag */
504: MatCreate(mat->comm,&A);
505: if (!rank) {
506: MatSetSizes(A,M,N,M,N);
507: MatSetType(A,MATMPIBDIAG);
508: MatMPIBDiagSetPreallocation(A,mbd->gnd,mbd->A->rmap.bs,mbd->gdiag,PETSC_NULL);
509: } else {
510: MatSetSizes(A,0,0,M,N);
511: MatSetType(A,MATMPIBDIAG);
512: MatMPIBDiagSetPreallocation(A,0,mbd->A->rmap.bs,PETSC_NULL,PETSC_NULL);
513: }
514: PetscLogObjectParent(mat,A);
516: /* Copy the matrix ... This isn't the most efficient means,
517: but it's quick for now */
518: row = mat->rmap.rstart;
519: m = mbd->A->rmap.N;
520: for (i=0; i<m; i++) {
521: MatGetRow_MPIBDiag(mat,row,&nz,&cols,&vals);
522: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
523: MatRestoreRow_MPIBDiag(mat,row,&nz,&cols,&vals);
524: row++;
525: }
526: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
527: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
528: PetscViewerGetSingleton(viewer,&sviewer);
529: if (!rank) {
530: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
531: }
532: PetscViewerRestoreSingleton(viewer,&sviewer);
533: PetscViewerFlush(viewer);
534: MatDestroy(A);
535: }
536: return(0);
537: }
541: PetscErrorCode MatView_MPIBDiag(Mat mat,PetscViewer viewer)
542: {
544: PetscTruth iascii,isdraw,isbinary;
547: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
548: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
549: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
550: if (iascii || isdraw) {
551: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
552: } else if (isbinary) {
553: MatView_MPIBDiag_Binary(mat,viewer);
554: } else {
555: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
556: }
557: return(0);
558: }
562: PetscErrorCode MatSetOption_MPIBDiag(Mat A,MatOption op)
563: {
564: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
567: switch (op) {
568: case MAT_NO_NEW_NONZERO_LOCATIONS:
569: case MAT_YES_NEW_NONZERO_LOCATIONS:
570: case MAT_NEW_NONZERO_LOCATION_ERR:
571: case MAT_NEW_NONZERO_ALLOCATION_ERR:
572: case MAT_NO_NEW_DIAGONALS:
573: case MAT_YES_NEW_DIAGONALS:
574: MatSetOption(mbd->A,op);
575: break;
576: case MAT_ROW_ORIENTED:
577: mbd->roworiented = PETSC_TRUE;
578: MatSetOption(mbd->A,op);
579: break;
580: case MAT_COLUMN_ORIENTED:
581: mbd->roworiented = PETSC_FALSE;
582: MatSetOption(mbd->A,op);
583: break;
584: case MAT_IGNORE_OFF_PROC_ENTRIES:
585: mbd->donotstash = PETSC_TRUE;
586: break;
587: case MAT_ROWS_SORTED:
588: case MAT_ROWS_UNSORTED:
589: case MAT_COLUMNS_SORTED:
590: case MAT_COLUMNS_UNSORTED:
591: PetscInfo(A,"Option ignored\n");
592: break;
593: case MAT_SYMMETRIC:
594: case MAT_STRUCTURALLY_SYMMETRIC:
595: case MAT_NOT_SYMMETRIC:
596: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
597: case MAT_HERMITIAN:
598: case MAT_NOT_HERMITIAN:
599: case MAT_SYMMETRY_ETERNAL:
600: case MAT_NOT_SYMMETRY_ETERNAL:
601: break;
602: default:
603: SETERRQ(PETSC_ERR_SUP,"unknown option");
604: }
605: return(0);
606: }
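/*
   Illustrative sketch, not part of the original source: switching the orientation handled by the
   routine above.  After MAT_COLUMN_ORIENTED is set, MatSetValues_MPIBDiag() reads the value array
   column by column (v+i+j*m) instead of row by row.  The matrix A and the values are hypothetical.
*/
static PetscErrorCode ExampleSetOptionMPIBDiag(Mat A)
{
  PetscInt    rows[2] = {0,1},cols[2] = {0,1};
  PetscScalar v[4]    = {1.0,2.0,3.0,4.0};       /* column-major: column 0 is {1,2}, column 1 is {3,4} */

  MatSetOption(A,MAT_COLUMN_ORIENTED);
  MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES);
  /* ... MatAssemblyBegin()/MatAssemblyEnd() ... */
  return(0);
}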
611: PetscErrorCode MatGetRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
612: {
613: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
615: PetscInt lrow;
618: if (row < matin->rmap.rstart || row >= matin->rmap.rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
619: lrow = row - matin->rmap.rstart;
620: MatGetRow_SeqBDiag(mat->A,lrow,nz,idx,v);
621: return(0);
622: }
626: PetscErrorCode MatRestoreRow_MPIBDiag(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
627: {
628: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
630: PetscInt lrow;
633: lrow = row - matin->rmap.rstart;
634: MatRestoreRow_SeqBDiag(mat->A,lrow,nz,idx,v);
635: return(0);
636: }
641: PetscErrorCode MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
642: {
643: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
644: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
645: PetscReal sum = 0.0;
647: PetscInt d,i,nd = a->nd,bs = A->rmap.bs,len;
648: PetscScalar *dv;
651: if (type == NORM_FROBENIUS) {
652: for (d=0; d<nd; d++) {
653: dv = a->diagv[d];
654: len = a->bdlen[d]*bs*bs;
655: for (i=0; i<len; i++) {
656: #if defined(PETSC_USE_COMPLEX)
657: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
658: #else
659: sum += dv[i]*dv[i];
660: #endif
661: }
662: }
663: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
664: *nrm = sqrt(*nrm);
665: PetscLogFlops(2*A->rmap.n*A->rmap.N);
666: } else if (type == NORM_1) { /* max column norm */
667: PetscReal *tmp,*tmp2;
668: PetscInt j;
669: PetscMalloc((mbd->A->cmap.n+1)*sizeof(PetscReal),&tmp);
670: PetscMalloc((mbd->A->cmap.n+1)*sizeof(PetscReal),&tmp2);
671: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->cmap.n);
672: *nrm = 0.0;
673: MPI_Allreduce(tmp,tmp2,mbd->A->cmap.n,MPIU_REAL,MPI_SUM,A->comm);
674: for (j=0; j<mbd->A->cmap.n; j++) {
675: if (tmp2[j] > *nrm) *nrm = tmp2[j];
676: }
677: PetscFree(tmp);
678: PetscFree(tmp2);
679: } else if (type == NORM_INFINITY) { /* max row norm */
680: PetscReal normtemp;
681: MatNorm(mbd->A,type,&normtemp);
682: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
683: }
684: return(0);
685: }
689: PetscErrorCode MatPrintHelp_MPIBDiag(Mat A)
690: {
691: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
695: if (!a->rank) {
696: MatPrintHelp_SeqBDiag(a->A);
697: }
698: return(0);
699: }
703: PetscErrorCode MatScale_MPIBDiag(Mat A,PetscScalar alpha)
704: {
706: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
709: MatScale_SeqBDiag(a->A,alpha);
710: return(0);
711: }
715: PetscErrorCode MatSetUpPreallocation_MPIBDiag(Mat A)
716: {
720: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
721: return(0);
722: }
724: /* -------------------------------------------------------------------*/
726: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
727: MatGetRow_MPIBDiag,
728: MatRestoreRow_MPIBDiag,
729: MatMult_MPIBDiag,
730: /* 4*/ MatMultAdd_MPIBDiag,
731: MatMultTranspose_MPIBDiag,
732: MatMultTransposeAdd_MPIBDiag,
733: 0,
734: 0,
735: 0,
736: /*10*/ 0,
737: 0,
738: 0,
739: 0,
740: 0,
741: /*15*/ MatGetInfo_MPIBDiag,
742: 0,
743: MatGetDiagonal_MPIBDiag,
744: 0,
745: MatNorm_MPIBDiag,
746: /*20*/ MatAssemblyBegin_MPIBDiag,
747: MatAssemblyEnd_MPIBDiag,
748: 0,
749: MatSetOption_MPIBDiag,
750: MatZeroEntries_MPIBDiag,
751: /*25*/ MatZeroRows_MPIBDiag,
752: 0,
753: 0,
754: 0,
755: 0,
756: /*30*/ MatSetUpPreallocation_MPIBDiag,
757: 0,
758: 0,
759: 0,
760: 0,
761: /*35*/ 0,
762: 0,
763: 0,
764: 0,
765: 0,
766: /*40*/ 0,
767: 0,
768: 0,
769: MatGetValues_MPIBDiag,
770: 0,
771: /*45*/ MatPrintHelp_MPIBDiag,
772: MatScale_MPIBDiag,
773: 0,
774: 0,
775: 0,
776: /*50*/ 0,
777: 0,
778: 0,
779: 0,
780: 0,
781: /*55*/ 0,
782: 0,
783: 0,
784: 0,
785: 0,
786: /*60*/ 0,
787: MatDestroy_MPIBDiag,
788: MatView_MPIBDiag,
789: 0,
790: 0,
791: /*65*/ 0,
792: 0,
793: 0,
794: 0,
795: 0,
796: /*70*/ 0,
797: 0,
798: 0,
799: 0,
800: 0,
801: /*75*/ 0,
802: 0,
803: 0,
804: 0,
805: 0,
806: /*80*/ 0,
807: 0,
808: 0,
809: 0,
810: MatLoad_MPIBDiag,
811: /*85*/ 0,
812: 0,
813: 0,
814: 0,
815: 0,
816: /*90*/ 0,
817: 0,
818: 0,
819: 0,
820: 0,
821: /*95*/ 0,
822: 0,
823: 0,
824: 0};
829: PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
830: {
831: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
833: PetscInt lrows,lcols,rstart,rend;
834: IS localc,localr;
837: MatGetLocalSize(A,&lrows,&lcols);
838: MatGetOwnershipRange(A,&rstart,&rend);
839: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
840: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
841: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
842: ISDestroy(localr);
843: ISDestroy(localc);
845: *iscopy = PETSC_TRUE;
846: return(0);
847: }
853: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,PetscInt nd,PetscInt bs,PetscInt *diag,PetscScalar **diagv)
854: {
855: Mat_MPIBDiag *b;
857: PetscInt i,k,*ldiag,len,nd2;
858: PetscScalar **ldiagv = 0;
859: PetscTruth flg2;
862: B->preallocated = PETSC_TRUE;
863: if (bs == PETSC_DEFAULT) bs = 1;
864: if (nd == PETSC_DEFAULT) nd = 0;
865: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
866: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
867: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
868: if (nd && !diag) {
869: PetscMalloc(nd*sizeof(PetscInt),&diag);
870: nd2 = nd;
871: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
872: if (nd2 != nd) {
873: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
874: }
875: } else if (flg2) {
876: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
877: }
879: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
881: B->rmap.bs = B->cmap.bs = bs;
883: PetscMapInitialize(B->comm,&B->rmap);
884: PetscMapInitialize(B->comm,&B->cmap);
886: if ((B->cmap.N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
887: if ((B->rmap.n%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
888: if ((B->rmap.N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
891: b = (Mat_MPIBDiag*)B->data;
892: b->gnd = nd;
893: b->brstart = (B->rmap.rstart)/bs;
894: b->brend = (B->rmap.rend)/bs;
897: /* Determine local diagonals; for now, assume global rows = global cols */
898: /* These are sorted in MatCreateSeqBDiag */
899: PetscMalloc((nd+1)*sizeof(PetscInt),&ldiag);
900: len = B->rmap.N/bs + B->cmap.N/bs + 1;
901: PetscMalloc(len*sizeof(PetscInt),&b->gdiag);
902: k = 0;
903: if (diagv) {
904: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
905: }
906: for (i=0; i<nd; i++) {
907: b->gdiag[i] = diag[i];
908: if (diag[i] > 0) { /* lower triangular */
909: if (diag[i] < b->brend) {
910: ldiag[k] = diag[i] - b->brstart;
911: if (diagv) ldiagv[k] = diagv[i];
912: k++;
913: }
914: } else { /* upper triangular */
915: if (B->rmap.N/bs - diag[i] > B->cmap.N/bs) {
916: if (B->rmap.N/bs + diag[i] > b->brstart) {
917: ldiag[k] = diag[i] - b->brstart;
918: if (diagv) ldiagv[k] = diagv[i];
919: k++;
920: }
921: } else {
922: if (B->rmap.N/bs > b->brstart) {
923: ldiag[k] = diag[i] - b->brstart;
924: if (diagv) ldiagv[k] = diagv[i];
925: k++;
926: }
927: }
928: }
929: }
931: /* Form local matrix */
932: MatCreate(PETSC_COMM_SELF,&b->A);
933: MatSetSizes(b->A,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
934: MatSetType(b->A,MATSEQBDIAG);
935: MatSeqBDiagSetPreallocation(b->A,k,bs,ldiag,ldiagv);
936: PetscLogObjectParent(B,b->A);
937: PetscFree(ldiag);
938: PetscFree(ldiagv);
940: return(0);
941: }
944: /*MC
945: MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.
947: Options Database Keys:
948: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()
950: Level: beginner
952: .seealso: MatCreateMPIBDiag
953: M*/
958: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBDiag(Mat B)
959: {
960: Mat_MPIBDiag *b;
964: PetscNew(Mat_MPIBDiag,&b);
965: B->data = (void*)b;
966: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
967: B->factor = 0;
968: B->mapping = 0;
970: B->insertmode = NOT_SET_VALUES;
971: MPI_Comm_rank(B->comm,&b->rank);
972: MPI_Comm_size(B->comm,&b->size);
974: /* build cache for off array entries formed */
975: MatStashCreate_Private(B->comm,1,&B->stash);
976: b->donotstash = PETSC_FALSE;
978: /* stuff used for matrix-vector multiply */
979: b->lvec = 0;
980: b->Mvctx = 0;
982: /* used for MatSetValues() input */
983: b->roworiented = PETSC_TRUE;
985: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
986: "MatGetDiagonalBlock_MPIBDiag",
987: MatGetDiagonalBlock_MPIBDiag);
988: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
989: "MatMPIBDiagSetPreallocation_MPIBDiag",
990: MatMPIBDiagSetPreallocation_MPIBDiag);
991: return(0);
992: }
995: /*MC
996: MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.
998: This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
999: and MATMPIBDIAG otherwise.
1001: Options Database Keys:
1002: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()
1004: Level: beginner
1006: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1007: M*/
1012: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BDiag(Mat A)
1013: {
1015: PetscMPIInt size;
1018: PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1019: MPI_Comm_size(A->comm,&size);
1020: if (size == 1) {
1021: MatSetType(A,MATSEQBDIAG);
1022: } else {
1023: MatSetType(A,MATMPIBDIAG);
1024: }
1025: return(0);
1026: }
1031: /*@C
1032: MatMPIBDiagSetPreallocation - Preallocates storage for a parallel block diagonal matrix, specifying the global block diagonals and the block size.
1034: Collective on Mat
1036: Input Parameters:
1037: + A - the matrix
1038: . nd - number of block diagonals (global) (optional)
1039: . bs - each element of a diagonal is a bs x bs dense matrix
1040: . diag - optional array of block diagonal numbers (length nd).
1041: For a matrix element A[i,j], where i=row and j=column, the
1042: diagonal number is
1043: $ diag = i/bs - j/bs (integer division)
1044: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1045: needed (expensive).
1046: - diagv - pointer to actual diagonals (in same order as diag array),
1047: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1048: to control memory allocation.
1051: Options Database Keys:
1052: . -mat_block_size <bs> - Sets blocksize
1053: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1055: Notes:
1056: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1057: then it must be used on all processors that share the object for that argument.
1059: The parallel matrix is partitioned across the processors by rows, where
1060: each local rectangular matrix is stored in the uniprocessor block
1061: diagonal format. See the users manual for further details.
1063: The user MUST specify either the local or global numbers of rows
1064: (possibly both).
1066: The case bs=1 (conventional diagonal storage) is implemented as
1067: a special case.
1069: Fortran Notes:
1070: Fortran programmers cannot set diagv; this variable is ignored.
1072: Level: intermediate
1074: .keywords: matrix, block, diagonal, parallel, sparse
1076: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1077: @*/
1078: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBDiagSetPreallocation(Mat B,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[])
1079: {
1080: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscScalar*[]);
1083: PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1084: if (f) {
1085: (*f)(B,nd,bs,diag,diagv);
1086: }
1087: return(0);
1088: }
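/*
   Illustrative sketch, not part of the original source: creating a MATMPIBDIAG matrix and
   preallocating it with the routine above.  Diagonal numbers follow the convention in the manual
   page, diag = i/bs - j/bs (integer division), so with bs = 2 the main block diagonal is 0, the
   first block superdiagonal is -1 and the first block subdiagonal is +1.  The communicator and
   sizes are hypothetical.
*/
static PetscErrorCode ExamplePreallocateMPIBDiag(MPI_Comm comm)
{
  Mat      A;
  PetscInt diag[3] = {-1,0,1};                    /* block tridiagonal */

  MatCreate(comm,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,8,8);   /* 8x8 global with bs = 2, i.e. 4x4 blocks */
  MatSetType(A,MATMPIBDIAG);
  MatMPIBDiagSetPreallocation(A,3,2,diag,PETSC_NULL);
  /* ... MatSetValues() and assembly ... */
  MatDestroy(A);
  return(0);
}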
1092: /*@C
1093: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
1095: Collective on MPI_Comm
1097: Input Parameters:
1098: + comm - MPI communicator
1099: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1100: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1101: . N - number of columns (local and global)
1102: . nd - number of block diagonals (global) (optional)
1103: . bs - each element of a diagonal is a bs x bs dense matrix
1104: . diag - optional array of block diagonal numbers (length nd).
1105: For a matrix element A[i,j], where i=row and j=column, the
1106: diagonal number is
1107: $ diag = i/bs - j/bs (integer division)
1108: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1109: needed (expensive).
1110: - diagv - pointer to actual diagonals (in same order as diag array),
1111: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1112: to control memory allocation.
1114: Output Parameter:
1115: . A - the matrix
1117: Options Database Keys:
1118: . -mat_block_size <bs> - Sets blocksize
1119: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1121: Notes:
1122: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1123: then it must be used on all processors that share the object for that argument.
1125: The parallel matrix is partitioned across the processors by rows, where
1126: each local rectangular matrix is stored in the uniprocessor block
1127: diagonal format. See the users manual for further details.
1129: The user MUST specify either the local or global numbers of rows
1130: (possibly both).
1132: The case bs=1 (conventional diagonal storage) is implemented as
1133: a special case.
1135: Fortran Notes:
1136: Fortran programmers cannot set diagv; this variable is ignored.
1138: Level: intermediate
1140: .keywords: matrix, block, diagonal, parallel, sparse
1142: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1143: @*/
1144: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBDiag(MPI_Comm comm,PetscInt m,PetscInt M,PetscInt N,PetscInt nd,PetscInt bs,const PetscInt diag[],PetscScalar *diagv[],Mat *A)
1145: {
1147: PetscMPIInt size;
1150: MatCreate(comm,A);
1151: MatSetSizes(*A,m,m,M,N);
1152: MPI_Comm_size(comm,&size);
1153: if (size > 1) {
1154: MatSetType(*A,MATMPIBDIAG);
1155: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1156: } else {
1157: MatSetType(*A,MATSEQBDIAG);
1158: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1159: }
1160: return(0);
1161: }
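/*
   Illustrative sketch, not part of the original source: the convenience constructor above collapses
   the create/set-type/preallocate sequence into a single call.  Here a scalar (bs = 1) tridiagonal
   matrix is requested; the communicator and sizes are hypothetical.
*/
static PetscErrorCode ExampleCreateMPIBDiag(MPI_Comm comm)
{
  Mat      A;
  PetscInt diag[3] = {-1,0,1};

  MatCreateMPIBDiag(comm,PETSC_DECIDE,10,10,3,1,diag,PETSC_NULL,&A);
  MatDestroy(A);
  return(0);
}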
1165: /*@C
1166: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1167: For the parallel case, this returns information for the local submatrix.
1169: Input Parameters:
1170: . mat - the matrix, stored in block diagonal format.
1172: Not Collective
1174: Output Parameters:
1177: + nd - number of block diagonals
1178: . bs - each element of a diagonal is a bs x bs dense matrix
1179: . bdlen - array of total block lengths of block diagonals
1180: . diag - array of block diagonal numbers (length nd).
1181: For a matrix element A[i,j], where i=row and j=column, the
1182: diagonal number is
1183: $ diag = i/bs - j/bs (integer division)
1186: - diagv - pointer to the actual diagonals (in the same order as the diag array)
1188: Level: advanced
1190: Notes:
1191: See the users manual for further details regarding this storage format.
1193: .keywords: matrix, block, diagonal, get, data
1195: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1196: @*/
1197: PetscErrorCode PETSCMAT_DLLEXPORT MatBDiagGetData(Mat mat,PetscInt *nd,PetscInt *bs,PetscInt *diag[],PetscInt *bdlen[],PetscScalar ***diagv)
1198: {
1199: Mat_MPIBDiag *pdmat;
1200: Mat_SeqBDiag *dmat = 0;
1201: PetscTruth isseq,ismpi;
1206: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1207: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1208: if (isseq) {
1209: dmat = (Mat_SeqBDiag*)mat->data;
1210: } else if (ismpi) {
1211: pdmat = (Mat_MPIBDiag*)mat->data;
1212: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1213: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1214: *nd = dmat->nd;
1215: *bs = mat->rmap.bs;
1216: *diag = dmat->diag;
1217: *bdlen = dmat->bdlen;
1218: *diagv = dmat->diagv;
1219: return(0);
1220: }
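/*
   Illustrative sketch, not part of the original source: inspecting the diagonal storage of an
   assembled block diagonal matrix through the routine above.  For a parallel matrix the returned
   data describes the local submatrix only.  The matrix A is hypothetical.
*/
static PetscErrorCode ExampleBDiagGetData(Mat A)
{
  PetscInt    nd,bs,*diag,*bdlen;
  PetscScalar **diagv;

  MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);
  PetscPrintf(PETSC_COMM_SELF,"local diagonals %D, block size %D\n",nd,bs);
  return(0);
}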
1222: #include "petscsys.h"
1226: PetscErrorCode MatLoad_MPIBDiag(PetscViewer viewer, MatType type,Mat *newmat)
1227: {
1228: Mat A;
1229: PetscScalar *vals,*svals;
1230: MPI_Comm comm = ((PetscObject)viewer)->comm;
1231: MPI_Status status;
1233: int fd;
1234: PetscMPIInt tag = ((PetscObject)viewer)->tag,rank,size,*sndcounts = 0,*rowners,maxnz,mm;
1235: PetscInt bs,i,nz,j,rstart,rend,*cols;
1236: PetscInt header[4],*rowlengths = 0,M,N,m,Mbs;
1237: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1238: PetscInt extra_rows;
1241: MPI_Comm_size(comm,&size);
1242: MPI_Comm_rank(comm,&rank);
1243: if (!rank) {
1244: PetscViewerBinaryGetDescriptor(viewer,&fd);
1245: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1246: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1247: if (header[3] < 0) {
1248: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1249: }
1250: }
1251: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1252: M = header[1]; N = header[2];
1254: bs = 1; /* uses a block size of 1 by default; */
1255: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1257: /*
1258: This code adds extra rows to make sure the number of rows is
1259: divisible by the blocksize
1260: */
1261: Mbs = M/bs;
1262: extra_rows = bs - M + bs*(Mbs);
1263: if (extra_rows == bs) extra_rows = 0;
1264: else Mbs++;
1265: if (extra_rows && !rank) {
1266: PetscInfo(0,"Padding loaded matrix to match blocksize\n");
1267: }
1269: /* determine ownership of all rows */
1270: m = bs*(Mbs/size + ((Mbs % size) > rank));
1271: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1272: mm = (PetscMPIInt)m;
1273: MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1274: rowners[0] = 0;
1275: for (i=2; i<=size; i++) {
1276: rowners[i] += rowners[i-1];
1277: }
1278: rstart = rowners[rank];
1279: rend = rowners[rank+1];
1281: /* distribute row lengths to all processors */
1282: PetscMalloc((rend-rstart)*sizeof(PetscInt),&ourlens);
1283: if (!rank) {
1284: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1285: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1286: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1287: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1288: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1289: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1290: PetscFree(sndcounts);
1291: } else {
1292: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1293: }
1295: if (!rank) {
1296: /* calculate the number of nonzeros on each processor */
1297: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1298: PetscMemzero(procsnz,size*sizeof(PetscInt));
1299: for (i=0; i<size; i++) {
1300: for (j=rowners[i]; j<rowners[i+1]; j++) {
1301: procsnz[i] += rowlengths[j];
1302: }
1303: }
1304: PetscFree(rowlengths);
1306: /* determine max buffer needed and allocate it */
1307: maxnz = 0;
1308: for (i=0; i<size; i++) {
1309: maxnz = PetscMax(maxnz,procsnz[i]);
1310: }
1311: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1313: /* read in my part of the matrix column indices */
1314: nz = procsnz[0];
1315: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1316: if (size == 1) nz -= extra_rows;
1317: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1318: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1320: /* read in everyone else's and ship them off */
1321: for (i=1; i<size-1; i++) {
1322: nz = procsnz[i];
1323: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1324: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1325: }
1326: /* read in the stuff for the last proc */
1327: if (size != 1) {
1328: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1329: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1330: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1331: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
1332: }
1333: PetscFree(cols);
1334: } else {
1335: /* determine buffer space needed for message */
1336: nz = 0;
1337: for (i=0; i<m; i++) {
1338: nz += ourlens[i];
1339: }
1340: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1342: /* receive message of column indices*/
1343: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1344: MPI_Get_count(&status,MPIU_INT,&maxnz);
1345: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1346: }
1348: MatCreate(comm,newmat);
1349: MatSetSizes(*newmat,m,m,M+extra_rows,N+extra_rows);
1350: MatSetType(*newmat,type);
1351: MatMPIBDiagSetPreallocation(*newmat,0,bs,PETSC_NULL,PETSC_NULL);
1352: A = *newmat;
1354: if (!rank) {
1355: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1357: /* read in my part of the matrix numerical values */
1358: nz = procsnz[0];
1359: if (size == 1) nz -= extra_rows;
1360: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1361: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1363: /* insert into matrix */
1364: jj = rstart;
1365: smycols = mycols;
1366: svals = vals;
1367: for (i=0; i<m; i++) {
1368: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1369: smycols += ourlens[i];
1370: svals += ourlens[i];
1371: jj++;
1372: }
1374: /* read in other processors (except the last one) and ship out */
1375: for (i=1; i<size-1; i++) {
1376: nz = procsnz[i];
1377: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1378: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1379: }
1380: /* the last proc */
1381: if (size != 1){
1382: nz = procsnz[i] - extra_rows;
1383: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1384: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1385: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1386: }
1387: PetscFree(procsnz);
1388: } else {
1389: /* receive numeric values */
1390: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1392: /* receive message of values*/
1393: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1394: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1395: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1397: /* insert into matrix */
1398: jj = rstart;
1399: smycols = mycols;
1400: svals = vals;
1401: for (i=0; i<m; i++) {
1402: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1403: smycols += ourlens[i];
1404: svals += ourlens[i];
1405: jj++;
1406: }
1407: }
1408: PetscFree(ourlens);
1409: PetscFree(vals);
1410: PetscFree(mycols);
1411: PetscFree(rowners);
1413: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1414: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1415: return(0);
1416: }