#define PETSCMAT_DLL

/*
   Routines to compute overlapping regions of a parallel MPI matrix.
   Used for finding submatrices that are shared across processors.
*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "petscbt.h"

static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat,PetscInt,IS*);
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat,PetscInt*,PetscInt,PetscInt*,PetscBT*);

#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ"
PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov)
{
  PetscErrorCode ierr;
  PetscInt       i,N=C->cmap->N,bs=C->rmap->bs;
  IS             *is_new;

  PetscFunctionBegin;
  ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr);
  /* Convert the indices into block format */
  ierr = ISCompressIndicesGeneral(N,bs,is_max,is,is_new);CHKERRQ(ierr);
  if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");}
  for (i=0; i<ov; ++i) {
    ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr);
  }
  for (i=0; i<is_max; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);}
  ierr = ISExpandIndicesGeneral(N,bs,is_max,is_new,is);CHKERRQ(ierr);
  for (i=0; i<is_max; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);}
  ierr = PetscFree(is_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

typedef enum {MINE,OTHER} WhoseOwner;
/*  data1, odata1 and odata2 are packed in the format (for communication):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i])
        ...
   data2 is packed in the format (for creating output is[]):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max + 1 + Mbs*i] = data(is[i])
        ...
*/
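/*
   A concrete illustration of the packing above (hypothetical values, chosen
   only for the example): for is_max=2 with is[0] = {3,7} and is[1] = {5},
   the communication array data1 would hold

       data[0] = 2                 (is_max)
       data[1] = 2                 (size of is[0])
       data[2] = 1                 (size of is[1])
       data[3] = 3, data[4] = 7    (indices of is[0])
       data[5] = 5                 (indices of is[1])

   In the data2 layout the indices of is[i] instead start at the fixed offset
   1 + is_max + Mbs*i, so each output set can grow in place to Mbs entries.
*/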
#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once"
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ*)C->data;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len;
  const PetscInt *idx_i;
  PetscInt       idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i,
                 Mbs,i,j,k,*odata1,*odata2,
                 proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est;
  PetscInt       proc_end=0,*iwork,len_unused,nodata2;
  PetscInt       ois_max; /* max number of is[] over all processors */
  char           *t_p;
  MPI_Comm       comm;
  MPI_Request    *s_waits1,*s_waits2,r_req;
  MPI_Status     *s_status,r_status;
  PetscBT        *table;  /* mark indices of this processor's is[] */
  PetscBT        table_i;
  PetscBT        otable;  /* mark indices of other processors' is[] */
  PetscInt       bs=C->rmap->bs,Bn=c->B->cmap->n,Bnbs=Bn/bs,*Bowners;
  IS             garray_local,garray_gl;

  PetscFunctionBegin;
  comm = ((PetscObject)C)->comm;
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len  = PetscMax(is_max,size);
  ierr = PetscMalloc(len*sizeof(PetscBT) + (Mbs/PETSC_BITS_PER_BYTE+1)*len*sizeof(char),&table);CHKERRQ(ierr);
  t_p  = (char*)(table + len);
  for (i=0; i<len; i++) {
    table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
  }

  ierr = MPI_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);

  /* 1. Send this processor's is[] to other processors */
  /*---------------------------------------------------*/
  /* allocate spaces */
  ierr = PetscMalloc(is_max*sizeof(PetscInt),&n);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<is_max; i++) {
    ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
    len += n[i];
  }
  if (!len) {
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  ierr = PetscMalloc((size*len+1)*sizeof(PetscInt),&data1);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt*),&data1_start);CHKERRQ(ierr);
  for (i=0; i<size; i++) data1_start[i] = data1 + i*len;

  ierr = PetscMalloc4(size,PetscMPIInt,&len_s,size,PetscInt,&btable,size,PetscInt,&iwork,size+1,PetscInt,&Bowners);CHKERRQ(ierr);

  /* gather c->garray from all processors */
  ierr = ISCreateGeneral(comm,Bnbs,c->garray,&garray_local);CHKERRQ(ierr);
  ierr = ISAllGather(garray_local,&garray_gl);CHKERRQ(ierr);
  ierr = ISDestroy(garray_local);CHKERRQ(ierr);
  ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  Bowners[0] = 0;
  for (i=0; i<size; i++) Bowners[i+1] += Bowners[i];

  if (is_max){
    /* hash table ctable which maps c->row to proc_id */
    ierr = PetscMalloc(Mbs*sizeof(PetscInt),&ctable);CHKERRQ(ierr);
    for (proc_id=0,j=0; proc_id<size; proc_id++) {
      for (; j<C->rmap->range[proc_id+1]/bs; j++) {
        ctable[j] = proc_id;
      }
    }

    /* hash tables marking c->garray */
    ierr = ISGetIndices(garray_gl,&idx_i);CHKERRQ(ierr);
    for (i=0; i<size; i++){
      table_i = table[i];
      ierr    = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr);
      for (j=Bowners[i]; j<Bowners[i+1]; j++){ /* go through B cols of proc[i] */
        ierr = PetscBTSet(table_i,idx_i[j]);CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(garray_gl,&idx_i);CHKERRQ(ierr);
  } /* if (is_max) */
  ierr = ISDestroy(garray_gl);CHKERRQ(ierr);

  /* evaluate communication - message to whom, length, and buffer space */
  for (i=0; i<size; i++) len_s[i] = 0;

  /* header of data1 */
  for (proc_id=0; proc_id<size; proc_id++){
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j=0; j<is_max; j++) {
      if (proc_id == rank){
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i=0; i<is_max; i++) {
    ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr);
    for (j=0; j<n[i]; j++){
      idx = idx_i[j];
      *data1_start[rank] = idx; data1_start[rank]++; /* for local processing */
      proc_end = ctable[idx];
      for (proc_id=0; proc_id<=proc_end; proc_id++){ /* for others to process */
        if (proc_id == rank) continue; /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need to send idx to [proc_id] */
        *data1_start[proc_id] = idx; data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    for (proc_id=0; proc_id<size; proc_id++){
      if (proc_id == rank) continue;
      *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id] = len_s[proc_id];
    }
    ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr);
  }

  nrqs = 0; nrqr = 0;
  for (i=0; i<size; i++){
    data1_start[i] = data1 + i*len;
    if (len_s[i]){
      nrqs++;
      len_s[i] += 1 + is_max; /* add the header length */
    }
  }

  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(is[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(n);CHKERRQ(ierr);
  ierr = PetscFree(ctable);CHKERRQ(ierr);

  /* determine the number of messages to expect, their lengths, from-ids */
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&nrqr);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);CHKERRQ(ierr);

  /* now post the sends */
  ierr = PetscMalloc2(size,MPI_Request,&s_waits1,size,MPI_Request,&s_waits2);CHKERRQ(ierr);
  k = 0;
  for (proc_id=0; proc_id<size; proc_id++){ /* send data1 to processor [proc_id] */
    if (len_s[proc_id]){
      ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr);
      k++;
    }
  }

  /* 2. Receive other's is[] and process. Then send back */
  /*-----------------------------------------------------*/
  len = 0;
  for (i=0; i<nrqr; i++){
    if (len_r1[i] > len) len = len_r1[i];
  }
  ierr = PetscFree(len_r1);CHKERRQ(ierr);
  ierr = PetscFree(id_r1);CHKERRQ(ierr);

  for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&odata1);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt*),&odata2_ptr);CHKERRQ(ierr);
  ierr = PetscBTCreate(Mbs,otable);CHKERRQ(ierr);

  len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */
  len_est = 2*len_max;       /* estimated space for storing is[] of all received messages */
  ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
  nodata2 = 0; /* nodata2+1: number of PetscMalloc(,&odata2_ptr[]) calls */
  odata2_ptr[nodata2] = odata2;
  len_unused = len_est; /* unused space in the array odata2_ptr[nodata2] -- needs to be >= len_max */

  k = 0;
  while (k < nrqr){
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr);
    if (flag){
      ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr);
      proc_id = r_status.MPI_SOURCE;
      ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
      ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);

      /* Process messages */
      /* make sure there is enough unused space in the odata2 array */
      if (len_unused < len_max){ /* allocate more space for odata2 */
        ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
        odata2_ptr[++nodata2] = odata2;
        len_unused = len_est;
      }

      ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr);
      len  = 1 + odata2[0];
      for (i=0; i<odata2[0]; i++){
        len += odata2[1 + i];
      }

      /* Send messages back */
      ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k);CHKERRQ(ierr);
      k++;
      odata2        += len;
      len_unused    -= len;
      len_s[proc_id] = len; /* length of the message sent back to [proc_id] */
    }
  }
  ierr = PetscFree(odata1);CHKERRQ(ierr);
  ierr = PetscBTDestroy(otable);CHKERRQ(ierr);

  /* 3. Do local work on this processor's is[] */
  /*-------------------------------------------*/
  /* make sure there is enough unused space in the odata2(=data) array */
  len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */
  if (len_unused < len_max){ /* allocate more space for odata2 */
    ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
    odata2_ptr[++nodata2] = odata2;
    len_unused = len_est;
  }

  data = odata2;
  ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table);CHKERRQ(ierr);
  ierr = PetscFree(data1_start);CHKERRQ(ierr);

  /* 4. Receive work done on other processors, then merge */
  /*------------------------------------------------------*/
  /* get the max message length this processor may receive */
  ierr = MPI_Allreduce(len_s,iwork,size,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  ierr = PetscMalloc((iwork[rank]+1)*sizeof(PetscInt),&data2);CHKERRQ(ierr);
  ierr = PetscFree4(len_s,btable,iwork,Bowners);CHKERRQ(ierr);

  k = 0;
  while (k < nrqs){
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status);CHKERRQ(ierr);
    if (flag){
      ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr);
      proc_id = r_status.MPI_SOURCE;
      ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
      ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);
      if (len > 1+is_max){ /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i=0; i<is_max; i++){
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs*i;
          isz     = data[1+i];
          for (j=0; j<data2[1+i]; j++){
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i,col)) {data_i[isz++] = col;}
          }
          data[1+i] = isz;
          if (i < is_max - 1) data2_i += data2[1+i];
        }
      }
      k++;
    }
  }
  ierr = PetscFree(data2);CHKERRQ(ierr);

  /* phase 1 sends are complete */
  ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);}
  ierr = PetscFree(data1);CHKERRQ(ierr);

  /* phase 2 sends are complete */
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status);CHKERRQ(ierr);}
  ierr = PetscFree2(s_waits1,s_waits2);CHKERRQ(ierr);
  ierr = PetscFree(s_status);CHKERRQ(ierr);

  /* 5. Create new is[] */
  /*--------------------*/
  for (i=0; i<is_max; i++) {
    data_i = data + 1 + is_max + Mbs*i;
    ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,is+i);CHKERRQ(ierr);
  }
  for (k=0; k<=nodata2; k++){
    ierr = PetscFree(odata2_ptr[k]);CHKERRQ(ierr);
  }
  ierr = PetscFree(odata2_ptr);CHKERRQ(ierr);
  ierr = PetscFree(table);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local"
/*
   MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap_MPISBAIJ_Once()
   to do the work on the local processor.

     Inputs:
      C      - MAT_MPISBAIJ;
      data   - holds is[] in the packed format described above
      whose  - whose is[] to be processed:
               MINE  - this processor's is[]
               OTHER - other processors' is[]
     Output:
      nidx   - whose = MINE:
                    holds input and newly found indices in the same format as data
               whose = OTHER:
                    only holds the newly found indices
      table  - table[i]: mark the indices of is[i], i=0,...,is_max. Used only when whose=MINE.
*/
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C,PetscInt *data,PetscInt whose,PetscInt *nidx,PetscBT *table)
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ*)C->data;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(c->A)->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(c->B)->data;
  PetscErrorCode ierr;
  PetscInt       row,mbs,Mbs,*nidx_i,col,col_max,isz,isz0,*ai,*aj,*bi,*bj,*garray,rstart,l;
  PetscInt       a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n;
  PetscBT        table0;  /* mark the indices of input is[] for look-up */
  PetscBT        table_i; /* points to the i-th table. When whose=OTHER, a single table is used for all is[] */

  PetscFunctionBegin;
  Mbs    = c->Mbs; mbs = a->mbs;
  ai     = a->i; aj = a->j;
  bi     = b->i; bj = b->j;
  garray = c->garray;
  rstart = c->rstartbs;
  is_max = data[0];

  ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr);

  nidx[0] = is_max;
  idx_i   = data + is_max + 1; /* ptr to input is[0] array */
  nidx_i  = nidx + is_max + 1; /* ptr to output is[0] array */
  for (i=0; i<is_max; i++) { /* for each is */
    isz = 0;
    n   = data[1+i]; /* size of input is[i] */

    /* initialize and set table_i(mark idx and nidx) and table0(only mark idx) */
    if (whose == MINE){ /* process this processor's is[] */
      table_i = table[i];
      nidx_i  = nidx + 1 + is_max + Mbs*i;
    } else {            /* process another processor's is[] - only one temp table is used */
      table_i = table[0];
    }
    ierr = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr);
    ierr = PetscBTMemzero(Mbs,table0);CHKERRQ(ierr);
    if (n==0) {
      nidx[1+i] = 0; /* size of new is[i] */
      continue;
    }

    isz0 = 0; col_max = 0;
    for (j=0; j<n; j++){
      col = idx_i[j];
      if (col >= Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"index col %D >= Mbs %D",col,Mbs);
      if (!PetscBTLookupSet(table_i,col)) {
        ierr = PetscBTSet(table0,col);CHKERRQ(ierr);
        if (whose == MINE) {nidx_i[isz0] = col;}
        if (col_max < col) col_max = col;
        isz0++;
      }
    }

    if (whose == MINE) {isz = isz0;}
    k = 0; /* number of indices from input is[i] that have been examined */
    for (row=0; row<mbs; row++){
      a_start = ai[row]; a_end = ai[row+1];
      b_start = bi[row]; b_end = bi[row+1];
      if (PetscBTLookup(table0,row+rstart)){ /* row is in input is[i]:
                                                do row search - collect all cols in this row */
        for (l=a_start; l<a_end; l++){ /* Amat */
          col = aj[l] + rstart;
          if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;}
        }
        for (l=b_start; l<b_end; l++){ /* Bmat */
          col = garray[bj[l]];
          if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;}
        }
        k++;
        if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
      } else { /* row is not in input is[i]:
                  do col search - add row onto nidx_i if there is a col in input is[i] */
        for (l=a_start; l<a_end; l++){ /* Amat */
          col = aj[l] + rstart;
          if (col > col_max) break;
          if (PetscBTLookup(table0,col)){
            if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;}
            break; /* for (l=a_start; l<a_end; l++) */
          }
        }
        for (l=b_start; l<b_end; l++){ /* Bmat */
          col = garray[bj[l]];
          if (col > col_max) break;
          if (PetscBTLookup(table0,col)){
            if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;}
            break; /* for (l=b_start; l<b_end; l++) */
          }
        }
      }
    }

    if (i < is_max - 1){
      idx_i  += n;   /* ptr to input is[i+1] array */
      nidx_i += isz; /* ptr to output is[i+1] array */
    }
    nidx[1+i] = isz; /* size of new is[i] */
  } /* for each is */
  ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
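
/*
   Typical usage (an illustrative sketch, assuming the PETSc calling
   conventions of this file's era, e.g. ISDestroy() taking the IS by value):
   MatIncreaseOverlap() dispatches to MatIncreaseOverlap_MPISBAIJ() when the
   matrix type is MATMPISBAIJ.

       IS       is[1];
       PetscInt rows[2] = {0,1};

       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is[0]);CHKERRQ(ierr);
       ierr = MatIncreaseOverlap(C,1,is,1);CHKERRQ(ierr);    (one level of overlap)
       ierr = ISDestroy(is[0]);CHKERRQ(ierr);
*/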