Actual source code: nn.c

  1: #define PETSCKSP_DLL

 3:  #include "../src/ksp/pc/impls/is/nn/nn.h"

  5: /* -------------------------------------------------------------------------- */
  6: /*
  7:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  8:                     by setting data structures and options.   

 10:    Input Parameter:
 11: .  pc - the preconditioner context

 13:    Application Interface Routine: PCSetUp()

 15:    Notes:
 16:    The interface routine PCSetUp() is not usually called directly by
 17:    the user, but instead is called by PCApply() if necessary.
 18: */
 21: static PetscErrorCode PCSetUp_NN(PC pc)
 22: {
 24: 
 26:   if (!pc->setupcalled) {
 27:     /* Set up all the "iterative substructuring" common block */
 28:     PCISSetUp(pc);
 29:     /* Create the coarse matrix. */
 30:     PCNNCreateCoarseMatrix(pc);
 31:   }
 32:   return(0);
 33: }

 35: /* -------------------------------------------------------------------------- */
 36: /*
 37:    PCApply_NN - Applies the NN preconditioner to a vector.

 39:    Input Parameters:
 40: .  pc - the preconditioner context
 41: .  r - input vector (global)

 43:    Output Parameter:
 44: .  z - output vector (global)

 46:    Application Interface Routine: PCApply()
 47:  */
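/*
   Overview (a sketch using the notation of the step comments in the body below;
   here M_NN stands for the interface preconditioner applied by
   PCNNApplyInterfacePreconditioner()):

     $ w_B = r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} $
     $ z_B = M_NN w_B $
     $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $
*/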
 50: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
 51: {
 52:   PC_IS          *pcis = (PC_IS*)(pc->data);
 54:   PetscScalar    m_one = -1.0;
 55:   Vec            w = pcis->vec1_global;

 58:   /*
 59:     Dirichlet solvers.
 60:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 61:     Storing the local results at vec2_D
 62:   */
 63:   VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
 64:   VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
 65:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 66: 
 67:   /*
 68:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 69:     Storing the result in the interface portion of the global vector w.
 70:   */
 71:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 72:   VecScale(pcis->vec1_B,m_one);
 73:   VecCopy(r,w);
 74:   VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
 75:   VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);

 77:   /*
 78:     Apply the interface preconditioner
 79:   */
 80:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 81:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 83:   /*
 84:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 85:     The result is stored in vec1_D.
 86:   */
 87:   VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
 88:   VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
 89:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 91:   /*
 92:     Dirichlet solvers.
 93:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 94:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 95:   */
 96:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
 97:   VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
 98:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 99:   VecScale(pcis->vec2_D,m_one);
100:   VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
101:   VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
102:   return(0);
103: }

105: /* -------------------------------------------------------------------------- */
106: /*
107:    PCDestroy_NN - Destroys the private context for the NN preconditioner
108:    that was created with PCCreate_NN().

110:    Input Parameter:
111: .  pc - the preconditioner context

113:    Application Interface Routine: PCDestroy()
114: */
117: static PetscErrorCode PCDestroy_NN(PC pc)
118: {
119:   PC_NN          *pcnn = (PC_NN*)pc->data;

123:   PCISDestroy(pc);

125:   if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
126:   if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
127:   if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
128:   if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
129:   if (pcnn->DZ_IN) {
130:     PetscFree(pcnn->DZ_IN[0]);
131:     PetscFree(pcnn->DZ_IN);
132:   }

134:   /*
135:       Free the private data structure that was hanging off the PC
136:   */
137:   PetscFree(pcnn);
138:   return(0);
139: }

141: /* -------------------------------------------------------------------------- */
142: /*MC
143:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

145:    Options Database Keys:
146: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147:                                        (this skips the first coarse grid solve in the preconditioner)
148: .    -pc_nn_turn_off_second_balancing - do not balance the solution of the local Neumann problems
149:                                        (this skips the second coarse grid solve in the preconditioner)
150: .    -pc_is_damp_fixed <fact> -
151: .    -pc_is_remove_nullspace_fixed -
152: .    -pc_is_set_damping_factor_floating <fact> -
153: .    -pc_is_not_damp_floating -
154: +    -pc_is_not_remove_nullspace_floating - 

156:    Level: intermediate

158:    Notes: The matrix used with this preconditioner must be of type MATIS 

160:           Unlike more 'conventional' Neumann-Neumann preconditioners, this iterates over ALL the
161:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162:           on the subdomains, though in our experience using approximate solvers is slower).

164:           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165:           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166:           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

168:    Contributed by Paulo Goldfeld

170: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
171: M*/
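
/*
   Minimal usage sketch (assumes that a matrix A of type MATIS and vectors b and x
   have already been created; equivalent to giving -pc_type nn on the command line):

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCNN);
     KSPSetFromOptions(ksp);  /* picks up the -pc_nn_* options listed above */
     KSPSolve(ksp,b,x);
     KSPDestroy(ksp);
*/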
175: PetscErrorCode  PCCreate_NN(PC pc)
176: {
178:   PC_NN          *pcnn;

181:   /*
182:      Create the private data structure for this preconditioner and
183:      attach it to the PC object.
184:   */
185:   PetscNewLog(pc,PC_NN,&pcnn);
186:   pc->data  = (void*)pcnn;

188:   PCISCreate(pc);
189:   pcnn->coarse_mat  = 0;
190:   pcnn->coarse_x    = 0;
191:   pcnn->coarse_b    = 0;
192:   pcnn->ksp_coarse = 0;
193:   pcnn->DZ_IN       = 0;

195:   /*
196:       Set the pointers for the functions that are provided above.
197:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
198:       are called, they will automatically call these functions.  Note we
199:       choose not to provide a couple of these functions since they are
200:       not needed.
201:   */
202:   pc->ops->apply               = PCApply_NN;
203:   pc->ops->applytranspose      = 0;
204:   pc->ops->setup               = PCSetUp_NN;
205:   pc->ops->destroy             = PCDestroy_NN;
206:   pc->ops->view                = 0;
207:   pc->ops->applyrichardson     = 0;
208:   pc->ops->applysymmetricleft  = 0;
209:   pc->ops->applysymmetricright = 0;
210:   return(0);
211: }


215: /* -------------------------------------------------------------------------- */
216: /*
217:    PCNNCreateCoarseMatrix - Creates the coarse matrix (and the coarse solver) used by the NN preconditioner.
218: */
221: PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
222: {
223:   MPI_Request    *send_request, *recv_request;
225:   PetscInt       i, j, k;
226:   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
227:   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

229:   /* aliasing some names */
230:   PC_IS*         pcis     = (PC_IS*)(pc->data);
231:   PC_NN*         pcnn     = (PC_NN*)pc->data;
232:   PetscInt       n_neigh  = pcis->n_neigh;
233:   PetscInt*      neigh    = pcis->neigh;
234:   PetscInt*      n_shared = pcis->n_shared;
235:   PetscInt**     shared   = pcis->shared;
236:   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */

239:   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
240:   PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);

242:   /* Allocate memory for DZ */
243:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
244: /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
245:   {
246:     PetscInt size_of_Z = 0;
247:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
248:     DZ_IN = pcnn->DZ_IN;
249:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
250:     for (i=0; i<n_neigh; i++) {
251:       size_of_Z += n_shared[i];
252:     }
253:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
254:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
255:   }
256:   for (i=1; i<n_neigh; i++) {
257:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
258:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
259:   }
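  /* Layout: DZ_IN[0] and DZ_OUT[0] each point to one contiguous buffer of
     length size_of_Z = \sum_i n_shared[i]; the loop above offsets DZ_IN[i]
     (and DZ_OUT[i]) by n_shared[i-1] entries past DZ_IN[i-1], so that row i
     holds the n_shared[i] values shared with neighbour neigh[i]. */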

261:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
262:   /* First, set the auxiliary array pcis->work_N. */
263:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
264:   for (i=1; i<n_neigh; i++){
265:     for (j=0; j<n_shared[i]; j++) {
266:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
267:     }
268:   }

270:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
271:   /* Notice that send_request[] and recv_request[] could have one less element. */
272:   /* We make them longer to have request[i] corresponding to neigh[i].          */
273:   {
274:     PetscMPIInt tag;
275:     PetscObjectGetNewTag((PetscObject)pc,&tag);
276:     PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
277:     recv_request = send_request + (n_neigh);
278:     for (i=1; i<n_neigh; i++) {
279:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,((PetscObject)pc)->comm,&(send_request[i]));
280:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,((PetscObject)pc)->comm,&(recv_request[i]));
281:     }
282:   }

284:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
285:   for(j=0; j<n_shared[0]; j++) {
286:     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
287:   }

289:   /* Start computing with local D*Z while communication goes on.    */
 290:   /* Apply the Schur complement. The result is stored in pcis->vec1_B  */
 291:   /* and is also scattered to pcis->work_N (both are passed to the     */
 292:   /* call below as work/output arguments).                             */
293:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
294:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

296:   /* Compute the first column, while completing the receiving. */
297:   for (i=0; i<n_neigh; i++) {
298:     MPI_Status  stat;
299:     PetscMPIInt ind=0;
300:     if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
301:     mat[ind*n_neigh+0] = 0.0;
302:     for (k=0; k<n_shared[ind]; k++) {
303:       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
304:     }
305:   }

 307:   /* Compute the remaining columns */
308:   for (j=1; j<n_neigh; j++) {
309:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
310:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
311:     for (i=0; i<n_neigh; i++) {
312:       mat[i*n_neigh+j] = 0.0;
313:       for (k=0; k<n_shared[i]; k++) {
314:         mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
315:       }
316:     }
317:   }
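  /* At this point mat[i*n_neigh+j] holds
     $ \sum_k DZ_IN[i][k] * (S DZ_IN[j])[shared[i][k]] $,
     where S is the local Schur complement applied by PCNNApplySchurToChunk():
     this subdomain's contribution to the coarse-matrix entry coupling
     neigh[i] and neigh[j]. */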

319:   /* Complete the sending. */
320:   if (n_neigh>1) {
321:     MPI_Status *stat;
322:     PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
323:     if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
324:     PetscFree(stat);
325:   }

327:   /* Free the memory for the MPI requests */
328:   PetscFree(send_request);

330:   /* Free the memory for DZ_OUT */
331:   if (DZ_OUT) {
332:     PetscFree(DZ_OUT[0]);
333:     PetscFree(DZ_OUT);
334:   }

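  /* The coarse problem has one unknown per subdomain: hence the local size 1
     and the global size equal to the number of MPI processes in the vectors
     and matrix created below. */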
336:   {
337:     PetscMPIInt size;
338:     MPI_Comm_size(((PetscObject)pc)->comm,&size);
339:     /* Create the global coarse vectors (rhs and solution). */
340:     VecCreateMPI(((PetscObject)pc)->comm,1,size,&(pcnn->coarse_b));
341:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
342:     /* Create and set the global coarse AIJ matrix. */
343:     MatCreate(((PetscObject)pc)->comm,&(pcnn->coarse_mat));
344:     MatSetSizes(pcnn->coarse_mat,1,1,size,size);
345:     MatSetType(pcnn->coarse_mat,MATAIJ);
346:     MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
347:     MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
348:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
349:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
350:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
351:   }

353:   {
354:     PetscMPIInt rank;
355:     PetscScalar one = 1.0;
356:     MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
357:     /* "Zero out" rows of not-purely-Neumann subdomains */
 358:     if (pcis->pure_neumann) {  /* does NOT zero any row; makes an empty (zero-row) call only because MatZeroRows() is collective. */
359:       MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);
360:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
361:       PetscInt row = (PetscInt) rank;
362:       MatZeroRows(pcnn->coarse_mat,1,&row,one);
363:     }
364:   }

366:   /* Create the coarse linear solver context */
367:   {
368:     PC  pc_ctx, inner_pc;
369:     KSPCreate(((PetscObject)pc)->comm,&pcnn->ksp_coarse);
370:     PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);
371:     KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
372:     KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
373:     PCSetType(pc_ctx,PCREDUNDANT);
374:     KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
375:     PCRedundantGetPC(pc_ctx,&inner_pc);
376:     PCSetType(inner_pc,PCLU);
377:     KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
378:     KSPSetFromOptions(pcnn->ksp_coarse);
 379:     /* set up the coarse solver here, so that the setup cost is not incurred during the first application of the preconditioner */
380:     KSPSetUp(pcnn->ksp_coarse);
381:   }

383:   /* Free the memory for mat */
384:   PetscFree(mat);

386:   /* for DEBUGGING, save the coarse matrix to a file. */
387:   {
388:     PetscTruth flg;
389:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
390:     if (flg) {
391:       PetscViewer viewer;
392:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
393:       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
394:       MatView(pcnn->coarse_mat,viewer);
395:       PetscViewerDestroy(viewer);
396:     }
397:   }

399:   /*  Set the variable pcnn->factor_coarse_rhs. */
400:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

402:   /* See historical note 02, at the bottom of this file. */
403:   return(0);
404: }

406: /* -------------------------------------------------------------------------- */
407: /*
 408:    PCNNApplySchurToChunk - Applies the local Schur complement to a chunk of values given on shared interface nodes.

410:    Input parameters:
 411: .  pc - the preconditioner context
412: .  n - size of chunk
413: .  idx - indices of chunk
414: .  chunk - values

416:    Output parameters:
417: .  array_N - result of Schur complement applied to chunk, scattered to big array
418: .  vec1_B  - result of Schur complement applied to chunk
419: .  vec2_B  - garbage (used as work space)
420: .  vec1_D  - garbage (used as work space)
421: .  vec2_D  - garbage (used as work space)

423: */
426: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
427: {
429:   PetscInt       i;
430:   PC_IS          *pcis = (PC_IS*)(pc->data);

433:   PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
434:   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
435:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
436:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
437:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
438:   return(0);
439: }

441: /* -------------------------------------------------------------------------- */
442: /*
443:    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 
444:                                       the preconditioner for the Schur complement.

446:    Input parameter:
447: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

449:    Output parameters:
450: .  z - global vector of interior and interface nodes. The values on the interface are the result of
451:        the application of the interface preconditioner to the interface part of r. The values on the
452:        interior nodes are garbage.
453: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
454: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
455: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
456: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
457: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
458: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
459: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
460: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

462: */
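/*
   Sketch of the action when both balancing steps are turned off (assuming that
   D denotes the partition-of-unity scaling pcis->D, S_i the local Schur
   complement whose (approximate) inverse is applied by PCISApplyInvSchur(),
   and \tilde R_i the restriction to the local interface):

     $ z_B = \sum_i \tilde R_i^T D_i S_i^{-1} D_i \tilde R_i r_B $

   With balancing enabled, PCNNBalancing() pre- and post-processes r and z
   using the coarse solve (see below).
*/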
465: PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
466:                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
467: {
469:   PC_IS*         pcis = (PC_IS*)(pc->data);

472:   /*
473:     First balancing step.
474:   */
475:   {
476:     PetscTruth flg;
477:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
478:     if (!flg) {
479:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
480:     } else {
481:       VecCopy(r,z);
482:     }
483:   }

485:   /*
486:     Extract the local interface part of z and scale it by D 
487:   */
488:   VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
489:   VecScatterEnd  (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
490:   VecPointwiseMult(vec2_B,pcis->D,vec1_B);

492:   /* Neumann Solver */
493:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

495:   /*
496:     Second balancing step.
497:   */
498:   {
499:     PetscTruth flg;
 500:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_second_balancing",&flg);
501:     if (!flg) {
502:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
503:     } else {
504:       VecPointwiseMult(vec2_B,pcis->D,vec1_B);
505:       VecSet(z,0.0);
506:       VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
507:       VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
508:     }
509:   }
510:   return(0);
511: }

513: /* -------------------------------------------------------------------------- */
514: /*
515:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
516:                    input argument u is provided), or s, as given in equations
517:                    (12) and (13), if the input argument u is a null vector.
518:                    Notice that the input argument u plays the role of u_i in
519:                    equation (14). The equation numbers refer to [Man93].

521:    Input Parameters:
 522: .  pc - NN preconditioner context.
523: .  r - MPI vector of all nodes (interior and interface). It's preserved.
524: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

526:    Output Parameters:
527: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
528: .  vec1_B - Sequential vector of local interface nodes. Workspace.
529: .  vec2_B - Sequential vector of local interface nodes. Workspace.
530: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
531: .  vec1_D - Sequential vector of local interior nodes. Workspace.
532: .  vec2_D - Sequential vector of local interior nodes. Workspace.
533: .  work_N - Array of all local nodes (interior and interface). Workspace.

535: */
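/*
   Sketch of the coarse right-hand side assembled inside this routine: the
   (single) entry owned by this process is

     $ value = factor_coarse_rhs * \sum_k DZ_IN[0][k] * work_N[shared[0][k]] $

   i.e. the inner product of the locally owned scaled-nullspace chunk DZ_IN[0]
   with the interface values of the vector being balanced.
*/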
538: PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
539:                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
540: {
542:   PetscInt       k;
543:   PetscScalar    value;
544:   PetscScalar*   lambda;
545:   PC_NN*         pcnn     = (PC_NN*)(pc->data);
546:   PC_IS*         pcis     = (PC_IS*)(pc->data);

549:   PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
550:   if (u) {
551:     if (!vec3_B) { vec3_B = u; }
552:     VecPointwiseMult(vec1_B,pcis->D,u);
553:     VecSet(z,0.0);
554:     VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
555:     VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
556:     VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
557:     VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
558:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
559:     VecScale(vec3_B,-1.0);
560:     VecCopy(r,z);
561:     VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
562:     VecScatterEnd  (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
563:   } else {
564:     VecCopy(r,z);
565:   }
566:   VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
567:   VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
568:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
569:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
570:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
571:   {
572:     PetscMPIInt rank;
573:     MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
574:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
575:     /*
 576:        Since we are only inserting local values (one value, actually) we don't need to do the
 577:        reduction that tells us there is no data that needs to be moved. Hence we comment out these calls:
578:        VecAssemblyBegin(pcnn->coarse_b); 
579:        VecAssemblyEnd  (pcnn->coarse_b);
580:     */
581:   }
582:   KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
583:   if (!u) { VecScale(pcnn->coarse_x,-1.0); }
584:   VecGetArray(pcnn->coarse_x,&lambda);
585:   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
586:   VecRestoreArray(pcnn->coarse_x,&lambda);
587:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
588:   VecSet(z,0.0);
589:   VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
590:   VecScatterEnd  (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
591:   if (!u) {
592:     VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
593:     VecScatterEnd  (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
594:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
595:     VecCopy(r,z);
596:   }
597:   VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
598:   VecScatterEnd  (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
599:   PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
600:   return(0);
601: }




607: /*  -------   E N D   O F   T H E   C O D E   -------  */
608: /*                                                     */
609: /*  From now on, "footnotes" (or "historical notes").  */
610: /*                                                     */
611: /*  -------------------------------------------------  */



615: /* --------------------------------------------------------------------------
616:    Historical note 01 
617:    -------------------------------------------------------------------------- */
618: /*
619:    We considered the possibility of an alternative D_i that would still
620:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
621:    The basic principle was still the pseudo-inverse of the counting
622:    function; the difference was that we would not count subdomains
623:    that do not contribute to the coarse space (i.e., not pure-Neumann
624:    subdomains).

626:    This turned out to be a bad idea:  we would solve trivial Neumann
 627:    problems in the non-pure-Neumann subdomains, since we would be scaling
628:    the balanced residual by zero.
629: */




634: /* --------------------------------------------------------------------------
635:    Historical note 02 
636:    -------------------------------------------------------------------------- */
637: /*
 638:    We tried an alternative coarse problem that would eliminate exactly a
 639:    constant error. It turned out not to improve the overall convergence.
640: */