Actual source code: nn.c
1: #define PETSCKSP_DLL
3: #include ../src/ksp/pc/impls/is/nn/nn.h
5: /* -------------------------------------------------------------------------- */
6: /*
7: PCSetUp_NN - Prepares for the use of the NN preconditioner
8: by setting data structures and options.
10: Input Parameter:
11: . pc - the preconditioner context
13: Application Interface Routine: PCSetUp()
15: Notes:
16: The interface routine PCSetUp() is not usually called directly by
17: the user, but instead is called by PCApply() if necessary.
18: */
21: static PetscErrorCode PCSetUp_NN(PC pc)
22: {
24:
26: if (!pc->setupcalled) {
27: /* Set up all the "iterative substructuring" common block */
28: PCISSetUp(pc);
29: /* Create the coarse matrix. */
30: PCNNCreateCoarseMatrix(pc);
31: }
32: return(0);
33: }
35: /* -------------------------------------------------------------------------- */
36: /*
37: PCApply_NN - Applies the NN preconditioner to a vector.
39: Input Parameters:
40: . pc - the preconditioner context
41: . r - input vector (global)
43: Output Parameter:
44: . z - output vector (global)
46: Application Interface Routine: PCApply()
47: */
50: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
51: {
52: PC_IS *pcis = (PC_IS*)(pc->data);
54: PetscScalar m_one = -1.0;
55: Vec w = pcis->vec1_global;
58: /*
59: Dirichlet solvers.
60: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
61: Storing the local results at vec2_D
62: */
63: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
64: VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
65: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
66:
67: /*
68: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
69: Storing the result in the interface portion of the global vector w.
70: */
71: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
72: VecScale(pcis->vec1_B,m_one);
73: VecCopy(r,w);
74: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
75: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
77: /*
78: Apply the interface preconditioner
79: */
80: PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
81: pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);
83: /*
84: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
85: The result is stored in vec1_D.
86: */
87: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
88: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
89: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
91: /*
92: Dirichlet solvers.
93: Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
94: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
95: */
96: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
97: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
98: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
99: VecScale(pcis->vec2_D,m_one);
100: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
101: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
102: return(0);
103: }
105: /* -------------------------------------------------------------------------- */
106: /*
107: PCDestroy_NN - Destroys the private context for the NN preconditioner
108: that was created with PCCreate_NN().
110: Input Parameter:
111: . pc - the preconditioner context
113: Application Interface Routine: PCDestroy()
114: */
117: static PetscErrorCode PCDestroy_NN(PC pc)
118: {
119: PC_NN *pcnn = (PC_NN*)pc->data;
123: PCISDestroy(pc);
125: if (pcnn->coarse_mat) {MatDestroy(pcnn->coarse_mat);}
126: if (pcnn->coarse_x) {VecDestroy(pcnn->coarse_x);}
127: if (pcnn->coarse_b) {VecDestroy(pcnn->coarse_b);}
128: if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
129: if (pcnn->DZ_IN) {
130: PetscFree(pcnn->DZ_IN[0]);
131: PetscFree(pcnn->DZ_IN);
132: }
134: /*
135: Free the private data structure that was hanging off the PC
136: */
137: PetscFree(pcnn);
138: return(0);
139: }
141: /* -------------------------------------------------------------------------- */
142: /*MC
143: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
145: Options Database Keys:
146: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147: (this skips the first coarse grid solve in the preconditioner)
148: . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
149: (this skips the second coarse grid solve in the preconditioner)
150: . -pc_is_damp_fixed <fact> -
151: . -pc_is_remove_nullspace_fixed -
152: . -pc_is_set_damping_factor_floating <fact> -
153: . -pc_is_not_damp_floating -
154: + -pc_is_not_remove_nullspace_floating -
156: Level: intermediate
158: Notes: The matrix used with this preconditioner must be of type MATIS
160: Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162: on the subdomains; though in our experience using approximate solvers is slower.).
164: Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165: Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166: Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
168: Contributed by Paulo Goldfeld
170: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
171: M*/
175: PetscErrorCode PCCreate_NN(PC pc)
176: {
178: PC_NN *pcnn;
181: /*
182: Creates the private data structure for this preconditioner and
183: attach it to the PC object.
184: */
185: PetscNewLog(pc,PC_NN,&pcnn);
186: pc->data = (void*)pcnn;
188: PCISCreate(pc);
189: pcnn->coarse_mat = 0;
190: pcnn->coarse_x = 0;
191: pcnn->coarse_b = 0;
192: pcnn->ksp_coarse = 0;
193: pcnn->DZ_IN = 0;
195: /*
196: Set the pointers for the functions that are provided above.
197: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
198: are called, they will automatically call these functions. Note we
199: choose not to provide a couple of these functions since they are
200: not needed.
201: */
202: pc->ops->apply = PCApply_NN;
203: pc->ops->applytranspose = 0;
204: pc->ops->setup = PCSetUp_NN;
205: pc->ops->destroy = PCDestroy_NN;
206: pc->ops->view = 0;
207: pc->ops->applyrichardson = 0;
208: pc->ops->applysymmetricleft = 0;
209: pc->ops->applysymmetricright = 0;
210: return(0);
211: }
215: /* -------------------------------------------------------------------------- */
216: /*
217: PCNNCreateCoarseMatrix -
218: */
221: PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
222: {
223: MPI_Request *send_request, *recv_request;
225: PetscInt i, j, k;
226: PetscScalar* mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
227: PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
229: /* aliasing some names */
230: PC_IS* pcis = (PC_IS*)(pc->data);
231: PC_NN* pcnn = (PC_NN*)pc->data;
232: PetscInt n_neigh = pcis->n_neigh;
233: PetscInt* neigh = pcis->neigh;
234: PetscInt* n_shared = pcis->n_shared;
235: PetscInt** shared = pcis->shared;
236: PetscScalar** DZ_IN; /* Must be initialized after memory allocation. */
239: /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
240: PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);
242: /* Allocate memory for DZ */
243: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
244: /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
245: {
246: PetscInt size_of_Z = 0;
247: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
248: DZ_IN = pcnn->DZ_IN;
249: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
250: for (i=0; i<n_neigh; i++) {
251: size_of_Z += n_shared[i];
252: }
253: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
254: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
255: }
256: for (i=1; i<n_neigh; i++) {
257: DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1];
258: DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
259: }
261: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
262: /* First, set the auxiliary array pcis->work_N. */
263: PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
264: for (i=1; i<n_neigh; i++){
265: for (j=0; j<n_shared[i]; j++) {
266: DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
267: }
268: }
270: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
271: /* Notice that send_request[] and recv_request[] could have one less element. */
272: /* We make them longer to have request[i] corresponding to neigh[i]. */
273: {
274: PetscMPIInt tag;
275: PetscObjectGetNewTag((PetscObject)pc,&tag);
276: PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
277: recv_request = send_request + (n_neigh);
278: for (i=1; i<n_neigh; i++) {
279: MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,((PetscObject)pc)->comm,&(send_request[i]));
280: MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,((PetscObject)pc)->comm,&(recv_request[i]));
281: }
282: }
284: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
285: for(j=0; j<n_shared[0]; j++) {
286: DZ_IN[0][j] = pcis->work_N[shared[0][j]];
287: }
289: /* Start computing with local D*Z while communication goes on. */
290: /* Apply Schur complement. The result is "stored" in vec (more */
291: /* precisely, vec points to the result, stored in pc_nn->vec1_B) */
292: /* and also scattered to pcnn->work_N. */
293: PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
294: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
296: /* Compute the first column, while completing the receiving. */
297: for (i=0; i<n_neigh; i++) {
298: MPI_Status stat;
299: PetscMPIInt ind=0;
300: if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
301: mat[ind*n_neigh+0] = 0.0;
302: for (k=0; k<n_shared[ind]; k++) {
303: mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
304: }
305: }
307: /* Compute the remaining of the columns */
308: for (j=1; j<n_neigh; j++) {
309: PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
310: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
311: for (i=0; i<n_neigh; i++) {
312: mat[i*n_neigh+j] = 0.0;
313: for (k=0; k<n_shared[i]; k++) {
314: mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
315: }
316: }
317: }
319: /* Complete the sending. */
320: if (n_neigh>1) {
321: MPI_Status *stat;
322: PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
323: if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
324: PetscFree(stat);
325: }
327: /* Free the memory for the MPI requests */
328: PetscFree(send_request);
330: /* Free the memory for DZ_OUT */
331: if (DZ_OUT) {
332: PetscFree(DZ_OUT[0]);
333: PetscFree(DZ_OUT);
334: }
336: {
337: PetscMPIInt size;
338: MPI_Comm_size(((PetscObject)pc)->comm,&size);
339: /* Create the global coarse vectors (rhs and solution). */
340: VecCreateMPI(((PetscObject)pc)->comm,1,size,&(pcnn->coarse_b));
341: VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
342: /* Create and set the global coarse AIJ matrix. */
343: MatCreate(((PetscObject)pc)->comm,&(pcnn->coarse_mat));
344: MatSetSizes(pcnn->coarse_mat,1,1,size,size);
345: MatSetType(pcnn->coarse_mat,MATAIJ);
346: MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
347: MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
348: MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
349: MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
350: MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
351: }
353: {
354: PetscMPIInt rank;
355: PetscScalar one = 1.0;
356: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
357: /* "Zero out" rows of not-purely-Neumann subdomains */
358: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
359: MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);
360: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
361: PetscInt row = (PetscInt) rank;
362: MatZeroRows(pcnn->coarse_mat,1,&row,one);
363: }
364: }
366: /* Create the coarse linear solver context */
367: {
368: PC pc_ctx, inner_pc;
369: KSPCreate(((PetscObject)pc)->comm,&pcnn->ksp_coarse);
370: PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);
371: KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
372: KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
373: PCSetType(pc_ctx,PCREDUNDANT);
374: KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
375: PCRedundantGetPC(pc_ctx,&inner_pc);
376: PCSetType(inner_pc,PCLU);
377: KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
378: KSPSetFromOptions(pcnn->ksp_coarse);
379: /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
380: KSPSetUp(pcnn->ksp_coarse);
381: }
383: /* Free the memory for mat */
384: PetscFree(mat);
386: /* for DEBUGGING, save the coarse matrix to a file. */
387: {
388: PetscTruth flg;
389: PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
390: if (flg) {
391: PetscViewer viewer;
392: PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
393: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
394: MatView(pcnn->coarse_mat,viewer);
395: PetscViewerDestroy(viewer);
396: }
397: }
399: /* Set the variable pcnn->factor_coarse_rhs. */
400: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
402: /* See historical note 02, at the bottom of this file. */
403: return(0);
404: }
406: /* -------------------------------------------------------------------------- */
407: /*
408: PCNNApplySchurToChunk -
410: Input parameters:
411: . pcnn
412: . n - size of chunk
413: . idx - indices of chunk
414: . chunk - values
416: Output parameters:
417: . array_N - result of Schur complement applied to chunk, scattered to big array
418: . vec1_B - result of Schur complement applied to chunk
419: . vec2_B - garbage (used as work space)
420: . vec1_D - garbage (used as work space)
421: . vec2_D - garbage (used as work space)
423: */
426: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
427: {
429: PetscInt i;
430: PC_IS *pcis = (PC_IS*)(pc->data);
433: PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
434: for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
435: PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
436: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
437: PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
438: return(0);
439: }
441: /* -------------------------------------------------------------------------- */
442: /*
443: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
444: the preconditioner for the Schur complement.
446: Input parameter:
447: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
449: Output parameters:
450: . z - global vector of interior and interface nodes. The values on the interface are the result of
451: the application of the interface preconditioner to the interface part of r. The values on the
452: interior nodes are garbage.
453: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
454: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
455: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
456: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
457: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
458: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
459: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
460: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
462: */
465: PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
466: Vec vec2_D, Vec vec1_N, Vec vec2_N)
467: {
469: PC_IS* pcis = (PC_IS*)(pc->data);
472: /*
473: First balancing step.
474: */
475: {
476: PetscTruth flg;
477: PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
478: if (!flg) {
479: PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
480: } else {
481: VecCopy(r,z);
482: }
483: }
485: /*
486: Extract the local interface part of z and scale it by D
487: */
488: VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
489: VecScatterEnd (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
490: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
492: /* Neumann Solver */
493: PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);
495: /*
496: Second balancing step.
497: */
498: {
499: PetscTruth flg;
500: PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);
501: if (!flg) {
502: PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
503: } else {
504: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
505: VecSet(z,0.0);
506: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
507: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
508: }
509: }
510: return(0);
511: }
513: /* -------------------------------------------------------------------------- */
514: /*
515: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
516: input argument u is provided), or s, as given in equations
517: (12) and (13), if the input argument u is a null vector.
518: Notice that the input argument u plays the role of u_i in
519: equation (14). The equation numbers refer to [Man93].
521: Input Parameters:
522: . pcnn - NN preconditioner context.
523: . r - MPI vector of all nodes (interior and interface). It's preserved.
524: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
526: Output Parameters:
527: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
528: . vec1_B - Sequential vector of local interface nodes. Workspace.
529: . vec2_B - Sequential vector of local interface nodes. Workspace.
530: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
531: . vec1_D - Sequential vector of local interior nodes. Workspace.
532: . vec2_D - Sequential vector of local interior nodes. Workspace.
533: . work_N - Array of all local nodes (interior and interface). Workspace.
535: */
538: PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
539: Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
540: {
542: PetscInt k;
543: PetscScalar value;
544: PetscScalar* lambda;
545: PC_NN* pcnn = (PC_NN*)(pc->data);
546: PC_IS* pcis = (PC_IS*)(pc->data);
549: PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
550: if (u) {
551: if (!vec3_B) { vec3_B = u; }
552: VecPointwiseMult(vec1_B,pcis->D,u);
553: VecSet(z,0.0);
554: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
555: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
556: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
557: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
558: PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
559: VecScale(vec3_B,-1.0);
560: VecCopy(r,z);
561: VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
562: VecScatterEnd (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
563: } else {
564: VecCopy(r,z);
565: }
566: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
567: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
568: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
569: for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
570: value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
571: {
572: PetscMPIInt rank;
573: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
574: VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
575: /*
576: Since we are only inserting local values (one value actually) we don't need to do the
577: reduction that tells us there is no data that needs to be moved. Hence we comment out these
578: VecAssemblyBegin(pcnn->coarse_b);
579: VecAssemblyEnd (pcnn->coarse_b);
580: */
581: }
582: KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
583: if (!u) { VecScale(pcnn->coarse_x,-1.0); }
584: VecGetArray(pcnn->coarse_x,&lambda);
585: for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
586: VecRestoreArray(pcnn->coarse_x,&lambda);
587: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
588: VecSet(z,0.0);
589: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
590: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
591: if (!u) {
592: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
593: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
594: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
595: VecCopy(r,z);
596: }
597: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
598: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
599: PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
600: return(0);
601: }
607: /* ------- E N D O F T H E C O D E ------- */
608: /* */
609: /* From now on, "footnotes" (or "historical notes"). */
610: /* */
611: /* ------------------------------------------------- */
615: /* --------------------------------------------------------------------------
616: Historical note 01
617: -------------------------------------------------------------------------- */
618: /*
619: We considered the possibility of an alternative D_i that would still
620: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
621: The basic principle was still the pseudo-inverse of the counting
622: function; the difference was that we would not count subdomains
623: that do not contribute to the coarse space (i.e., not pure-Neumann
624: subdomains).
626: This turned out to be a bad idea: we would solve trivial Neumann
627: problems in the not pure-Neumann subdomains, since we would be scaling
628: the balanced residual by zero.
629: */
634: /* --------------------------------------------------------------------------
635: Historical note 02
636: -------------------------------------------------------------------------- */
637: /*
638: We tried an alternative coarse problem, that would eliminate exactly a
639: constant error. Turned out not to improve the overall convergence.
640: */