xref: /petsc/src/ksp/pc/impls/is/nn/nn.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCKSP_DLL
2 
3 #include "src/ksp/pc/impls/is/nn/nn.h"
4 
5 /* -------------------------------------------------------------------------- */
6 /*
7    PCSetUp_NN - Prepares for the use of the NN preconditioner
8                     by setting data structures and options.
9 
10    Input Parameter:
11 .  pc - the preconditioner context
12 
13    Application Interface Routine: PCSetUp()
14 
15    Notes:
16    The interface routine PCSetUp() is not usually called directly by
17    the user, but instead is called by PCApply() if necessary.
18 */
19 #undef __FUNCT__
20 #define __FUNCT__ "PCSetUp_NN"
21 static PetscErrorCode PCSetUp_NN(PC pc)
22 {
23   PetscErrorCode ierr;
24 
25   PetscFunctionBegin;
26   if (!pc->setupcalled) {
27     /* Set up all the "iterative substructuring" common block */
28     ierr = PCISSetUp(pc);CHKERRQ(ierr);
29     /* Create the coarse matrix. */
30     ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* -------------------------------------------------------------------------- */
36 /*
37    PCApply_NN - Applies the NN preconditioner to a vector.
38 
39    Input Parameters:
40 .  pc - the preconditioner context
41 .  r - input vector (global)
42 
43    Output Parameter:
44 .  z - output vector (global)
45 
46    Application Interface Routine: PCApply()
47  */
48 #undef __FUNCT__
49 #define __FUNCT__ "PCApply_NN"
50 static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
51 {
52   PC_IS          *pcis = (PC_IS*)(pc->data);
53   PetscErrorCode ierr;
54   PetscScalar    m_one = -1.0;
55   Vec            w = pcis->vec1_global;
56 
57   PetscFunctionBegin;
58   /*
59     Dirichlet solvers.
60     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
61     Storing the local results at vec2_D
62   */
63   ierr = VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
64   ierr = VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);CHKERRQ(ierr);
65   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
66 
67   /*
68     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
69     Storing the result in the interface portion of the global vector w.
70   */
71   ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
72   ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr);
73   ierr = VecCopy(r,w);CHKERRQ(ierr);
74   ierr = VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
75   ierr = VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
76 
77   /*
78     Apply the interface preconditioner
79   */
80   ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
81                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
82 
83   /*
84     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
85     The result is stored in vec1_D.
86   */
87   ierr = VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
88   ierr = VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
89   ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr);
90 
91   /*
92     Dirichlet solvers.
93     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
94     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
95   */
96   ierr = VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
97   ierr = VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
98   ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
99   ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
100   ierr = VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
101   ierr = VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 /* -------------------------------------------------------------------------- */
106 /*
107    PCDestroy_NN - Destroys the private context for the NN preconditioner
108    that was created with PCCreate_NN().
109 
110    Input Parameter:
111 .  pc - the preconditioner context
112 
113    Application Interface Routine: PCDestroy()
114 */
115 #undef __FUNCT__
116 #define __FUNCT__ "PCDestroy_NN"
117 static PetscErrorCode PCDestroy_NN(PC pc)
118 {
119   PC_NN          *pcnn = (PC_NN*)pc->data;
120   PetscErrorCode ierr;
121 
122   PetscFunctionBegin;
123   ierr = PCISDestroy(pc);CHKERRQ(ierr);
124 
125   if (pcnn->coarse_mat)  {ierr = MatDestroy(pcnn->coarse_mat);CHKERRQ(ierr);}
126   if (pcnn->coarse_x)    {ierr = VecDestroy(pcnn->coarse_x);CHKERRQ(ierr);}
127   if (pcnn->coarse_b)    {ierr = VecDestroy(pcnn->coarse_b);CHKERRQ(ierr);}
128   if (pcnn->ksp_coarse) {ierr = KSPDestroy(pcnn->ksp_coarse);CHKERRQ(ierr);}
129   if (pcnn->DZ_IN) {
130     if (pcnn->DZ_IN[0]) {ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr);}
131     ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr);
132   }
133 
134   /*
135       Free the private data structure that was hanging off the PC
136   */
137   ierr = PetscFree(pcnn);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 /* -------------------------------------------------------------------------- */
142 /*MC
143    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
144 
145    Options Database Keys:
146 +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147                                        (this skips the first coarse grid solve in the preconditioner)
148 .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
149                                        (this skips the second coarse grid solve in the preconditioner)
150 .    -pc_is_damp_fixed <fact> -
151 .    -pc_is_remove_nullspace_fixed -
152 .    -pc_is_set_damping_factor_floating <fact> -
153 .    -pc_is_not_damp_floating -
154 +    -pc_is_not_remove_nullspace_floating -
155 
156    Level: intermediate
157 
158    Notes: The matrix used with this preconditioner must be of type MATIS
159 
160           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162           on the subdomains; though in our experience using approximate solvers is slower.).
163 
164           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
167 
168    Contributed by Paulo Goldfeld
169 
170 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
171 M*/
172 EXTERN_C_BEGIN
173 #undef __FUNCT__
174 #define __FUNCT__ "PCCreate_NN"
175 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_NN(PC pc)
176 {
177   PetscErrorCode ierr;
178   PC_NN          *pcnn;
179 
180   PetscFunctionBegin;
181   /*
182      Creates the private data structure for this preconditioner and
183      attach it to the PC object.
184   */
185   ierr      = PetscNew(PC_NN,&pcnn);CHKERRQ(ierr);
186   pc->data  = (void*)pcnn;
187 
188   /*
189      Logs the memory usage; this is not needed but allows PETSc to
190      monitor how much memory is being used for various purposes.
191   */
192   ierr = PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS));CHKERRQ(ierr); /* Is this the right thing to do? */
193 
194   ierr = PCISCreate(pc);CHKERRQ(ierr);
195   pcnn->coarse_mat  = 0;
196   pcnn->coarse_x    = 0;
197   pcnn->coarse_b    = 0;
198   pcnn->ksp_coarse = 0;
199   pcnn->DZ_IN       = 0;
200 
201   /*
202       Set the pointers for the functions that are provided above.
203       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
204       are called, they will automatically call these functions.  Note we
205       choose not to provide a couple of these functions since they are
206       not needed.
207   */
208   pc->ops->apply               = PCApply_NN;
209   pc->ops->applytranspose      = 0;
210   pc->ops->setup               = PCSetUp_NN;
211   pc->ops->destroy             = PCDestroy_NN;
212   pc->ops->view                = 0;
213   pc->ops->applyrichardson     = 0;
214   pc->ops->applysymmetricleft  = 0;
215   pc->ops->applysymmetricright = 0;
216   PetscFunctionReturn(0);
217 }
218 EXTERN_C_END
219 
220 
221 /* -------------------------------------------------------------------------- */
222 /*
223    PCNNCreateCoarseMatrix -
224 */
225 #undef __FUNCT__
226 #define __FUNCT__ "PCNNCreateCoarseMatrix"
227 PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
228 {
229   MPI_Request    *send_request, *recv_request;
230   PetscErrorCode ierr;
231   PetscInt       i, j, k;
232   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
233   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
234 
235   /* aliasing some names */
236   PC_IS*         pcis     = (PC_IS*)(pc->data);
237   PC_NN*         pcnn     = (PC_NN*)pc->data;
238   PetscInt       n_neigh  = pcis->n_neigh;
239   PetscInt*      neigh    = pcis->neigh;
240   PetscInt*      n_shared = pcis->n_shared;
241   PetscInt**     shared   = pcis->shared;
242   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */
243 
244   PetscFunctionBegin;
245   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
246   ierr = PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);CHKERRQ(ierr);
247 
248   /* Allocate memory for DZ */
249   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
250   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
251   {
252     PetscInt size_of_Z = 0;
253     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr);
254     DZ_IN = pcnn->DZ_IN;
255     ierr  = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr);
256     for (i=0; i<n_neigh; i++) {
257       size_of_Z += n_shared[i];
258     }
259     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr);
260     ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr);
261   }
262   for (i=1; i<n_neigh; i++) {
263     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
264     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
265   }
266 
267   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
268   /* First, set the auxiliary array pcis->work_N. */
269   ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
270   for (i=1; i<n_neigh; i++){
271     for (j=0; j<n_shared[i]; j++) {
272       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
273     }
274   }
275 
276   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
277   /* Notice that send_request[] and recv_request[] could have one less element. */
278   /* We make them longer to have request[i] corresponding to neigh[i].          */
279   {
280     PetscMPIInt tag;
281     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
282     ierr = PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);CHKERRQ(ierr);
283     recv_request = send_request + (n_neigh);
284     for (i=1; i<n_neigh; i++) {
285       ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));CHKERRQ(ierr);
286       ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));CHKERRQ(ierr);
287     }
288   }
289 
290   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
291   for(j=0; j<n_shared[0]; j++) {
292     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
293   }
294 
295   /* Start computing with local D*Z while communication goes on.    */
296   /* Apply Schur complement. The result is "stored" in vec (more    */
297   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
298   /* and also scattered to pcnn->work_N.                            */
299   ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
300                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
301 
302   /* Compute the first column, while completing the receiving. */
303   for (i=0; i<n_neigh; i++) {
304     MPI_Status  stat;
305     PetscMPIInt ind=0;
306     if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;}
307     mat[ind*n_neigh+0] = 0.0;
308     for (k=0; k<n_shared[ind]; k++) {
309       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
310     }
311   }
312 
313   /* Compute the remaining of the columns */
314   for (j=1; j<n_neigh; j++) {
315     ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
316                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
317     for (i=0; i<n_neigh; i++) {
318       mat[i*n_neigh+j] = 0.0;
319       for (k=0; k<n_shared[i]; k++) {
320 	mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
321       }
322     }
323   }
324 
325   /* Complete the sending. */
326   if (n_neigh>1) {
327     MPI_Status *stat;
328     ierr = PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);CHKERRQ(ierr);
329     if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);}
330     ierr = PetscFree(stat);CHKERRQ(ierr);
331   }
332 
333   /* Free the memory for the MPI requests */
334   ierr = PetscFree(send_request);CHKERRQ(ierr);
335 
336   /* Free the memory for DZ_OUT */
337   if (DZ_OUT) {
338     if (DZ_OUT[0]) { ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); }
339     ierr = PetscFree(DZ_OUT);CHKERRQ(ierr);
340   }
341 
342   {
343     PetscMPIInt size;
344     ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
345     /* Create the global coarse vectors (rhs and solution). */
346     ierr = VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));CHKERRQ(ierr);
347     ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr);
348     /* Create and set the global coarse AIJ matrix. */
349     ierr = MatCreate(pc->comm,&(pcnn->coarse_mat));CHKERRQ(ierr);
350     ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr);
351     ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr);
352     ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
354     ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr);
355     ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
356     ierr = MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
357   }
358 
359   {
360     PetscMPIInt rank;
361     PetscScalar one = 1.0;
362     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
363     /* "Zero out" rows of not-purely-Neumann subdomains */
364     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
365       ierr = MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);CHKERRQ(ierr);
366     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
367       PetscInt row = (PetscInt) rank;
368       ierr = MatZeroRows(pcnn->coarse_mat,1,&row,one);CHKERRQ(ierr);
369     }
370   }
371 
372   /* Create the coarse linear solver context */
373   {
374     PC  pc_ctx, inner_pc;
375     ierr = KSPCreate(pc->comm,&pcnn->ksp_coarse);CHKERRQ(ierr);
376     ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
377     ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr);
378     ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr);
379     ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr);
380     ierr = PCRedundantGetPC(pc_ctx,&inner_pc);CHKERRQ(ierr);
381     ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr);
382     ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr);
383     ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr);
384     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
385     ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr);
386   }
387 
388   /* Free the memory for mat */
389   ierr = PetscFree(mat);CHKERRQ(ierr);
390 
391   /* for DEBUGGING, save the coarse matrix to a file. */
392   {
393     PetscTruth flg;
394     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);CHKERRQ(ierr);
395     if (flg) {
396       PetscViewer viewer;
397       ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr);
398       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
399       ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr);
400       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
401     }
402   }
403 
404   /*  Set the variable pcnn->factor_coarse_rhs. */
405   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
406 
407   /* See historical note 02, at the bottom of this file. */
408   PetscFunctionReturn(0);
409 }
410 
411 /* -------------------------------------------------------------------------- */
412 /*
413    PCNNApplySchurToChunk -
414 
415    Input parameters:
416 .  pcnn
417 .  n - size of chunk
418 .  idx - indices of chunk
419 .  chunk - values
420 
421    Output parameters:
422 .  array_N - result of Schur complement applied to chunk, scattered to big array
423 .  vec1_B  - result of Schur complement applied to chunk
424 .  vec2_B  - garbage (used as work space)
425 .  vec1_D  - garbage (used as work space)
426 .  vec2_D  - garbage (used as work space)
427 
428 */
429 #undef __FUNCT__
430 #define __FUNCT__ "PCNNApplySchurToChunk"
431 PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
432 {
433   PetscErrorCode ierr;
434   PetscInt       i;
435   PC_IS          *pcis = (PC_IS*)(pc->data);
436 
437   PetscFunctionBegin;
438   ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr);
439   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
440   ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
441   ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
442   ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 /* -------------------------------------------------------------------------- */
447 /*
448    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
449                                       the preconditioner for the Schur complement.
450 
451    Input parameter:
452 .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
453 
454    Output parameters:
455 .  z - global vector of interior and interface nodes. The values on the interface are the result of
456        the application of the interface preconditioner to the interface part of r. The values on the
457        interior nodes are garbage.
458 .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
459 .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
460 .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
461 .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
462 .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
463 .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
464 .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
465 .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
466 
467 */
468 #undef __FUNCT__
469 #define __FUNCT__ "PCNNApplyInterfacePreconditioner"
470 PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
471                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
472 {
473   PetscErrorCode ierr;
474   PC_IS*         pcis = (PC_IS*)(pc->data);
475 
476   PetscFunctionBegin;
477   /*
478     First balancing step.
479   */
480   {
481     PetscTruth flg;
482     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);CHKERRQ(ierr);
483     if (!flg) {
484       ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
485     } else {
486       ierr = VecCopy(r,z);CHKERRQ(ierr);
487     }
488   }
489 
490   /*
491     Extract the local interface part of z and scale it by D
492   */
493   ierr = VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
494   ierr = VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
495   ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
496 
497   /* Neumann Solver */
498   ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr);
499 
500   /*
501     Second balancing step.
502   */
503   {
504     PetscTruth flg;
505     ierr = PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);CHKERRQ(ierr);
506     if (!flg) {
507       ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr);
508     } else {
509       ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr);
510       ierr = VecSet(z,0.0);CHKERRQ(ierr);
511       ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
512       ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 /* -------------------------------------------------------------------------- */
519 /*
520    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
521                    input argument u is provided), or s, as given in equations
522                    (12) and (13), if the input argument u is a null vector.
523                    Notice that the input argument u plays the role of u_i in
524                    equation (14). The equation numbers refer to [Man93].
525 
526    Input Parameters:
527 .  pcnn - NN preconditioner context.
528 .  r - MPI vector of all nodes (interior and interface). It's preserved.
529 .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
530 
531    Output Parameters:
532 .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
533 .  vec1_B - Sequential vector of local interface nodes. Workspace.
534 .  vec2_B - Sequential vector of local interface nodes. Workspace.
535 .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
536 .  vec1_D - Sequential vector of local interior nodes. Workspace.
537 .  vec2_D - Sequential vector of local interior nodes. Workspace.
538 .  work_N - Array of all local nodes (interior and interface). Workspace.
539 
540 */
541 #undef __FUNCT__
542 #define __FUNCT__ "PCNNBalancing"
543 PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
544                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
545 {
546   PetscErrorCode ierr;
547   PetscInt       k;
548   PetscScalar    value;
549   PetscScalar*   lambda;
550   PC_NN*         pcnn     = (PC_NN*)(pc->data);
551   PC_IS*         pcis     = (PC_IS*)(pc->data);
552 
553   PetscFunctionBegin;
554   ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
555   if (u) {
556     if (!vec3_B) { vec3_B = u; }
557     ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr);
558     ierr = VecSet(z,0.0);CHKERRQ(ierr);
559     ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
560     ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
561     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
562     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
563     ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
564     ierr = VecScale(vec3_B,-1.0);CHKERRQ(ierr);
565     ierr = VecCopy(r,z);CHKERRQ(ierr);
566     ierr = VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
567     ierr = VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
568   } else {
569     ierr = VecCopy(r,z);CHKERRQ(ierr);
570   }
571   ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
572   ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
573   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr);
574   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
575   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
576   {
577     PetscMPIInt rank;
578     ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
579     ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr);
580     /*
581        Since we are only inserting local values (one value actually) we don't need to do the
582        reduction that tells us there is no data that needs to be moved. Hence we comment out these
583        ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr);
584        ierr = VecAssemblyEnd  (pcnn->coarse_b);CHKERRQ(ierr);
585     */
586   }
587   ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr);
588   if (!u) { ierr = VecScale(pcnn->coarse_x,-1.0);CHKERRQ(ierr); }
589   ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
590   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
591   ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr);
592   ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr);
593   ierr = VecSet(z,0.0);CHKERRQ(ierr);
594   ierr = VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
595   ierr = VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
596   if (!u) {
597     ierr = VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
598     ierr = VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);CHKERRQ(ierr);
599     ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr);
600     ierr = VecCopy(r,z);CHKERRQ(ierr);
601   }
602   ierr = VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
603   ierr = VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);CHKERRQ(ierr);
604   ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 
610 
611 
612 /*  -------   E N D   O F   T H E   C O D E   -------  */
613 /*                                                     */
614 /*  From now on, "footnotes" (or "historical notes").  */
615 /*                                                     */
616 /*  -------------------------------------------------  */
617 
618 
619 
620 /* --------------------------------------------------------------------------
621    Historical note 01
622    -------------------------------------------------------------------------- */
623 /*
624    We considered the possibility of an alternative D_i that would still
625    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
626    The basic principle was still the pseudo-inverse of the counting
627    function; the difference was that we would not count subdomains
628    that do not contribute to the coarse space (i.e., not pure-Neumann
629    subdomains).
630 
631    This turned out to be a bad idea:  we would solve trivial Neumann
632    problems in the not pure-Neumann subdomains, since we would be scaling
633    the balanced residual by zero.
634 */
635 
636 
637 
638 
639 /* --------------------------------------------------------------------------
640    Historical note 02
641    -------------------------------------------------------------------------- */
642 /*
643    We tried an alternative coarse problem, that would eliminate exactly a
644    constant error. Turned out not to improve the overall convergence.
645 */
646 
647 
648