1 2 #include <../src/ksp/pc/impls/is/nn/nn.h> 3 4 /* -------------------------------------------------------------------------- */ 5 /* 6 PCSetUp_NN - Prepares for the use of the NN preconditioner 7 by setting data structures and options. 8 9 Input Parameter: 10 . pc - the preconditioner context 11 12 Application Interface Routine: PCSetUp() 13 14 Notes: 15 The interface routine PCSetUp() is not usually called directly by 16 the user, but instead is called by PCApply() if necessary. 17 */ 18 static PetscErrorCode PCSetUp_NN(PC pc) 19 { 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 if (!pc->setupcalled) { 24 /* Set up all the "iterative substructuring" common block */ 25 ierr = PCISSetUp(pc,PETSC_TRUE);CHKERRQ(ierr); 26 /* Create the coarse matrix. */ 27 ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 /* -------------------------------------------------------------------------- */ 33 /* 34 PCApply_NN - Applies the NN preconditioner to a vector. 35 36 Input Parameters: 37 . pc - the preconditioner context 38 . r - input vector (global) 39 40 Output Parameter: 41 . z - output vector (global) 42 43 Application Interface Routine: PCApply() 44 */ 45 static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z) 46 { 47 PC_IS *pcis = (PC_IS*)(pc->data); 48 PetscErrorCode ierr; 49 PetscScalar m_one = -1.0; 50 Vec w = pcis->vec1_global; 51 52 PetscFunctionBegin; 53 /* 54 Dirichlet solvers. 55 Solving $ B_I^{(i)}r_I^{(i)} $ at each processor. 56 Storing the local results at vec2_D 57 */ 58 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 59 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 60 ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 61 62 /* 63 Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ . 64 Storing the result in the interface portion of the global vector w. 65 */ 66 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 67 ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr); 68 ierr = VecCopy(r,w);CHKERRQ(ierr); 69 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 70 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 71 72 /* 73 Apply the interface preconditioner 74 */ 75 ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D, 76 pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 77 78 /* 79 Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $ 80 The result is stored in vec1_D. 81 */ 82 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 83 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 84 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 85 86 /* 87 Dirichlet solvers. 88 Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks 89 $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $. 90 */ 91 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 92 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 93 ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 94 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 95 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 96 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /* -------------------------------------------------------------------------- */ 101 /* 102 PCDestroy_NN - Destroys the private context for the NN preconditioner 103 that was created with PCCreate_NN(). 104 105 Input Parameter: 106 . pc - the preconditioner context 107 108 Application Interface Routine: PCDestroy() 109 */ 110 static PetscErrorCode PCDestroy_NN(PC pc) 111 { 112 PC_NN *pcnn = (PC_NN*)pc->data; 113 PetscErrorCode ierr; 114 115 PetscFunctionBegin; 116 ierr = PCISDestroy(pc);CHKERRQ(ierr); 117 118 ierr = MatDestroy(&pcnn->coarse_mat);CHKERRQ(ierr); 119 ierr = VecDestroy(&pcnn->coarse_x);CHKERRQ(ierr); 120 ierr = VecDestroy(&pcnn->coarse_b);CHKERRQ(ierr); 121 ierr = KSPDestroy(&pcnn->ksp_coarse);CHKERRQ(ierr); 122 if (pcnn->DZ_IN) { 123 ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr); 124 ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr); 125 } 126 127 /* 128 Free the private data structure that was hanging off the PC 129 */ 130 ierr = PetscFree(pc->data);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 /* -------------------------------------------------------------------------- */ 135 /*MC 136 PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs. 137 138 Options Database Keys: 139 + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems 140 (this skips the first coarse grid solve in the preconditioner) 141 . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems 142 (this skips the second coarse grid solve in the preconditioner) 143 . -pc_is_damp_fixed <fact> - 144 . -pc_is_remove_nullspace_fixed - 145 . -pc_is_set_damping_factor_floating <fact> - 146 . -pc_is_not_damp_floating - 147 + -pc_is_not_remove_nullspace_floating - 148 149 Level: intermediate 150 151 Notes: The matrix used with this preconditioner must be of type MATIS 152 153 Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the 154 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 155 on the subdomains; though in our experience using approximate solvers is slower.). 156 157 Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx 158 Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx 159 Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx 160 161 Contributed by Paulo Goldfeld 162 163 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 164 M*/ 165 166 PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc) 167 { 168 PetscErrorCode ierr; 169 PC_NN *pcnn; 170 171 PetscFunctionBegin; 172 /* 173 Creates the private data structure for this preconditioner and 174 attach it to the PC object. 175 */ 176 ierr = PetscNewLog(pc,&pcnn);CHKERRQ(ierr); 177 pc->data = (void*)pcnn; 178 179 ierr = PCISCreate(pc);CHKERRQ(ierr); 180 pcnn->coarse_mat = 0; 181 pcnn->coarse_x = 0; 182 pcnn->coarse_b = 0; 183 pcnn->ksp_coarse = 0; 184 pcnn->DZ_IN = 0; 185 186 /* 187 Set the pointers for the functions that are provided above. 188 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 189 are called, they will automatically call these functions. Note we 190 choose not to provide a couple of these functions since they are 191 not needed. 192 */ 193 pc->ops->apply = PCApply_NN; 194 pc->ops->applytranspose = 0; 195 pc->ops->setup = PCSetUp_NN; 196 pc->ops->destroy = PCDestroy_NN; 197 pc->ops->view = 0; 198 pc->ops->applyrichardson = 0; 199 pc->ops->applysymmetricleft = 0; 200 pc->ops->applysymmetricright = 0; 201 PetscFunctionReturn(0); 202 } 203 204 /* -------------------------------------------------------------------------- */ 205 /* 206 PCNNCreateCoarseMatrix - 207 */ 208 PetscErrorCode PCNNCreateCoarseMatrix(PC pc) 209 { 210 MPI_Request *send_request, *recv_request; 211 PetscErrorCode ierr; 212 PetscInt i, j, k; 213 PetscScalar *mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */ 214 PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */ 215 216 /* aliasing some names */ 217 PC_IS *pcis = (PC_IS*)(pc->data); 218 PC_NN *pcnn = (PC_NN*)pc->data; 219 PetscInt n_neigh = pcis->n_neigh; 220 PetscInt *neigh = pcis->neigh; 221 PetscInt *n_shared = pcis->n_shared; 222 PetscInt **shared = pcis->shared; 223 PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */ 224 225 PetscFunctionBegin; 226 /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */ 227 ierr = PetscMalloc1(n_neigh*n_neigh+1,&mat);CHKERRQ(ierr); 228 229 /* Allocate memory for DZ */ 230 /* Notice that DZ_OUT[0] is allocated some space that is never used. */ 231 /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */ 232 { 233 PetscInt size_of_Z = 0; 234 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr); 235 DZ_IN = pcnn->DZ_IN; 236 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr); 237 for (i=0; i<n_neigh; i++) size_of_Z += n_shared[i]; 238 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr); 239 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr); 240 } 241 for (i=1; i<n_neigh; i++) { 242 DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1]; 243 DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1]; 244 } 245 246 /* Set the values of DZ_OUT, in order to send this info to the neighbours */ 247 /* First, set the auxiliary array pcis->work_N. */ 248 ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 249 for (i=1; i<n_neigh; i++) { 250 for (j=0; j<n_shared[i]; j++) { 251 DZ_OUT[i][j] = pcis->work_N[shared[i][j]]; 252 } 253 } 254 255 /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */ 256 /* Notice that send_request[] and recv_request[] could have one less element. */ 257 /* We make them longer to have request[i] corresponding to neigh[i]. */ 258 { 259 PetscMPIInt tag; 260 ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr); 261 ierr = PetscMalloc2(n_neigh+1,&send_request,n_neigh+1,&recv_request);CHKERRQ(ierr); 262 for (i=1; i<n_neigh; i++) { 263 ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(send_request[i]));CHKERRQ(ierr); 264 ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(recv_request[i]));CHKERRQ(ierr); 265 } 266 } 267 268 /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */ 269 for (j=0; j<n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]]; 270 271 /* Start computing with local D*Z while communication goes on. */ 272 /* Apply Schur complement. The result is "stored" in vec (more */ 273 /* precisely, vec points to the result, stored in pc_nn->vec1_B) */ 274 /* and also scattered to pcnn->work_N. */ 275 ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B, 276 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 277 278 /* Compute the first column, while completing the receiving. */ 279 for (i=0; i<n_neigh; i++) { 280 MPI_Status stat; 281 PetscMPIInt ind=0; 282 if (i>0) { ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRQ(ierr); ind++;} 283 mat[ind*n_neigh+0] = 0.0; 284 for (k=0; k<n_shared[ind]; k++) mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]]; 285 } 286 287 /* Compute the remaining of the columns */ 288 for (j=1; j<n_neigh; j++) { 289 ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B, 290 pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 291 for (i=0; i<n_neigh; i++) { 292 mat[i*n_neigh+j] = 0.0; 293 for (k=0; k<n_shared[i]; k++) mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]]; 294 } 295 } 296 297 /* Complete the sending. */ 298 if (n_neigh>1) { 299 MPI_Status *stat; 300 ierr = PetscMalloc1(n_neigh-1,&stat);CHKERRQ(ierr); 301 if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRQ(ierr);} 302 ierr = PetscFree(stat);CHKERRQ(ierr); 303 } 304 305 /* Free the memory for the MPI requests */ 306 ierr = PetscFree2(send_request,recv_request);CHKERRQ(ierr); 307 308 /* Free the memory for DZ_OUT */ 309 if (DZ_OUT) { 310 ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); 311 ierr = PetscFree(DZ_OUT);CHKERRQ(ierr); 312 } 313 314 { 315 PetscMPIInt size; 316 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 317 /* Create the global coarse vectors (rhs and solution). */ 318 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),1,size,&(pcnn->coarse_b));CHKERRQ(ierr); 319 ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr); 320 /* Create and set the global coarse AIJ matrix. */ 321 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&(pcnn->coarse_mat));CHKERRQ(ierr); 322 ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr); 323 ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr); 324 ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,NULL);CHKERRQ(ierr); 325 ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,NULL,n_neigh,NULL);CHKERRQ(ierr); 326 ierr = MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 327 ierr = MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 328 ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr); 329 ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 } 332 333 { 334 PetscMPIInt rank; 335 PetscScalar one = 1.0; 336 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 337 /* "Zero out" rows of not-purely-Neumann subdomains */ 338 if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */ 339 ierr = MatZeroRows(pcnn->coarse_mat,0,NULL,one,0,0);CHKERRQ(ierr); 340 } else { /* here it DOES zero the row, since it's not a floating subdomain. */ 341 PetscInt row = (PetscInt) rank; 342 ierr = MatZeroRows(pcnn->coarse_mat,1,&row,one,0,0);CHKERRQ(ierr); 343 } 344 } 345 346 /* Create the coarse linear solver context */ 347 { 348 PC pc_ctx, inner_pc; 349 KSP inner_ksp; 350 351 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcnn->ksp_coarse);CHKERRQ(ierr); 352 ierr = PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);CHKERRQ(ierr); 353 ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat);CHKERRQ(ierr); 354 ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr); 355 ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr); 356 ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr); 357 ierr = PCRedundantGetKSP(pc_ctx,&inner_ksp);CHKERRQ(ierr); 358 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 359 ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr); 360 ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr); 361 ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr); 362 /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */ 363 ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr); 364 } 365 366 /* Free the memory for mat */ 367 ierr = PetscFree(mat);CHKERRQ(ierr); 368 369 /* for DEBUGGING, save the coarse matrix to a file. */ 370 { 371 PetscBool flg = PETSC_FALSE; 372 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_nn_save_coarse_matrix",&flg,NULL);CHKERRQ(ierr); 373 if (flg) { 374 PetscViewer viewer; 375 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr); 376 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 377 ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr); 378 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 379 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 380 } 381 } 382 383 /* Set the variable pcnn->factor_coarse_rhs. */ 384 pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0; 385 386 /* See historical note 02, at the bottom of this file. */ 387 PetscFunctionReturn(0); 388 } 389 390 /* -------------------------------------------------------------------------- */ 391 /* 392 PCNNApplySchurToChunk - 393 394 Input parameters: 395 . pcnn 396 . n - size of chunk 397 . idx - indices of chunk 398 . chunk - values 399 400 Output parameters: 401 . array_N - result of Schur complement applied to chunk, scattered to big array 402 . vec1_B - result of Schur complement applied to chunk 403 . vec2_B - garbage (used as work space) 404 . vec1_D - garbage (used as work space) 405 . vec2_D - garbage (used as work space) 406 407 */ 408 PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 409 { 410 PetscErrorCode ierr; 411 PetscInt i; 412 PC_IS *pcis = (PC_IS*)(pc->data); 413 414 PetscFunctionBegin; 415 ierr = PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));CHKERRQ(ierr); 416 for (i=0; i<n; i++) array_N[idx[i]] = chunk[i]; 417 ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 418 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 419 ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 /* -------------------------------------------------------------------------- */ 424 /* 425 PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 426 the preconditioner for the Schur complement. 427 428 Input parameter: 429 . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used. 430 431 Output parameters: 432 . z - global vector of interior and interface nodes. The values on the interface are the result of 433 the application of the interface preconditioner to the interface part of r. The values on the 434 interior nodes are garbage. 435 . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 436 . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 437 . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 438 . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 439 . vec1_D - vector of local interior nodes; returns garbage (used as work space) 440 . vec2_D - vector of local interior nodes; returns garbage (used as work space) 441 . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 442 . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 443 444 */ 445 PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,Vec vec2_D, Vec vec1_N, Vec vec2_N) 446 { 447 PetscErrorCode ierr; 448 PC_IS *pcis = (PC_IS*)(pc->data); 449 450 PetscFunctionBegin; 451 /* 452 First balancing step. 453 */ 454 { 455 PetscBool flg = PETSC_FALSE; 456 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_nn_turn_off_first_balancing",&flg,NULL);CHKERRQ(ierr); 457 if (!flg) { 458 ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 459 } else { 460 ierr = VecCopy(r,z);CHKERRQ(ierr); 461 } 462 } 463 464 /* 465 Extract the local interface part of z and scale it by D 466 */ 467 ierr = VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = VecScatterEnd (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 469 ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr); 470 471 /* Neumann Solver */ 472 ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr); 473 474 /* 475 Second balancing step. 476 */ 477 { 478 PetscBool flg = PETSC_FALSE; 479 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_turn_off_second_balancing",&flg,NULL);CHKERRQ(ierr); 480 if (!flg) { 481 ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 482 } else { 483 ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr); 484 ierr = VecSet(z,0.0);CHKERRQ(ierr); 485 ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 486 ierr = VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 487 } 488 } 489 PetscFunctionReturn(0); 490 } 491 492 /* -------------------------------------------------------------------------- */ 493 /* 494 PCNNBalancing - Computes z, as given in equations (15) and (16) (if the 495 input argument u is provided), or s, as given in equations 496 (12) and (13), if the input argument u is a null vector. 497 Notice that the input argument u plays the role of u_i in 498 equation (14). The equation numbers refer to [Man93]. 499 500 Input Parameters: 501 . pcnn - NN preconditioner context. 502 . r - MPI vector of all nodes (interior and interface). It's preserved. 503 . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null. 504 505 Output Parameters: 506 . z - MPI vector of interior and interface nodes. Returns s or z (see description above). 507 . vec1_B - Sequential vector of local interface nodes. Workspace. 508 . vec2_B - Sequential vector of local interface nodes. Workspace. 509 . vec3_B - (Optional) sequential vector of local interface nodes. Workspace. 510 . vec1_D - Sequential vector of local interior nodes. Workspace. 511 . vec2_D - Sequential vector of local interior nodes. Workspace. 512 . work_N - Array of all local nodes (interior and interface). Workspace. 513 514 */ 515 PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,Vec vec1_D, Vec vec2_D, PetscScalar *work_N) 516 { 517 PetscErrorCode ierr; 518 PetscInt k; 519 PetscScalar value; 520 PetscScalar *lambda; 521 PC_NN *pcnn = (PC_NN*)(pc->data); 522 PC_IS *pcis = (PC_IS*)(pc->data); 523 524 PetscFunctionBegin; 525 ierr = PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr); 526 if (u) { 527 if (!vec3_B) vec3_B = u; 528 ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr); 529 ierr = VecSet(z,0.0);CHKERRQ(ierr); 530 ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 531 ierr = VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 532 ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 ierr = VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 534 ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 535 ierr = VecScale(vec3_B,-1.0);CHKERRQ(ierr); 536 ierr = VecCopy(r,z);CHKERRQ(ierr); 537 ierr = VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 ierr = VecScatterEnd (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 539 } else { 540 ierr = VecCopy(r,z);CHKERRQ(ierr); 541 } 542 ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 543 ierr = VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 544 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 545 for (k=0, value=0.0; k<pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; 546 value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */ 547 { 548 PetscMPIInt rank; 549 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 550 ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr); 551 /* 552 Since we are only inserting local values (one value actually) we don't need to do the 553 reduction that tells us there is no data that needs to be moved. Hence we comment out these 554 ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd (pcnn->coarse_b);CHKERRQ(ierr); 556 */ 557 } 558 ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr); 559 if (!u) { ierr = VecScale(pcnn->coarse_x,-1.0);CHKERRQ(ierr); } 560 ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 561 for (k=0; k<pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; 562 ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 563 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 564 ierr = VecSet(z,0.0);CHKERRQ(ierr); 565 ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 566 ierr = VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 567 if (!u) { 568 ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 570 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 571 ierr = VecCopy(r,z);CHKERRQ(ierr); 572 } 573 ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 574 ierr = VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 575 ierr = PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 580 581 582 /* ------- E N D O F T H E C O D E ------- */ 583 /* */ 584 /* From now on, "footnotes" (or "historical notes"). */ 585 /* */ 586 /* ------------------------------------------------- */ 587 588 589 590 /* -------------------------------------------------------------------------- 591 Historical note 01 592 -------------------------------------------------------------------------- */ 593 /* 594 We considered the possibility of an alternative D_i that would still 595 provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $). 596 The basic principle was still the pseudo-inverse of the counting 597 function; the difference was that we would not count subdomains 598 that do not contribute to the coarse space (i.e., not pure-Neumann 599 subdomains). 600 601 This turned out to be a bad idea: we would solve trivial Neumann 602 problems in the not pure-Neumann subdomains, since we would be scaling 603 the balanced residual by zero. 604 */ 605 606 607 608 609 /* -------------------------------------------------------------------------- 610 Historical note 02 611 -------------------------------------------------------------------------- */ 612 /* 613 We tried an alternative coarse problem, that would eliminate exactly a 614 constant error. Turned out not to improve the overall convergence. 615 */ 616 617 618