1 2 #include <../src/ksp/pc/impls/is/nn/nn.h> 3 4 /* -------------------------------------------------------------------------- */ 5 /* 6 PCSetUp_NN - Prepares for the use of the NN preconditioner 7 by setting data structures and options. 8 9 Input Parameter: 10 . pc - the preconditioner context 11 12 Application Interface Routine: PCSetUp() 13 14 Notes: 15 The interface routine PCSetUp() is not usually called directly by 16 the user, but instead is called by PCApply() if necessary. 17 */ 18 static PetscErrorCode PCSetUp_NN(PC pc) 19 { 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 if (!pc->setupcalled) { 24 /* Set up all the "iterative substructuring" common block */ 25 ierr = PCISSetUp(pc,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 26 /* Create the coarse matrix. */ 27 ierr = PCNNCreateCoarseMatrix(pc);CHKERRQ(ierr); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 /* -------------------------------------------------------------------------- */ 33 /* 34 PCApply_NN - Applies the NN preconditioner to a vector. 35 36 Input Parameters: 37 . pc - the preconditioner context 38 . r - input vector (global) 39 40 Output Parameter: 41 . z - output vector (global) 42 43 Application Interface Routine: PCApply() 44 */ 45 static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z) 46 { 47 PC_IS *pcis = (PC_IS*)(pc->data); 48 PetscErrorCode ierr; 49 PetscScalar m_one = -1.0; 50 Vec w = pcis->vec1_global; 51 52 PetscFunctionBegin; 53 /* 54 Dirichlet solvers. 55 Solving $ B_I^{(i)}r_I^{(i)} $ at each processor. 56 Storing the local results at vec2_D 57 */ 58 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 59 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 60 ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 61 62 /* 63 Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ . 64 Storing the result in the interface portion of the global vector w. 65 */ 66 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 67 ierr = VecScale(pcis->vec1_B,m_one);CHKERRQ(ierr); 68 ierr = VecCopy(r,w);CHKERRQ(ierr); 69 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 70 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 71 72 /* 73 Apply the interface preconditioner 74 */ 75 ierr = PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D, 76 pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 77 78 /* 79 Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $ 80 The result is stored in vec1_D. 81 */ 82 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 83 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 84 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 85 86 /* 87 Dirichlet solvers. 88 Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks 89 $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $. 90 */ 91 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 92 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 93 ierr = KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 94 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 95 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 96 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /* -------------------------------------------------------------------------- */ 101 /* 102 PCDestroy_NN - Destroys the private context for the NN preconditioner 103 that was created with PCCreate_NN(). 104 105 Input Parameter: 106 . pc - the preconditioner context 107 108 Application Interface Routine: PCDestroy() 109 */ 110 static PetscErrorCode PCDestroy_NN(PC pc) 111 { 112 PC_NN *pcnn = (PC_NN*)pc->data; 113 PetscErrorCode ierr; 114 115 PetscFunctionBegin; 116 ierr = PCISDestroy(pc);CHKERRQ(ierr); 117 118 ierr = MatDestroy(&pcnn->coarse_mat);CHKERRQ(ierr); 119 ierr = VecDestroy(&pcnn->coarse_x);CHKERRQ(ierr); 120 ierr = VecDestroy(&pcnn->coarse_b);CHKERRQ(ierr); 121 ierr = KSPDestroy(&pcnn->ksp_coarse);CHKERRQ(ierr); 122 if (pcnn->DZ_IN) { 123 ierr = PetscFree(pcnn->DZ_IN[0]);CHKERRQ(ierr); 124 ierr = PetscFree(pcnn->DZ_IN);CHKERRQ(ierr); 125 } 126 127 /* 128 Free the private data structure that was hanging off the PC 129 */ 130 ierr = PetscFree(pc->data);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 /* -------------------------------------------------------------------------- */ 135 /*MC 136 PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs. 137 138 Options Database Keys: 139 + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems 140 (this skips the first coarse grid solve in the preconditioner) 141 . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems 142 (this skips the second coarse grid solve in the preconditioner) 143 . -pc_is_damp_fixed <fact> - 144 . -pc_is_remove_nullspace_fixed - 145 . -pc_is_set_damping_factor_floating <fact> - 146 . -pc_is_not_damp_floating - 147 - -pc_is_not_remove_nullspace_floating - 148 149 Level: intermediate 150 151 Notes: 152 The matrix used with this preconditioner must be of type MATIS 153 154 Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the 155 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 156 on the subdomains; though in our experience using approximate solvers is slower.). 157 158 Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx 159 Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx 160 Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx 161 162 Contributed by Paulo Goldfeld 163 164 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 165 M*/ 166 167 PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc) 168 { 169 PetscErrorCode ierr; 170 PC_NN *pcnn; 171 172 PetscFunctionBegin; 173 /* 174 Creates the private data structure for this preconditioner and 175 attach it to the PC object. 176 */ 177 ierr = PetscNewLog(pc,&pcnn);CHKERRQ(ierr); 178 pc->data = (void*)pcnn; 179 180 ierr = PCISCreate(pc);CHKERRQ(ierr); 181 pcnn->coarse_mat = NULL; 182 pcnn->coarse_x = NULL; 183 pcnn->coarse_b = NULL; 184 pcnn->ksp_coarse = NULL; 185 pcnn->DZ_IN = NULL; 186 187 /* 188 Set the pointers for the functions that are provided above. 189 Now when the user-level routines (such as PCApply(), PCDestroy(), etc.) 190 are called, they will automatically call these functions. Note we 191 choose not to provide a couple of these functions since they are 192 not needed. 193 */ 194 pc->ops->apply = PCApply_NN; 195 pc->ops->applytranspose = NULL; 196 pc->ops->setup = PCSetUp_NN; 197 pc->ops->destroy = PCDestroy_NN; 198 pc->ops->view = NULL; 199 pc->ops->applyrichardson = NULL; 200 pc->ops->applysymmetricleft = NULL; 201 pc->ops->applysymmetricright = NULL; 202 PetscFunctionReturn(0); 203 } 204 205 /* -------------------------------------------------------------------------- */ 206 /* 207 PCNNCreateCoarseMatrix - 208 */ 209 PetscErrorCode PCNNCreateCoarseMatrix(PC pc) 210 { 211 MPI_Request *send_request, *recv_request; 212 PetscErrorCode ierr; 213 PetscInt i, j, k; 214 PetscScalar *mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */ 215 PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */ 216 217 /* aliasing some names */ 218 PC_IS *pcis = (PC_IS*)(pc->data); 219 PC_NN *pcnn = (PC_NN*)pc->data; 220 PetscInt n_neigh = pcis->n_neigh; 221 PetscInt *neigh = pcis->neigh; 222 PetscInt *n_shared = pcis->n_shared; 223 PetscInt **shared = pcis->shared; 224 PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */ 225 226 PetscFunctionBegin; 227 /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */ 228 ierr = PetscMalloc1(n_neigh*n_neigh+1,&mat);CHKERRQ(ierr); 229 230 /* Allocate memory for DZ */ 231 /* Notice that DZ_OUT[0] is allocated some space that is never used. */ 232 /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */ 233 { 234 PetscInt size_of_Z = 0; 235 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);CHKERRQ(ierr); 236 DZ_IN = pcnn->DZ_IN; 237 ierr = PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);CHKERRQ(ierr); 238 for (i=0; i<n_neigh; i++) size_of_Z += n_shared[i]; 239 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);CHKERRQ(ierr); 240 ierr = PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);CHKERRQ(ierr); 241 } 242 for (i=1; i<n_neigh; i++) { 243 DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1]; 244 DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1]; 245 } 246 247 /* Set the values of DZ_OUT, in order to send this info to the neighbours */ 248 /* First, set the auxiliary array pcis->work_N. */ 249 ierr = PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 250 for (i=1; i<n_neigh; i++) { 251 for (j=0; j<n_shared[i]; j++) { 252 DZ_OUT[i][j] = pcis->work_N[shared[i][j]]; 253 } 254 } 255 256 /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */ 257 /* Notice that send_request[] and recv_request[] could have one less element. */ 258 /* We make them longer to have request[i] corresponding to neigh[i]. */ 259 { 260 PetscMPIInt tag; 261 ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr); 262 ierr = PetscMalloc2(n_neigh+1,&send_request,n_neigh+1,&recv_request);CHKERRQ(ierr); 263 for (i=1; i<n_neigh; i++) { 264 ierr = MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(send_request[i]));CHKERRMPI(ierr); 265 ierr = MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(recv_request[i]));CHKERRMPI(ierr); 266 } 267 } 268 269 /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */ 270 for (j=0; j<n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]]; 271 272 /* Start computing with local D*Z while communication goes on. */ 273 /* Apply Schur complement. The result is "stored" in vec (more */ 274 /* precisely, vec points to the result, stored in pc_nn->vec1_B) */ 275 /* and also scattered to pcnn->work_N. */ 276 ierr = PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 277 278 /* Compute the first column, while completing the receiving. */ 279 for (i=0; i<n_neigh; i++) { 280 MPI_Status stat; 281 PetscMPIInt ind=0; 282 if (i>0) {ierr = MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat);CHKERRMPI(ierr); ind++;} 283 mat[ind*n_neigh+0] = 0.0; 284 for (k=0; k<n_shared[ind]; k++) mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]]; 285 } 286 287 /* Compute the remaining of the columns */ 288 for (j=1; j<n_neigh; j++) { 289 ierr = PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 290 for (i=0; i<n_neigh; i++) { 291 mat[i*n_neigh+j] = 0.0; 292 for (k=0; k<n_shared[i]; k++) mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]]; 293 } 294 } 295 296 /* Complete the sending. */ 297 if (n_neigh>1) { 298 MPI_Status *stat; 299 ierr = PetscMalloc1(n_neigh-1,&stat);CHKERRQ(ierr); 300 if (n_neigh-1) {ierr = MPI_Waitall(n_neigh-1,&(send_request[1]),stat);CHKERRMPI(ierr);} 301 ierr = PetscFree(stat);CHKERRQ(ierr); 302 } 303 304 /* Free the memory for the MPI requests */ 305 ierr = PetscFree2(send_request,recv_request);CHKERRQ(ierr); 306 307 /* Free the memory for DZ_OUT */ 308 if (DZ_OUT) { 309 ierr = PetscFree(DZ_OUT[0]);CHKERRQ(ierr); 310 ierr = PetscFree(DZ_OUT);CHKERRQ(ierr); 311 } 312 313 { 314 PetscMPIInt size; 315 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 316 /* Create the global coarse vectors (rhs and solution). */ 317 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),1,size,&(pcnn->coarse_b));CHKERRQ(ierr); 318 ierr = VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));CHKERRQ(ierr); 319 /* Create and set the global coarse AIJ matrix. */ 320 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&(pcnn->coarse_mat));CHKERRQ(ierr); 321 ierr = MatSetSizes(pcnn->coarse_mat,1,1,size,size);CHKERRQ(ierr); 322 ierr = MatSetType(pcnn->coarse_mat,MATAIJ);CHKERRQ(ierr); 323 ierr = MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,NULL);CHKERRQ(ierr); 324 ierr = MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,NULL,n_neigh,NULL);CHKERRQ(ierr); 325 ierr = MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 326 ierr = MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 327 ierr = MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);CHKERRQ(ierr); 328 ierr = MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 329 ierr = MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 } 331 332 { 333 PetscMPIInt rank; 334 PetscScalar one = 1.0; 335 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 336 /* "Zero out" rows of not-purely-Neumann subdomains */ 337 if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */ 338 ierr = MatZeroRows(pcnn->coarse_mat,0,NULL,one,NULL,NULL);CHKERRQ(ierr); 339 } else { /* here it DOES zero the row, since it's not a floating subdomain. */ 340 PetscInt row = (PetscInt) rank; 341 ierr = MatZeroRows(pcnn->coarse_mat,1,&row,one,NULL,NULL);CHKERRQ(ierr); 342 } 343 } 344 345 /* Create the coarse linear solver context */ 346 { 347 PC pc_ctx, inner_pc; 348 KSP inner_ksp; 349 350 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcnn->ksp_coarse);CHKERRQ(ierr); 351 ierr = PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);CHKERRQ(ierr); 352 ierr = KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat);CHKERRQ(ierr); 353 ierr = KSPGetPC(pcnn->ksp_coarse,&pc_ctx);CHKERRQ(ierr); 354 ierr = PCSetType(pc_ctx,PCREDUNDANT);CHKERRQ(ierr); 355 ierr = KSPSetType(pcnn->ksp_coarse,KSPPREONLY);CHKERRQ(ierr); 356 ierr = PCRedundantGetKSP(pc_ctx,&inner_ksp);CHKERRQ(ierr); 357 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 358 ierr = PCSetType(inner_pc,PCLU);CHKERRQ(ierr); 359 ierr = KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");CHKERRQ(ierr); 360 ierr = KSPSetFromOptions(pcnn->ksp_coarse);CHKERRQ(ierr); 361 /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */ 362 ierr = KSPSetUp(pcnn->ksp_coarse);CHKERRQ(ierr); 363 } 364 365 /* Free the memory for mat */ 366 ierr = PetscFree(mat);CHKERRQ(ierr); 367 368 /* for DEBUGGING, save the coarse matrix to a file. */ 369 { 370 PetscBool flg = PETSC_FALSE; 371 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_nn_save_coarse_matrix",&flg,NULL);CHKERRQ(ierr); 372 if (flg) { 373 PetscViewer viewer; 374 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);CHKERRQ(ierr); 375 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 376 ierr = MatView(pcnn->coarse_mat,viewer);CHKERRQ(ierr); 377 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 378 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 379 } 380 } 381 382 /* Set the variable pcnn->factor_coarse_rhs. */ 383 pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0; 384 385 /* See historical note 02, at the bottom of this file. */ 386 PetscFunctionReturn(0); 387 } 388 389 /* -------------------------------------------------------------------------- */ 390 /* 391 PCNNApplySchurToChunk - 392 393 Input parameters: 394 . pcnn 395 . n - size of chunk 396 . idx - indices of chunk 397 . chunk - values 398 399 Output parameters: 400 . array_N - result of Schur complement applied to chunk, scattered to big array 401 . vec1_B - result of Schur complement applied to chunk 402 . vec2_B - garbage (used as work space) 403 . vec1_D - garbage (used as work space) 404 . vec2_D - garbage (used as work space) 405 406 */ 407 PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 408 { 409 PetscErrorCode ierr; 410 PetscInt i; 411 PC_IS *pcis = (PC_IS*)(pc->data); 412 413 PetscFunctionBegin; 414 ierr = PetscArrayzero(array_N, pcis->n);CHKERRQ(ierr); 415 for (i=0; i<n; i++) array_N[idx[i]] = chunk[i]; 416 ierr = PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 417 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 418 ierr = PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 /* -------------------------------------------------------------------------- */ 423 /* 424 PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 425 the preconditioner for the Schur complement. 426 427 Input parameter: 428 . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used. 429 430 Output parameters: 431 . z - global vector of interior and interface nodes. The values on the interface are the result of 432 the application of the interface preconditioner to the interface part of r. The values on the 433 interior nodes are garbage. 434 . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 435 . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 436 . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 437 . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space) 438 . vec1_D - vector of local interior nodes; returns garbage (used as work space) 439 . vec2_D - vector of local interior nodes; returns garbage (used as work space) 440 . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 441 . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space) 442 443 */ 444 PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,Vec vec2_D, Vec vec1_N, Vec vec2_N) 445 { 446 PetscErrorCode ierr; 447 PC_IS *pcis = (PC_IS*)(pc->data); 448 449 PetscFunctionBegin; 450 /* 451 First balancing step. 452 */ 453 { 454 PetscBool flg = PETSC_FALSE; 455 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_nn_turn_off_first_balancing",&flg,NULL);CHKERRQ(ierr); 456 if (!flg) { 457 ierr = PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 458 } else { 459 ierr = VecCopy(r,z);CHKERRQ(ierr); 460 } 461 } 462 463 /* 464 Extract the local interface part of z and scale it by D 465 */ 466 ierr = VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr); 469 470 /* Neumann Solver */ 471 ierr = PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);CHKERRQ(ierr); 472 473 /* 474 Second balancing step. 475 */ 476 { 477 PetscBool flg = PETSC_FALSE; 478 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_turn_off_second_balancing",&flg,NULL);CHKERRQ(ierr); 479 if (!flg) { 480 ierr = PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);CHKERRQ(ierr); 481 } else { 482 ierr = VecPointwiseMult(vec2_B,pcis->D,vec1_B);CHKERRQ(ierr); 483 ierr = VecSet(z,0.0);CHKERRQ(ierr); 484 ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 485 ierr = VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 486 } 487 } 488 PetscFunctionReturn(0); 489 } 490 491 /* -------------------------------------------------------------------------- */ 492 /* 493 PCNNBalancing - Computes z, as given in equations (15) and (16) (if the 494 input argument u is provided), or s, as given in equations 495 (12) and (13), if the input argument u is a null vector. 496 Notice that the input argument u plays the role of u_i in 497 equation (14). The equation numbers refer to [Man93]. 498 499 Input Parameters: 500 . pcnn - NN preconditioner context. 501 . r - MPI vector of all nodes (interior and interface). It's preserved. 502 . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null. 503 504 Output Parameters: 505 . z - MPI vector of interior and interface nodes. Returns s or z (see description above). 506 . vec1_B - Sequential vector of local interface nodes. Workspace. 507 . vec2_B - Sequential vector of local interface nodes. Workspace. 508 . vec3_B - (Optional) sequential vector of local interface nodes. Workspace. 509 . vec1_D - Sequential vector of local interior nodes. Workspace. 510 . vec2_D - Sequential vector of local interior nodes. Workspace. 511 . work_N - Array of all local nodes (interior and interface). Workspace. 512 513 */ 514 PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,Vec vec1_D, Vec vec2_D, PetscScalar *work_N) 515 { 516 PetscErrorCode ierr; 517 PetscInt k; 518 PetscScalar value; 519 PetscScalar *lambda; 520 PC_NN *pcnn = (PC_NN*)(pc->data); 521 PC_IS *pcis = (PC_IS*)(pc->data); 522 523 PetscFunctionBegin; 524 ierr = PetscLogEventBegin(PC_ApplyCoarse,pc,0,0,0);CHKERRQ(ierr); 525 if (u) { 526 if (!vec3_B) vec3_B = u; 527 ierr = VecPointwiseMult(vec1_B,pcis->D,u);CHKERRQ(ierr); 528 ierr = VecSet(z,0.0);CHKERRQ(ierr); 529 ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 530 ierr = VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 531 ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 ierr = VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 ierr = PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 534 ierr = VecScale(vec3_B,-1.0);CHKERRQ(ierr); 535 ierr = VecCopy(r,z);CHKERRQ(ierr); 536 ierr = VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 ierr = VecScatterEnd (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 } else { 539 ierr = VecCopy(r,z);CHKERRQ(ierr); 540 } 541 ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 542 ierr = VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 543 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);CHKERRQ(ierr); 544 for (k=0, value=0.0; k<pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; 545 value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */ 546 { 547 PetscMPIInt rank; 548 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 549 ierr = VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);CHKERRQ(ierr); 550 /* 551 Since we are only inserting local values (one value actually) we don't need to do the 552 reduction that tells us there is no data that needs to be moved. Hence we comment out these 553 ierr = VecAssemblyBegin(pcnn->coarse_b);CHKERRQ(ierr); 554 ierr = VecAssemblyEnd (pcnn->coarse_b);CHKERRQ(ierr); 555 */ 556 } 557 ierr = KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);CHKERRQ(ierr); 558 if (!u) { ierr = VecScale(pcnn->coarse_x,-1.0);CHKERRQ(ierr); } 559 ierr = VecGetArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 560 for (k=0; k<pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; 561 ierr = VecRestoreArray(pcnn->coarse_x,&lambda);CHKERRQ(ierr); 562 ierr = PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);CHKERRQ(ierr); 563 ierr = VecSet(z,0.0);CHKERRQ(ierr); 564 ierr = VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 565 ierr = VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 566 if (!u) { 567 ierr = VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 568 ierr = VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 569 ierr = PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);CHKERRQ(ierr); 570 ierr = VecCopy(r,z);CHKERRQ(ierr); 571 } 572 ierr = VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 573 ierr = VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 574 ierr = PetscLogEventEnd(PC_ApplyCoarse,pc,0,0,0);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 /* ------- E N D O F T H E C O D E ------- */ 579 /* */ 580 /* From now on, "footnotes" (or "historical notes"). */ 581 /* */ 582 /* ------------------------------------------------- */ 583 584 /* -------------------------------------------------------------------------- 585 Historical note 01 586 -------------------------------------------------------------------------- */ 587 /* 588 We considered the possibility of an alternative D_i that would still 589 provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $). 590 The basic principle was still the pseudo-inverse of the counting 591 function; the difference was that we would not count subdomains 592 that do not contribute to the coarse space (i.e., not pure-Neumann 593 subdomains). 594 595 This turned out to be a bad idea: we would solve trivial Neumann 596 problems in the not pure-Neumann subdomains, since we would be scaling 597 the balanced residual by zero. 598 */ 599 600 /* -------------------------------------------------------------------------- 601 Historical note 02 602 -------------------------------------------------------------------------- */ 603 /* 604 We tried an alternative coarse problem, that would eliminate exactly a 605 constant error. Turned out not to improve the overall convergence. 606 */ 607 608