1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 5 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 6 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 7 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 8 code refactoring: 9 - pick up better names for static functions 10 change options structure: 11 - insert BDDC into MG framework? 12 provide other ops? Ask to developers 13 remove all unused printf 14 man pages 15 */ 16 17 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 18 Implementation of BDDC preconditioner based on: 19 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 20 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 21 22 #include "bddc.h" /*I "petscpc.h" I*/ 23 #include <petscblaslapack.h> 24 /* -------------------------------------------------------------------------- */ 25 #undef __FUNCT__ 26 #define __FUNCT__ "PCSetFromOptions_BDDC" 27 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 28 { 29 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 30 PetscErrorCode ierr; 31 32 PetscFunctionBegin; 33 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 34 /* Verbose debugging of main data structures */ 35 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,NULL);CHKERRQ(ierr); 36 /* Some customization for default primal space */ 37 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,NULL);CHKERRQ(ierr); 38 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,NULL);CHKERRQ(ierr); 39 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,NULL);CHKERRQ(ierr); 40 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,NULL);CHKERRQ(ierr); 41 42 /* Coarse solver context */ 43 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /* order of choiches depends on ENUM defined in bddc.h */ 44 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,NULL);CHKERRQ(ierr); 45 46 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 47 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->inexact_prec_type,&pcbddc->inexact_prec_type,NULL);CHKERRQ(ierr); 48 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,NULL);CHKERRQ(ierr); 50 51 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 52 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsInt("-pc_bddc_max_levels","Set maximum number of levels for multilevel","none",pcbddc->max_levels,&pcbddc->max_levels,NULL);CHKERRQ(ierr); 55 ierr = PetscOptionsTail();CHKERRQ(ierr); 56 PetscFunctionReturn(0); 57 } 58 /* -------------------------------------------------------------------------- */ 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 62 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 63 { 64 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 65 66 PetscFunctionBegin; 67 pcbddc->coarse_problem_type = CPT; 68 PetscFunctionReturn(0); 69 } 70 71 #undef __FUNCT__ 72 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 73 /*@ 74 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 75 76 Not collective 77 78 Input Parameters: 79 + pc - the preconditioning context 80 - CoarseProblemType - pick a better name and explain what this is 81 82 Level: intermediate 83 84 Notes: 85 Not collective but all procs must call with same arguments. 86 87 .seealso: PCBDDC 88 @*/ 89 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 90 { 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 95 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 /* -------------------------------------------------------------------------- */ 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetCoarseningRatio_BDDC" 101 static PetscErrorCode PCBDDCSetCoarseningRatio_BDDC(PC pc,PetscInt k) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 105 PetscFunctionBegin; 106 pcbddc->coarsening_ratio=k; 107 PetscFunctionReturn(0); 108 } 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCBDDCSetCoarseningRatio" 112 /*@ 113 PCBDDCSetCoarseningRatio - Set coarsening ratio used in multilevel coarsening 114 115 Logically collective on PC 116 117 Input Parameters: 118 + pc - the preconditioning context 119 - k - coarsening ratio 120 121 Approximatively k subdomains at the finer level will be aggregated into a single subdomain at the coarser level. 122 123 Level: intermediate 124 125 Notes: 126 127 .seealso: PCBDDC 128 @*/ 129 PetscErrorCode PCBDDCSetCoarseningRatio(PC pc,PetscInt k) 130 { 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 135 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseningRatio_C",(PC,PetscInt),(pc,k));CHKERRQ(ierr); 136 PetscFunctionReturn(0); 137 } 138 /* -------------------------------------------------------------------------- */ 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "PCBDDCSetMaxLevels_BDDC" 142 static PetscErrorCode PCBDDCSetMaxLevels_BDDC(PC pc,PetscInt max_levels) 143 { 144 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 145 146 PetscFunctionBegin; 147 pcbddc->max_levels=max_levels; 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "PCBDDCSetMaxLevels" 153 /*@ 154 PCBDDCSetMaxLevels - Sets the maximum number of levels within the multilevel approach. 155 156 Logically collective on PC 157 158 Input Parameters: 159 + pc - the preconditioning context 160 - max_levels - the maximum number of levels 161 162 Default value is 1, i.e. coarse problem will be solved inexactly with one application 163 of PCBDDC preconditioner if the multilevel approach is requested. 164 165 Level: intermediate 166 167 Notes: 168 169 .seealso: PCBDDC 170 @*/ 171 PetscErrorCode PCBDDCSetMaxLevels(PC pc,PetscInt max_levels) 172 { 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 177 ierr = PetscTryMethod(pc,"PCBDDCSetMaxLevels_C",(PC,PetscInt),(pc,max_levels));CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 /* -------------------------------------------------------------------------- */ 181 182 #undef __FUNCT__ 183 #define __FUNCT__ "PCBDDCSetNullSpace_BDDC" 184 static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace) 185 { 186 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 187 PetscErrorCode ierr; 188 189 PetscFunctionBegin; 190 ierr = PetscObjectReference((PetscObject)NullSpace);CHKERRQ(ierr); 191 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 192 193 pcbddc->NullSpace = NullSpace; 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "PCBDDCSetNullSpace" 199 /*@ 200 PCBDDCSetNullSpace - Set NullSpace of global operator of BDDC preconditioned mat. 201 202 Logically collective on PC and MatNullSpace 203 204 Input Parameters: 205 + pc - the preconditioning context 206 - NullSpace - Null space of the linear operator to be preconditioned. 207 208 Level: intermediate 209 210 Notes: 211 212 .seealso: PCBDDC 213 @*/ 214 PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace) 215 { 216 PetscErrorCode ierr; 217 218 PetscFunctionBegin; 219 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 220 ierr = PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 /* -------------------------------------------------------------------------- */ 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 227 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 228 { 229 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 230 PetscErrorCode ierr; 231 232 PetscFunctionBegin; 233 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 234 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 235 236 pcbddc->DirichletBoundaries = DirichletBoundaries; 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 242 /*@ 243 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 244 of Dirichlet boundaries for the global problem. 245 246 Not collective 247 248 Input Parameters: 249 + pc - the preconditioning context 250 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be NULL) 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 271 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 278 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 279 280 pcbddc->NeumannBoundaries = NeumannBoundaries; 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 286 /*@ 287 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 288 of Neumann boundaries for the global problem. 289 290 Not collective 291 292 Input Parameters: 293 + pc - the preconditioning context 294 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be NULL) 295 296 Level: intermediate 297 298 Notes: 299 300 .seealso: PCBDDC 301 @*/ 302 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 303 { 304 PetscErrorCode ierr; 305 306 PetscFunctionBegin; 307 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 308 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 /* -------------------------------------------------------------------------- */ 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 315 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 316 { 317 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 318 319 PetscFunctionBegin; 320 *DirichletBoundaries = pcbddc->DirichletBoundaries; 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 326 /*@ 327 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 328 of Dirichlet boundaries for the global problem. 329 330 Not collective 331 332 Input Parameters: 333 + pc - the preconditioning context 334 335 Output Parameters: 336 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 337 338 Level: intermediate 339 340 Notes: 341 342 .seealso: PCBDDC 343 @*/ 344 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 345 { 346 PetscErrorCode ierr; 347 348 PetscFunctionBegin; 349 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 350 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 /* -------------------------------------------------------------------------- */ 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 357 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 358 { 359 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 360 361 PetscFunctionBegin; 362 *NeumannBoundaries = pcbddc->NeumannBoundaries; 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 368 /*@ 369 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 370 of Neumann boundaries for the global problem. 371 372 Not collective 373 374 Input Parameters: 375 + pc - the preconditioning context 376 377 Output Parameters: 378 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 379 380 Level: intermediate 381 382 Notes: 383 384 .seealso: PCBDDC 385 @*/ 386 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 387 { 388 PetscErrorCode ierr; 389 390 PetscFunctionBegin; 391 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 392 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 /* -------------------------------------------------------------------------- */ 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 399 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PCBDDCGraph mat_graph=pcbddc->mat_graph; 403 PetscErrorCode ierr; 404 405 PetscFunctionBegin; 406 mat_graph->nvtxs=nvtxs; 407 408 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 409 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 410 if (copymode == PETSC_COPY_VALUES) { 411 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 412 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 413 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 414 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 415 } else if (copymode == PETSC_OWN_POINTER) { 416 mat_graph->xadj = (PetscInt*)xadj; 417 mat_graph->adjncy = (PetscInt*)adjncy; 418 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d\n",copymode); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 424 /*@ 425 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 426 427 Not collective 428 429 Input Parameters: 430 + pc - the preconditioning context 431 - nvtxs - number of local vertices of the graph 432 - xadj, adjncy - the CSR graph 433 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 434 in the latter case, memory must be obtained with PetscMalloc. 435 436 Level: intermediate 437 438 Notes: 439 440 .seealso: PCBDDC 441 @*/ 442 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 443 { 444 PetscInt nrows,ncols; 445 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 450 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 451 if (nvtxs != nrows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,nrows); 452 else { 453 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,const PetscInt[],const PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 454 } 455 PetscFunctionReturn(0); 456 } 457 /* -------------------------------------------------------------------------- */ 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 461 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 462 { 463 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 464 PetscInt i; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 /* Destroy ISes if they were already set */ 469 for (i=0; i<pcbddc->n_ISForDofs; i++) { 470 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 471 } 472 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 473 /* allocate space then set */ 474 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 475 for (i=0; i<n_is; i++) { 476 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 477 478 pcbddc->ISForDofs[i]=ISForDofs[i]; 479 } 480 pcbddc->n_ISForDofs=n_is; 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCSetDofsSplitting" 486 /*@ 487 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 488 489 Not collective 490 491 Input Parameters: 492 + pc - the preconditioning context 493 - n - number of index sets defining the fields 494 - IS[] - array of IS describing the fields 495 496 Level: intermediate 497 498 Notes: 499 500 .seealso: PCBDDC 501 @*/ 502 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 503 { 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 508 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 /* -------------------------------------------------------------------------- */ 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCPreSolve_BDDC" 514 /* -------------------------------------------------------------------------- */ 515 /* 516 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 517 guess if a transformation of basis approach has been selected. 518 519 Input Parameter: 520 + pc - the preconditioner contex 521 522 Application Interface Routine: PCPreSolve() 523 524 Notes: 525 The interface routine PCPreSolve() is not usually called directly by 526 the user, but instead is called by KSPSolve(). 527 */ 528 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 529 { 530 PetscErrorCode ierr; 531 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 532 PC_IS *pcis = (PC_IS*)(pc->data); 533 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 534 Mat temp_mat; 535 IS dirIS; 536 PetscInt dirsize,i,*is_indices; 537 PetscScalar *array_x,*array_diagonal; 538 Vec used_vec; 539 PetscBool guess_nonzero; 540 541 PetscFunctionBegin; 542 if (x) { 543 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 544 used_vec = x; 545 } else { 546 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 547 used_vec = pcbddc->temp_solution; 548 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 549 } 550 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 551 if (ksp) { 552 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 553 if (!guess_nonzero) { 554 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 555 } 556 } 557 /* store the original rhs */ 558 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 559 560 /* Take into account zeroed rows -> change rhs and store solution removed */ 561 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 562 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 563 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 564 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 565 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 566 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 567 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 568 if (dirIS) { 569 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 570 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 571 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 572 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 573 for (i=0; i<dirsize; i++) array_x[is_indices[i]] = array_diagonal[is_indices[i]]; 574 575 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 576 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 577 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 578 } 579 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 580 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 581 582 /* remove the computed solution from the rhs */ 583 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 584 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 585 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 586 587 /* store partially computed solution and set initial guess */ 588 if (x) { 589 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 590 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 591 if (pcbddc->use_exact_dirichlet) { 592 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 593 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 594 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 595 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 596 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 597 if (ksp) { 598 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 599 } 600 } 601 } 602 603 /* rhs change of basis */ 604 if (pcbddc->usechangeofbasis) { 605 /* swap pointers for local matrices */ 606 temp_mat = matis->A; 607 matis->A = pcbddc->local_mat; 608 pcbddc->local_mat = temp_mat; 609 /* Get local rhs and apply transformation of basis */ 610 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 611 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 612 /* from original basis to modified basis */ 613 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 614 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 615 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 616 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 617 if (ksp && pcbddc->NullSpace) { 618 ierr = MatNullSpaceRemove(pcbddc->NullSpace,used_vec,NULL);CHKERRQ(ierr); 619 ierr = MatNullSpaceRemove(pcbddc->NullSpace,rhs,NULL);CHKERRQ(ierr); 620 } 621 } 622 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 /* -------------------------------------------------------------------------- */ 626 #undef __FUNCT__ 627 #define __FUNCT__ "PCPostSolve_BDDC" 628 /* -------------------------------------------------------------------------- */ 629 /* 630 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 631 approach has been selected. Also, restores rhs to its original state. 632 633 Input Parameter: 634 + pc - the preconditioner contex 635 636 Application Interface Routine: PCPostSolve() 637 638 Notes: 639 The interface routine PCPostSolve() is not usually called directly by 640 the user, but instead is called by KSPSolve(). 641 */ 642 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 643 { 644 PetscErrorCode ierr; 645 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 646 PC_IS *pcis = (PC_IS*)(pc->data); 647 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 648 Mat temp_mat; 649 650 PetscFunctionBegin; 651 if (pcbddc->usechangeofbasis) { 652 /* swap pointers for local matrices */ 653 temp_mat = matis->A; 654 matis->A = pcbddc->local_mat; 655 pcbddc->local_mat = temp_mat; 656 /* restore rhs to its original state */ 657 if (rhs) { 658 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 659 } 660 /* Get Local boundary and apply transformation of basis to solution vector */ 661 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 663 /* from modified basis to original basis */ 664 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 665 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 666 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 667 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 668 } 669 /* add solution removed in presolve */ 670 if (x) { 671 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 672 } 673 PetscFunctionReturn(0); 674 } 675 /* -------------------------------------------------------------------------- */ 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCSetUp_BDDC" 678 /* -------------------------------------------------------------------------- */ 679 /* 680 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 681 by setting data structures and options. 682 683 Input Parameter: 684 + pc - the preconditioner context 685 686 Application Interface Routine: PCSetUp() 687 688 Notes: 689 The interface routine PCSetUp() is not usually called directly by 690 the user, but instead is called by PCApply() if necessary. 691 */ 692 PetscErrorCode PCSetUp_BDDC(PC pc) 693 { 694 PetscErrorCode ierr; 695 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 696 697 PetscFunctionBegin; 698 if (!pc->setupcalled) { 699 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 700 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 701 Also, we decide to directly build the (same) Dirichlet problem */ 702 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 703 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 704 /* Set up all the "iterative substructuring" common block */ 705 706 ierr = PCISSetUp(pc);CHKERRQ(ierr); 707 /* Get stdout for dbg */ 708 if (pcbddc->dbg_flag) { 709 ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)pc),&pcbddc->dbg_viewer);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 711 } 712 /* Analyze local interface */ 713 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 714 /* Set up local constraint matrix */ 715 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 716 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 717 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 718 } 719 PetscFunctionReturn(0); 720 } 721 722 /* -------------------------------------------------------------------------- */ 723 /* 724 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 725 726 Input Parameters: 727 . pc - the preconditioner context 728 . r - input vector (global) 729 730 Output Parameter: 731 . z - output vector (global) 732 733 Application Interface Routine: PCApply() 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "PCApply_BDDC" 737 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 738 { 739 PC_IS *pcis = (PC_IS*)(pc->data); 740 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 741 PetscErrorCode ierr; 742 const PetscScalar one = 1.0; 743 const PetscScalar m_one = -1.0; 744 const PetscScalar zero = 0.0; 745 746 /* This code is similar to that provided in nn.c for PCNN 747 NN interface preconditioner changed to BDDC 748 Added support for M_3 preconditioner in the reference article (code is active if pcbddc->inexact_prec_type = PETSC_TRUE) */ 749 750 PetscFunctionBegin; 751 if (!pcbddc->use_exact_dirichlet) { 752 /* First Dirichlet solve */ 753 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 756 /* 757 Assembling right hand side for BDDC operator 758 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 759 - the interface part of the global vector z 760 */ 761 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 762 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 763 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 764 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 765 ierr = VecCopy(r,z);CHKERRQ(ierr); 766 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 767 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 768 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 769 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 } else { 771 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 772 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecSet(pcis->vec1_D,zero);CHKERRQ(ierr); 774 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 775 } 776 777 /* Apply partition of unity */ 778 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 779 780 /* Apply interface preconditioner 781 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 782 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 783 784 /* Apply partition of unity and sum boundary values */ 785 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 786 ierr = VecSet(z,zero);CHKERRQ(ierr); 787 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 788 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 789 790 /* Second Dirichlet solve and assembling of output */ 791 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 794 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 795 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 796 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 797 if (pcbddc->inexact_prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 798 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 799 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 800 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 803 } 804 /* -------------------------------------------------------------------------- */ 805 #undef __FUNCT__ 806 #define __FUNCT__ "PCDestroy_BDDC" 807 PetscErrorCode PCDestroy_BDDC(PC pc) 808 { 809 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 810 PetscInt i; 811 PetscErrorCode ierr; 812 813 PetscFunctionBegin; 814 /* free data created by PCIS */ 815 ierr = PCISDestroy(pc);CHKERRQ(ierr); 816 /* free BDDC data */ 817 ierr = MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 818 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 819 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 820 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 821 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 822 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 823 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 824 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 825 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 826 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 827 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 828 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 829 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 830 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 831 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 832 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 833 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 834 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 835 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 836 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 837 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 838 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 839 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 840 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 841 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 842 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 843 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 844 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 845 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 846 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 847 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 848 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 849 for (i=0; i<pcbddc->n_ISForDofs; i++) { 850 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 851 } 852 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 853 for (i=0; i<pcbddc->n_ISForFaces; i++) { 854 ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); 855 } 856 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 857 for (i=0; i<pcbddc->n_ISForEdges; i++) { 858 ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 861 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 862 /* Free graph structure */ 863 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 864 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 865 if (pcbddc->mat_graph->nvtxs) { 866 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 867 } 868 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 869 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 870 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 871 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 872 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 873 /* remove functions */ 874 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C",NULL);CHKERRQ(ierr); 875 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C",NULL);CHKERRQ(ierr); 876 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C",NULL);CHKERRQ(ierr); 877 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C",NULL);CHKERRQ(ierr); 878 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C",NULL);CHKERRQ(ierr); 879 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C",NULL);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C",NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C",NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C",NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C",NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C",NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C",NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C",NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C",NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C",NULL);CHKERRQ(ierr); 889 /* Free the private data structure that was hanging off the PC */ 890 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 /* -------------------------------------------------------------------------- */ 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 897 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 898 { 899 FETIDPMat_ctx *mat_ctx; 900 PC_IS * pcis; 901 PC_BDDC * pcbddc; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 906 pcis = (PC_IS*)mat_ctx->pc->data; 907 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 908 909 /* change of basis for physical rhs if needed 910 It also changes the rhs in case of dirichlet boundaries */ 911 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,NULL,standard_rhs,NULL); 912 /* store vectors for computation of fetidp final solution */ 913 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 /* scale rhs since it should be unassembled */ 918 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 919 if (!pcbddc->inexact_prec_type) { 920 /* compute partially subassembled Schur complement right-hand side */ 921 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 922 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 923 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 924 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 925 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 926 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 927 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 930 } 931 /* BDDC rhs */ 932 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 933 if (pcbddc->inexact_prec_type) { 934 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 935 } 936 /* apply BDDC */ 937 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 938 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 939 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 940 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 941 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 /* restore original rhs */ 944 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 950 /*@ 951 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 952 953 Collective 954 955 Input Parameters: 956 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 957 + standard_rhs - the rhs of your linear system 958 959 Output Parameters: 960 + fetidp_flux_rhs - the rhs of the FETIDP linear system 961 962 Level: developer 963 964 Notes: 965 966 .seealso: PCBDDC 967 @*/ 968 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 969 { 970 FETIDPMat_ctx *mat_ctx; 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 975 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 /* -------------------------------------------------------------------------- */ 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 982 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 983 { 984 FETIDPMat_ctx *mat_ctx; 985 PC_IS *pcis; 986 PC_BDDC *pcbddc; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 991 pcis = (PC_IS*)mat_ctx->pc->data; 992 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 993 994 /* apply B_delta^T */ 995 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 996 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 998 /* compute rhs for BDDC application */ 999 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 1000 if (pcbddc->inexact_prec_type) { 1001 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1002 } 1003 /* apply BDDC */ 1004 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 1005 /* put values into standard global vector */ 1006 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1007 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 if (!pcbddc->inexact_prec_type) { 1009 /* compute values into the interior if solved for the partially subassembled Schur complement */ 1010 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 1011 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 1012 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1013 } 1014 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1015 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 /* final change of basis if needed 1017 Is also sums the dirichlet part removed during RHS assembling */ 1018 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,NULL,NULL,standard_sol); 1019 PetscFunctionReturn(0); 1020 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 1025 /*@ 1026 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 1027 1028 Collective 1029 1030 Input Parameters: 1031 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 1032 + fetidp_flux_sol - the solution of the FETIDP linear system 1033 1034 Output Parameters: 1035 + standard_sol - the solution on the global domain 1036 1037 Level: developer 1038 1039 Notes: 1040 1041 .seealso: PCBDDC 1042 @*/ 1043 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 1044 { 1045 FETIDPMat_ctx *mat_ctx; 1046 PetscErrorCode ierr; 1047 1048 PetscFunctionBegin; 1049 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 1050 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 /* -------------------------------------------------------------------------- */ 1054 1055 extern PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 1056 extern PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 1057 extern PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 1058 extern PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 1061 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1062 { 1063 FETIDPMat_ctx *fetidpmat_ctx; 1064 Mat newmat; 1065 FETIDPPC_ctx *fetidppc_ctx; 1066 PC newpc; 1067 MPI_Comm comm; 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 1072 /* FETIDP linear matrix */ 1073 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 1074 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 1075 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 1076 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 1077 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 1078 ierr = MatSetUp(newmat);CHKERRQ(ierr); 1079 /* FETIDP preconditioner */ 1080 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 1081 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 1082 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 1083 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1084 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 1085 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 1086 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 1087 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1088 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1089 1090 /* return pointers for objects created */ 1091 *fetidp_mat = newmat; 1092 *fetidp_pc = newpc; 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 1098 /*@ 1099 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 1100 1101 Collective 1102 1103 Input Parameters: 1104 + pc - the BDDC preconditioning context (setup must be already called) 1105 1106 Level: developer 1107 1108 Notes: 1109 1110 .seealso: PCBDDC 1111 @*/ 1112 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1113 { 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1118 if (pc->setupcalled) { 1119 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 1120 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC() first \n"); 1121 PetscFunctionReturn(0); 1122 } 1123 /* -------------------------------------------------------------------------- */ 1124 /*MC 1125 PCBDDC - Balancing Domain Decomposition by Constraints. 1126 1127 Options Database Keys: 1128 . -pcbddc ??? - 1129 1130 Level: intermediate 1131 1132 Notes: The matrix used with this preconditioner must be of type MATIS 1133 1134 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1135 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1136 on the subdomains). 1137 1138 Options for the coarse grid preconditioner can be set with - 1139 Options for the Dirichlet subproblem can be set with - 1140 Options for the Neumann subproblem can be set with - 1141 1142 Contributed by Stefano Zampini 1143 1144 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1145 M*/ 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "PCCreate_BDDC" 1149 PETSC_EXTERN PetscErrorCode PCCreate_BDDC(PC pc) 1150 { 1151 PetscErrorCode ierr; 1152 PC_BDDC *pcbddc; 1153 PCBDDCGraph mat_graph; 1154 1155 PetscFunctionBegin; 1156 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1157 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1158 pc->data = (void*)pcbddc; 1159 1160 /* create PCIS data structure */ 1161 ierr = PCISCreate(pc);CHKERRQ(ierr); 1162 1163 /* BDDC specific */ 1164 pcbddc->CoarseNullSpace = 0; 1165 pcbddc->NullSpace = 0; 1166 pcbddc->temp_solution = 0; 1167 pcbddc->original_rhs = 0; 1168 pcbddc->local_mat = 0; 1169 pcbddc->ChangeOfBasisMatrix = 0; 1170 pcbddc->usechangeofbasis = PETSC_TRUE; 1171 pcbddc->usechangeonfaces = PETSC_FALSE; 1172 pcbddc->coarse_vec = 0; 1173 pcbddc->coarse_rhs = 0; 1174 pcbddc->coarse_ksp = 0; 1175 pcbddc->coarse_phi_B = 0; 1176 pcbddc->coarse_phi_D = 0; 1177 pcbddc->vec1_P = 0; 1178 pcbddc->vec1_R = 0; 1179 pcbddc->vec2_R = 0; 1180 pcbddc->local_auxmat1 = 0; 1181 pcbddc->local_auxmat2 = 0; 1182 pcbddc->R_to_B = 0; 1183 pcbddc->R_to_D = 0; 1184 pcbddc->ksp_D = 0; 1185 pcbddc->ksp_R = 0; 1186 pcbddc->local_primal_indices = 0; 1187 pcbddc->inexact_prec_type = PETSC_FALSE; 1188 pcbddc->NeumannBoundaries = 0; 1189 pcbddc->ISForDofs = 0; 1190 pcbddc->ISForVertices = 0; 1191 pcbddc->n_ISForFaces = 0; 1192 pcbddc->n_ISForEdges = 0; 1193 pcbddc->ConstraintMatrix = 0; 1194 pcbddc->use_nnsp_true = PETSC_FALSE; 1195 pcbddc->local_primal_sizes = 0; 1196 pcbddc->local_primal_displacements = 0; 1197 pcbddc->replicated_local_primal_indices = 0; 1198 pcbddc->replicated_local_primal_values = 0; 1199 pcbddc->coarse_loc_to_glob = 0; 1200 pcbddc->dbg_flag = PETSC_FALSE; 1201 pcbddc->coarsening_ratio = 8; 1202 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1203 pcbddc->current_level = 0; 1204 pcbddc->max_levels = 1; 1205 1206 /* allocate and initialize needed graph structure */ 1207 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1208 pcbddc->mat_graph->xadj = 0; 1209 pcbddc->mat_graph->adjncy = 0; 1210 1211 /* function pointers */ 1212 pc->ops->apply = PCApply_BDDC; 1213 pc->ops->applytranspose = 0; 1214 pc->ops->setup = PCSetUp_BDDC; 1215 pc->ops->destroy = PCDestroy_BDDC; 1216 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1217 pc->ops->view = 0; 1218 pc->ops->applyrichardson = 0; 1219 pc->ops->applysymmetricleft = 0; 1220 pc->ops->applysymmetricright = 0; 1221 pc->ops->presolve = PCPreSolve_BDDC; 1222 pc->ops->postsolve = PCPostSolve_BDDC; 1223 1224 /* composing function */ 1225 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseningRatio_C",PCBDDCSetCoarseningRatio_BDDC);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetMaxLevels_C",PCBDDCSetMaxLevels_BDDC);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNullSpace_C",PCBDDCSetNullSpace_BDDC);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C",PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C",PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C",PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C",PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetCoarseProblemType_C",PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetDofsSplitting_C",PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C",PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPreSolve_C",PCPreSolve_BDDC);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCPostSolve_C",PCPostSolve_BDDC);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C",PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C",PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C",PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 /* -------------------------------------------------------------------------- */ 1244 /* All static functions from now on */ 1245 /* -------------------------------------------------------------------------- */ 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "PCBDDCApplyNullSpaceCorrectionPC" 1248 static PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC pc,Vec x,Vec y) 1249 { 1250 NullSpaceCorrection_ctx *pc_ctx; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1255 /* E */ 1256 ierr = MatMultTranspose(pc_ctx->Lbasis_mat,x,pc_ctx->work_small_2);CHKERRQ(ierr); 1257 ierr = MatMultAdd(pc_ctx->Kbasis_mat,pc_ctx->work_small_2,x,pc_ctx->work_full_1);CHKERRQ(ierr); 1258 /* P^-1 */ 1259 ierr = PCApply(pc_ctx->local_pc,pc_ctx->work_full_1,pc_ctx->work_full_2);CHKERRQ(ierr); 1260 /* E^T */ 1261 ierr = MatMultTranspose(pc_ctx->Kbasis_mat,pc_ctx->work_full_2,pc_ctx->work_small_1);CHKERRQ(ierr); 1262 ierr = VecScale(pc_ctx->work_small_1,-1.0);CHKERRQ(ierr); 1263 ierr = MatMultAdd(pc_ctx->Lbasis_mat,pc_ctx->work_small_1,pc_ctx->work_full_2,pc_ctx->work_full_1);CHKERRQ(ierr); 1264 /* Sum contributions */ 1265 ierr = MatMultAdd(pc_ctx->basis_mat,pc_ctx->work_small_2,pc_ctx->work_full_1,y);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "PCBDDCDestroyNullSpaceCorrectionPC" 1271 static PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC pc) 1272 { 1273 NullSpaceCorrection_ctx *pc_ctx; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1278 ierr = VecDestroy(&pc_ctx->work_small_1);CHKERRQ(ierr); 1279 ierr = VecDestroy(&pc_ctx->work_small_2);CHKERRQ(ierr); 1280 ierr = VecDestroy(&pc_ctx->work_full_1);CHKERRQ(ierr); 1281 ierr = VecDestroy(&pc_ctx->work_full_2);CHKERRQ(ierr); 1282 ierr = MatDestroy(&pc_ctx->basis_mat);CHKERRQ(ierr); 1283 ierr = MatDestroy(&pc_ctx->Lbasis_mat);CHKERRQ(ierr); 1284 ierr = MatDestroy(&pc_ctx->Kbasis_mat);CHKERRQ(ierr); 1285 ierr = PCDestroy(&pc_ctx->local_pc);CHKERRQ(ierr); 1286 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "PCBDDCAdaptLocalProblem" 1292 static PetscErrorCode PCBDDCAdaptLocalProblem(PC pc,IS local_dofs) 1293 { 1294 extern PetscErrorCode PCBDDCApplyNullSpaceCorrectionPC(PC,Vec,Vec); 1295 extern PetscErrorCode PCBDDCDestroyNullSpaceCorrectionPC(PC); 1296 1297 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1298 PC_IS *pcis = (PC_IS*)pc->data; 1299 Mat_IS * matis = (Mat_IS*)pc->pmat->data; 1300 KSP *local_ksp; 1301 PC newpc; 1302 NullSpaceCorrection_ctx *shell_ctx; 1303 Mat local_mat,local_pmat,small_mat,inv_small_mat; 1304 MatStructure local_mat_struct; 1305 Vec work1,work2,work3; 1306 const Vec *nullvecs; 1307 VecScatter scatter_ctx; 1308 IS is_aux; 1309 MatFactorInfo matinfo; 1310 PetscScalar *basis_mat,*Kbasis_mat,*array,*array_mat; 1311 PetscScalar one = 1.0,zero = 0.0, m_one = -1.0; 1312 PetscInt basis_dofs,basis_size,nnsp_size,i,k,n_I,n_R; 1313 PetscBool nnsp_has_cnst; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 /* Infer the local solver */ 1318 ierr = ISGetSize(local_dofs,&basis_dofs);CHKERRQ(ierr); 1319 ierr = VecGetSize(pcis->vec1_D,&n_I);CHKERRQ(ierr); 1320 ierr = VecGetSize(pcbddc->vec1_R,&n_R);CHKERRQ(ierr); 1321 if (basis_dofs == n_I) { 1322 /* Dirichlet solver */ 1323 local_ksp = &pcbddc->ksp_D; 1324 } else if (basis_dofs == n_R) { 1325 /* Neumann solver */ 1326 local_ksp = &pcbddc->ksp_R; 1327 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in %s: unknown local IS size %d. n_I=%d, n_R=%d)\n",basis_dofs,n_I,n_R); 1328 ierr = KSPGetOperators(*local_ksp,&local_mat,&local_pmat,&local_mat_struct);CHKERRQ(ierr); 1329 1330 /* Get null space vecs */ 1331 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nnsp_has_cnst,&nnsp_size,&nullvecs);CHKERRQ(ierr); 1332 basis_size = nnsp_size; 1333 if (nnsp_has_cnst) basis_size++; 1334 1335 /* Create shell ctx */ 1336 ierr = PetscMalloc(sizeof(*shell_ctx),&shell_ctx);CHKERRQ(ierr); 1337 1338 /* Create work vectors in shell context */ 1339 ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_small_1);CHKERRQ(ierr); 1340 ierr = VecSetSizes(shell_ctx->work_small_1,basis_size,basis_size);CHKERRQ(ierr); 1341 ierr = VecSetType(shell_ctx->work_small_1,VECSEQ);CHKERRQ(ierr); 1342 ierr = VecDuplicate(shell_ctx->work_small_1,&shell_ctx->work_small_2);CHKERRQ(ierr); 1343 ierr = VecCreate(PETSC_COMM_SELF,&shell_ctx->work_full_1);CHKERRQ(ierr); 1344 ierr = VecSetSizes(shell_ctx->work_full_1,basis_dofs,basis_dofs);CHKERRQ(ierr); 1345 ierr = VecSetType(shell_ctx->work_full_1,VECSEQ);CHKERRQ(ierr); 1346 ierr = VecDuplicate(shell_ctx->work_full_1,&shell_ctx->work_full_2);CHKERRQ(ierr); 1347 1348 /* Allocate workspace */ 1349 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->basis_mat);CHKERRQ(ierr); 1350 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_dofs,basis_size,NULL,&shell_ctx->Kbasis_mat);CHKERRQ(ierr); 1351 ierr = MatDenseGetArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); 1352 ierr = MatDenseGetArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); 1353 1354 /* Restrict local null space on selected dofs (Dirichlet or Neumann) 1355 and compute matrices N and K*N */ 1356 ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); 1357 ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); 1358 ierr = VecScatterCreate(pcis->vec1_N,local_dofs,work1,(IS)0,&scatter_ctx);CHKERRQ(ierr); 1359 k = 0; 1360 for (; k<nnsp_size; k++) { 1361 ierr = VecScatterBegin(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1362 ierr = VecScatterEnd(matis->ctx,nullvecs[k],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1363 ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); 1364 ierr = VecScatterBegin(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1365 ierr = VecScatterEnd(scatter_ctx,pcis->vec1_N,work1,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1366 ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); 1367 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1368 ierr = VecResetArray(work1);CHKERRQ(ierr); 1369 ierr = VecResetArray(work2);CHKERRQ(ierr); 1370 } 1371 if (nnsp_has_cnst) { 1372 ierr = VecPlaceArray(work1,(const PetscScalar*)&basis_mat[k*basis_dofs]);CHKERRQ(ierr); 1373 ierr = VecSet(work1,one);CHKERRQ(ierr); 1374 ierr = VecPlaceArray(work2,(const PetscScalar*)&Kbasis_mat[k*basis_dofs]);CHKERRQ(ierr); 1375 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1376 ierr = VecResetArray(work1);CHKERRQ(ierr); 1377 ierr = VecResetArray(work2);CHKERRQ(ierr); 1378 } 1379 ierr = VecDestroy(&work1);CHKERRQ(ierr); 1380 ierr = VecDestroy(&work2);CHKERRQ(ierr); 1381 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 1382 ierr = MatDenseRestoreArray(shell_ctx->basis_mat,&basis_mat);CHKERRQ(ierr); 1383 ierr = MatDenseRestoreArray(shell_ctx->Kbasis_mat,&Kbasis_mat);CHKERRQ(ierr); 1384 1385 /* Assemble another Mat object in shell context */ 1386 ierr = MatTransposeMatMult(shell_ctx->basis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&small_mat);CHKERRQ(ierr); 1387 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1388 ierr = ISCreateStride(PETSC_COMM_SELF,basis_size,0,1,&is_aux);CHKERRQ(ierr); 1389 ierr = MatLUFactor(small_mat,is_aux,is_aux,&matinfo);CHKERRQ(ierr); 1390 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1391 ierr = PetscMalloc(basis_size*basis_size*sizeof(PetscScalar),&array_mat);CHKERRQ(ierr); 1392 for (k=0; k<basis_size; k++) { 1393 ierr = VecSet(shell_ctx->work_small_1,zero);CHKERRQ(ierr); 1394 ierr = VecSetValue(shell_ctx->work_small_1,k,one,INSERT_VALUES);CHKERRQ(ierr); 1395 ierr = VecAssemblyBegin(shell_ctx->work_small_1);CHKERRQ(ierr); 1396 ierr = VecAssemblyEnd(shell_ctx->work_small_1);CHKERRQ(ierr); 1397 ierr = MatSolve(small_mat,shell_ctx->work_small_1,shell_ctx->work_small_2);CHKERRQ(ierr); 1398 ierr = VecGetArray(shell_ctx->work_small_2,&array);CHKERRQ(ierr); 1399 for (i=0; i<basis_size; i++) array_mat[i*basis_size+k]=array[i]; 1400 ierr = VecRestoreArray(shell_ctx->work_small_2,&array);CHKERRQ(ierr); 1401 } 1402 ierr = MatCreateSeqDense(PETSC_COMM_SELF,basis_size,basis_size,array_mat,&inv_small_mat);CHKERRQ(ierr); 1403 ierr = MatMatMult(shell_ctx->basis_mat,inv_small_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->Lbasis_mat);CHKERRQ(ierr); 1404 ierr = PetscFree(array_mat);CHKERRQ(ierr); 1405 ierr = MatDestroy(&inv_small_mat);CHKERRQ(ierr); 1406 ierr = MatDestroy(&small_mat);CHKERRQ(ierr); 1407 ierr = MatScale(shell_ctx->Kbasis_mat,m_one);CHKERRQ(ierr); 1408 1409 /* Rebuild local PC */ 1410 ierr = KSPGetPC(*local_ksp,&shell_ctx->local_pc);CHKERRQ(ierr); 1411 ierr = PetscObjectReference((PetscObject)shell_ctx->local_pc);CHKERRQ(ierr); 1412 ierr = PCCreate(PETSC_COMM_SELF,&newpc);CHKERRQ(ierr); 1413 ierr = PCSetOperators(newpc,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1414 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1415 ierr = PCShellSetContext(newpc,shell_ctx);CHKERRQ(ierr); 1416 ierr = PCShellSetApply(newpc,PCBDDCApplyNullSpaceCorrectionPC);CHKERRQ(ierr); 1417 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyNullSpaceCorrectionPC);CHKERRQ(ierr); 1418 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1419 ierr = KSPSetPC(*local_ksp,newpc);CHKERRQ(ierr); 1420 ierr = PCDestroy(&newpc);CHKERRQ(ierr); 1421 ierr = KSPSetUp(*local_ksp);CHKERRQ(ierr); 1422 1423 /* test */ 1424 if (pcbddc->dbg_flag) { 1425 PetscReal test_err; 1426 KSP check_ksp; 1427 PC check_pc; 1428 PetscReal lambda_min,lambda_max; 1429 Mat test_mat; 1430 PetscViewer viewer=pcbddc->dbg_viewer; 1431 PetscBool setsym,issym=PETSC_FALSE; 1432 1433 ierr = KSPGetPC(*local_ksp,&check_pc);CHKERRQ(ierr); 1434 ierr = VecDuplicate(shell_ctx->work_full_1,&work1);CHKERRQ(ierr); 1435 ierr = VecDuplicate(shell_ctx->work_full_1,&work2);CHKERRQ(ierr); 1436 ierr = VecDuplicate(shell_ctx->work_full_1,&work3);CHKERRQ(ierr); 1437 ierr = VecSetRandom(shell_ctx->work_small_1,NULL);CHKERRQ(ierr); 1438 ierr = MatMult(shell_ctx->basis_mat,shell_ctx->work_small_1,work1);CHKERRQ(ierr); 1439 ierr = VecCopy(work1,work2);CHKERRQ(ierr); 1440 ierr = MatMult(local_mat,work1,work3);CHKERRQ(ierr); 1441 ierr = PCApply(check_pc,work3,work1);CHKERRQ(ierr); 1442 ierr = VecAXPY(work1,m_one,work2);CHKERRQ(ierr); 1443 ierr = VecNorm(work1,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1444 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace correction for ",PetscGlobalRank); 1445 if (basis_dofs == n_I) { 1446 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Dirichlet ");CHKERRQ(ierr); 1447 } else { 1448 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Neumann ");CHKERRQ(ierr); 1449 } 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"solver is :%1.14e\n",test_err); 1451 1452 ierr = MatTransposeMatMult(shell_ctx->Lbasis_mat,shell_ctx->Kbasis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&test_mat);CHKERRQ(ierr); 1453 ierr = MatShift(test_mat,one);CHKERRQ(ierr); 1454 ierr = MatNorm(test_mat,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1455 ierr = MatDestroy(&test_mat);CHKERRQ(ierr); 1456 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for nullspace matrices is :%1.14e\n",PetscGlobalRank,test_err); 1457 1458 /* Create ksp object suitable for extreme eigenvalues' estimation */ 1459 ierr = KSPCreate(PETSC_COMM_SELF,&check_ksp);CHKERRQ(ierr); 1460 ierr = KSPSetOperators(check_ksp,local_mat,local_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1461 ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-8,PETSC_DEFAULT,basis_dofs);CHKERRQ(ierr); 1462 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 1463 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 1464 if (issym) { 1465 ierr = KSPSetType(check_ksp,KSPCG);CHKERRQ(ierr); 1466 } 1467 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 1468 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 1469 ierr = VecSetRandom(work1,NULL);CHKERRQ(ierr); 1470 ierr = MatMult(local_mat,work1,work2);CHKERRQ(ierr); 1471 ierr = KSPSolve(check_ksp,work2,work2);CHKERRQ(ierr); 1472 ierr = VecAXPY(work2,m_one,work1);CHKERRQ(ierr); 1473 ierr = VecNorm(work2,NORM_INFINITY,&test_err);CHKERRQ(ierr); 1474 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 1475 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d error for adapted KSP %1.14e (it %d, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max); 1477 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 1478 ierr = VecDestroy(&work1);CHKERRQ(ierr); 1479 ierr = VecDestroy(&work2);CHKERRQ(ierr); 1480 ierr = VecDestroy(&work3);CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 1487 static PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 1488 { 1489 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1490 1491 PetscFunctionBegin; 1492 pcbddc->use_exact_dirichlet=use; 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "PCBDDCSetLevel" 1498 static PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 1499 { 1500 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1501 1502 PetscFunctionBegin; 1503 pcbddc->current_level=level; 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "PCBDDCAdaptNullSpace" 1509 static PetscErrorCode PCBDDCAdaptNullSpace(PC pc) 1510 { 1511 PC_IS *pcis = (PC_IS*) (pc->data); 1512 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1513 KSP inv_change; 1514 PC pc_change; 1515 const Vec *nsp_vecs; 1516 Vec *new_nsp_vecs; 1517 PetscInt i,nsp_size,new_nsp_size,start_new; 1518 PetscBool nsp_has_cnst; 1519 MatNullSpace new_nsp; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 1524 ierr = KSPCreate(PETSC_COMM_SELF,&inv_change);CHKERRQ(ierr); 1525 ierr = KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);CHKERRQ(ierr); 1526 ierr = KSPSetType(inv_change,KSPPREONLY);CHKERRQ(ierr); 1527 ierr = KSPGetPC(inv_change,&pc_change);CHKERRQ(ierr); 1528 ierr = PCSetType(pc_change,PCLU);CHKERRQ(ierr); 1529 ierr = KSPSetUp(inv_change);CHKERRQ(ierr); 1530 1531 new_nsp_size = nsp_size; 1532 if (nsp_has_cnst) new_nsp_size++; 1533 ierr = PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);CHKERRQ(ierr); 1534 for (i=0;i<new_nsp_size;i++) { ierr = VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]);CHKERRQ(ierr); } 1535 start_new = 0; 1536 if (nsp_has_cnst) { 1537 start_new = 1; 1538 ierr = VecSet(new_nsp_vecs[0],1.0);CHKERRQ(ierr); 1539 ierr = VecSet(pcis->vec1_B,1.0);CHKERRQ(ierr); 1540 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1541 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1542 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1543 } 1544 for (i=0; i<nsp_size; i++) { 1545 ierr = VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);CHKERRQ(ierr); 1546 ierr = VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1549 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1550 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1551 } 1552 ierr = VecNormalize(new_nsp_vecs[0],NULL);CHKERRQ(ierr); 1553 /* TODO : Orthonormalize vecs when new_nsp_size > 0! */ 1554 1555 ierr = KSPDestroy(&inv_change);CHKERRQ(ierr); 1556 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)pc),PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);CHKERRQ(ierr); 1557 ierr = PCBDDCSetNullSpace(pc,new_nsp);CHKERRQ(ierr); 1558 ierr = MatNullSpaceDestroy(&new_nsp);CHKERRQ(ierr); 1559 /* 1560 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1561 printf("New Null Space, mat changed: %d\n",nsp_t); 1562 temp_mat = matis->A; 1563 matis->A = pcbddc->local_mat; 1564 pcbddc->local_mat = temp_mat; 1565 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1566 printf("New Null Space, mat original: %d\n",nsp_t);*/ 1567 1568 for (i=0; i<new_nsp_size; i++) { ierr = VecDestroy(&new_nsp_vecs[i]);CHKERRQ(ierr); } 1569 ierr = PetscFree(new_nsp_vecs);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1575 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1576 { 1577 FETIDPMat_ctx *newctx; 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1582 1583 newctx->lambda_local = 0; 1584 newctx->temp_solution_B = 0; 1585 newctx->temp_solution_D = 0; 1586 newctx->B_delta = 0; 1587 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1588 newctx->l2g_lambda = 0; 1589 1590 /* increase the reference count for BDDC preconditioner */ 1591 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1592 newctx->pc = pc; 1593 *fetidpmat_ctx = newctx; 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1599 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1600 { 1601 FETIDPPC_ctx *newctx; 1602 PetscErrorCode ierr; 1603 1604 PetscFunctionBegin; 1605 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1606 1607 newctx->lambda_local = 0; 1608 newctx->B_Ddelta = 0; 1609 newctx->l2g_lambda = 0; 1610 1611 /* increase the reference count for BDDC preconditioner */ 1612 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1613 newctx->pc = pc; 1614 *fetidppc_ctx = newctx; 1615 PetscFunctionReturn(0); 1616 } 1617 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1620 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1621 { 1622 FETIDPMat_ctx *mat_ctx; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1627 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1628 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1629 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1630 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1631 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1632 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1633 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1634 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1640 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1641 { 1642 FETIDPPC_ctx *pc_ctx; 1643 PetscErrorCode ierr; 1644 1645 PetscFunctionBegin; 1646 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1647 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1648 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1649 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1650 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1651 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1657 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx) 1658 { 1659 PetscErrorCode ierr; 1660 PC_IS *pcis =(PC_IS*)fetidpmat_ctx->pc->data; 1661 PC_BDDC *pcbddc =(PC_BDDC*)fetidpmat_ctx->pc->data; 1662 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1663 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1664 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1665 1666 Mat ScalingMat; 1667 Vec lambda_global; 1668 IS IS_l2g_lambda; 1669 1670 PetscBool skip_node,fully_redundant; 1671 PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; 1672 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1673 PetscMPIInt rank,nprocs; 1674 PetscScalar scalar_value; 1675 1676 PetscInt *vertex_indices,*temp_indices; 1677 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1678 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1679 PetscScalar *array,*scaling_factors,*vals_B_delta; 1680 PetscInt *aux_local_numbering_2,*dof_sizes,*dof_displs; 1681 PetscInt first_index,old_index; 1682 PetscBool first_found = PETSC_FALSE; 1683 1684 /* For communication of scaling factors */ 1685 PetscInt *ptrs_buffer,neigh_position; 1686 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1687 MPI_Request *send_reqs,*recv_reqs; 1688 1689 /* tests */ 1690 Vec test_vec; 1691 PetscBool test_fetidp; 1692 PetscViewer viewer; 1693 1694 PetscFunctionBegin; 1695 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1696 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1697 1698 /* Default type of lagrange multipliers is non-redundant */ 1699 fully_redundant = PETSC_FALSE; 1700 ierr = PetscOptionsGetBool(NULL,"-fetidp_fullyredundant",&fully_redundant,NULL);CHKERRQ(ierr); 1701 1702 /* Evaluate local and global number of lagrange multipliers */ 1703 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1704 n_local_lambda = 0; 1705 partial_sum = 0; 1706 n_boundary_dofs = 0; 1707 s = 0; 1708 n_vertices = 0; 1709 /* Get Vertices used to define the BDDC */ 1710 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1711 for (i=0; i<pcbddc->local_primal_size; i++) { 1712 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);CHKERRQ(ierr); 1713 if (j == 1) { 1714 vertex_indices[n_vertices]=temp_indices[0]; 1715 n_vertices++; 1716 } 1717 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,NULL);CHKERRQ(ierr); 1718 } 1719 dual_size = pcis->n_B-n_vertices; 1720 1721 ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); 1722 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1723 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1724 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1725 1726 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1727 for (i=0; i<pcis->n; i++) { 1728 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1729 k = 0; 1730 if (j > 0) k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1731 j = j - k; 1732 if (j > 0) n_boundary_dofs++; 1733 1734 skip_node = PETSC_FALSE; 1735 if (s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1736 skip_node = PETSC_TRUE; 1737 s++; 1738 } 1739 if (j < 1) skip_node = PETSC_TRUE; 1740 if (!skip_node) { 1741 if (fully_redundant) { 1742 /* fully redundant set of lagrange multipliers */ 1743 n_lambda_for_dof = (j*(j+1))/2; 1744 } else { 1745 n_lambda_for_dof = j; 1746 } 1747 n_local_lambda += j; 1748 /* needed to evaluate global number of lagrange multipliers */ 1749 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1750 /* store some data needed */ 1751 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1752 aux_local_numbering_1[partial_sum] = i; 1753 aux_local_numbering_2[partial_sum] = n_lambda_for_dof; 1754 partial_sum++; 1755 } 1756 } 1757 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1758 1759 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1760 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1761 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1762 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1763 1764 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1765 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1766 1767 /* compute global ordering of lagrange multipliers and associate l2g map */ 1768 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1769 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1770 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1771 for (i=0;i<dual_size;i++) array[aux_local_numbering_1[i]] = aux_local_numbering_2[i]; 1772 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1773 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1774 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1776 if (pcbddc->dbg_flag && (PetscInt)scalar_value != fetidpmat_ctx->n_lambda) { 1777 SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Global number of multipliers mismatch! (%d!=%d)\n",(PetscInt)scalar_value,fetidpmat_ctx->n_lambda); 1778 } 1779 1780 /* Fill pcis->vec1_global with cumulative function for global numbering */ 1781 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1782 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 1783 k = 0; 1784 first_index = -1; 1785 for (i=0; i<s; i++) { 1786 if (!first_found && array[i] > 0.0) { 1787 first_found = PETSC_TRUE; 1788 first_index = i; 1789 } 1790 k += (PetscInt)array[i]; 1791 } 1792 j = (!rank ? nprocs : 0); 1793 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1794 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1795 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1796 if (!rank) { 1797 dof_displs[0]=0; 1798 for (i=1; i<nprocs; i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1799 } 1800 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1801 if (first_found) { 1802 array[first_index] += k; 1803 1804 old_index = first_index; 1805 for (i=first_index+1; i<s; i++) { 1806 if (array[i] > 0.0) { 1807 array[i] += array[old_index]; 1808 old_index = i; 1809 } 1810 } 1811 } 1812 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1813 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1814 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1815 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1816 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1817 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1818 for (i=0; i<dual_size; i++) { 1819 aux_global_numbering[i] = (PetscInt)array[aux_local_numbering_1[i]]-aux_local_numbering_2[i]; 1820 } 1821 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1822 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1823 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1824 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1825 1826 /* init data for scaling factors exchange */ 1827 partial_sum = 0; 1828 j = 0; 1829 1830 ierr = PetscMalloc(pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1831 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1832 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1833 ierr = PetscMalloc(pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1834 1835 ptrs_buffer[0] = 0; 1836 for (i=1; i<pcis->n_neigh; i++) { 1837 partial_sum += pcis->n_shared[i]; 1838 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1839 } 1840 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1841 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1842 ierr = PetscMalloc(partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1843 for (i=0; i<pcis->n-1; i++) { 1844 j = mat_graph->count[i]; 1845 if (j>0) { 1846 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1847 j = j - k; 1848 } 1849 all_factors[i+1]=all_factors[i]+j; 1850 } 1851 /* scatter B scaling to N vec */ 1852 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1853 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1854 /* communications */ 1855 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1856 for (i=1; i<pcis->n_neigh; i++) { 1857 for (j=0; j<pcis->n_shared[i]; j++) send_buffer[ptrs_buffer[i-1]+j] = array[pcis->shared[i][j]]; 1858 1859 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1860 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1861 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1862 } 1863 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1864 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1865 /* put values in correct places */ 1866 for (i=1; i<pcis->n_neigh; i++) { 1867 for (j=0; j<pcis->n_shared[i]; j++) { 1868 k = pcis->shared[i][j]; 1869 1870 neigh_position = 0; 1871 while (mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) neigh_position++; 1872 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1873 1874 neigh_position = neigh_position - s; 1875 1876 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1877 } 1878 } 1879 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1880 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1881 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1882 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1883 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1884 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1885 1886 /* Compute B and B_delta (local actions) */ 1887 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1888 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1889 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1890 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1891 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1892 1893 n_global_lambda = 0; 1894 partial_sum = 0; 1895 1896 for (i=0;i<dual_size;i++) { 1897 n_global_lambda = aux_global_numbering[i]; 1898 j = mat_graph->count[aux_local_numbering_1[i]]; 1899 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1900 j = j - k; 1901 aux_sums[0] = 0; 1902 for (s=1; s<j; s++) aux_sums[s]=aux_sums[s-1]+j-s+1; 1903 1904 array = all_factors[aux_local_numbering_1[i]]; 1905 n_neg_values = 0; 1906 1907 while (n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) n_neg_values++; 1908 n_pos_values = j - n_neg_values; 1909 1910 if (fully_redundant) { 1911 for (s=0; s<n_neg_values; s++) { 1912 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1913 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1914 vals_B_delta [partial_sum+s]=-1.0; 1915 scaling_factors[partial_sum+s]=array[s]; 1916 } 1917 for (s=0; s<n_pos_values; s++) { 1918 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1919 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1920 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1921 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1922 } 1923 partial_sum += j; 1924 } else { 1925 /* l2g_indices and default cols and vals of B_delta */ 1926 for (s=0; s<j; s++) { 1927 l2g_indices [partial_sum+s]=n_global_lambda+s; 1928 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1929 vals_B_delta [partial_sum+s]=0.0; 1930 } 1931 /* B_delta */ 1932 if (n_neg_values > 0) vals_B_delta[partial_sum+n_neg_values-1] = -1.0; /* there's a rank next to me to the left */ 1933 if (n_neg_values < j) vals_B_delta[partial_sum+n_neg_values] = 1.0; /* there's a rank next to me to the right */ 1934 1935 /* scaling as in Klawonn-Widlund 1999*/ 1936 for (s=0;s<n_neg_values;s++) { 1937 scalar_value = 0.0; 1938 for (k=0;k<s+1;k++) scalar_value += array[k]; 1939 scaling_factors[partial_sum+s] = -scalar_value; 1940 } 1941 for (s=0;s<n_pos_values;s++) { 1942 scalar_value = 0.0; 1943 for (k=s+n_neg_values;k<j;k++) scalar_value += array[k]; 1944 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1945 } 1946 partial_sum += j; 1947 } 1948 } 1949 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1950 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1951 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1952 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1953 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1954 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1955 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1956 1957 /* Local to global mapping of fetidpmat */ 1958 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1959 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1960 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1961 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1962 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1963 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1964 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1965 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1966 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1967 1968 /* Create local part of B_delta */ 1969 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1970 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1971 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1972 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,NULL);CHKERRQ(ierr); 1973 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1974 for (i=0; i<n_local_lambda; i++) { 1975 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1976 } 1977 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1978 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1979 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1980 1981 if (fully_redundant) { 1982 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1983 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1984 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1985 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,NULL);CHKERRQ(ierr); 1986 for (i=0; i<n_local_lambda; i++) { 1987 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1988 } 1989 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1990 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1991 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1992 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1993 } else { 1994 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1995 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1996 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1997 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,NULL);CHKERRQ(ierr); 1998 for (i=0; i<n_local_lambda; i++) { 1999 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 2000 } 2001 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2002 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2003 } 2004 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 2005 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 2006 2007 /* Create some vectors needed by fetidp */ 2008 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 2009 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 2010 2011 test_fetidp = PETSC_FALSE; 2012 2013 ierr = PetscOptionsGetBool(NULL,"-fetidp_check",&test_fetidp,NULL);CHKERRQ(ierr); 2014 2015 if (test_fetidp) { 2016 2017 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 2018 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 2019 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 2020 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 2021 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 2022 if (fully_redundant) { 2023 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 2024 } else { 2025 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 2026 } 2027 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2028 2029 /* TEST A/B: Test numbering of global lambda dofs */ 2030 2031 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 2032 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 2033 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 2034 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2035 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2036 scalar_value = -1.0; 2037 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2038 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2039 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2040 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 2041 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2042 if (fully_redundant) { 2043 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2044 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 2045 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2046 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 2048 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 2049 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2050 } 2051 2052 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 2053 /* This is the meaning of the B matrix */ 2054 2055 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2056 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2057 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2058 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2059 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 /* Action of B_delta */ 2064 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2065 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2066 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2067 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2068 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2069 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 2070 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2071 2072 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 2073 /* E_D = R_D^TR */ 2074 /* P_D = B_{D,delta}^T B_{delta} */ 2075 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 2076 2077 /* compute a random vector in \widetilde{W} */ 2078 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2079 2080 scalar_value = 0.0; /* set zero at vertices */ 2081 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2082 for (i=0;i<n_vertices;i++) array[vertex_indices[i]] = scalar_value; 2083 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2084 2085 /* store w for final comparison */ 2086 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 2087 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2088 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 2090 /* Jump operator P_D : results stored in pcis->vec1_B */ 2091 2092 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2094 /* Action of B_delta */ 2095 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2096 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2097 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 /* Action of B_Ddelta^T */ 2100 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2101 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2102 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2103 2104 /* Average operator E_D : results stored in pcis->vec2_B */ 2105 2106 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2107 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2108 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 2109 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2110 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2111 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2112 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2113 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2114 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2117 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 2119 /* test E_D=I-P_D */ 2120 scalar_value = 1.0; 2121 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 2122 scalar_value = -1.0; 2123 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 2124 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2125 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2126 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 2127 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2128 2129 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 2130 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 2131 2132 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 2133 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2134 2135 scalar_value = 0.0; /* set zero at vertices */ 2136 for (i=0;i<n_vertices;i++) array[vertex_indices[i]]=scalar_value; 2137 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2138 2139 /* Jump operator P_D : results stored in pcis->vec1_B */ 2140 2141 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2142 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2143 /* Action of B_delta */ 2144 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2145 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 2146 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2147 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2148 /* Action of B_Ddelta^T */ 2149 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2150 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2151 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2152 /* diagonal scaling */ 2153 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 2154 /* sum on the interface */ 2155 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2156 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2157 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2158 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2159 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2160 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2161 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 2163 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2164 2165 if (!fully_redundant) { 2166 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 2167 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 2168 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 2169 ierr = VecSetRandom(lambda_global,NULL);CHKERRQ(ierr); 2170 /* Action of B_Ddelta^T */ 2171 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2172 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2173 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2174 /* Action of B_delta */ 2175 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 2176 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 2177 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2178 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2179 scalar_value = -1.0; 2180 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 2181 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 2183 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2185 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 2186 } 2187 } 2188 /* final cleanup */ 2189 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 2190 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 2196 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 2197 { 2198 FETIDPMat_ctx *mat_ctx; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2203 /* get references from objects created when setting up feti mat context */ 2204 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 2205 2206 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 2207 2208 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 2209 2210 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 2211 2212 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 2213 2214 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 2215 PetscFunctionReturn(0); 2216 } 2217 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "FETIDPMatMult" 2220 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 2221 { 2222 FETIDPMat_ctx *mat_ctx; 2223 PC_IS *pcis; 2224 PetscErrorCode ierr; 2225 2226 PetscFunctionBegin; 2227 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2228 pcis = (PC_IS*)mat_ctx->pc->data; 2229 /* Application of B_delta^T */ 2230 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2231 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2232 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2233 /* Application of \widetilde{S}^-1 */ 2234 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 2235 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 2236 /* Application of B_delta */ 2237 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 2238 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2239 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2240 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "FETIDPPCApply" 2246 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 2247 { 2248 FETIDPPC_ctx *pc_ctx; 2249 PC_IS *pcis; 2250 PetscErrorCode ierr; 2251 2252 PetscFunctionBegin; 2253 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 2254 pcis = (PC_IS*)pc_ctx->pc->data; 2255 /* Application of B_Ddelta^T */ 2256 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2257 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2258 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 2259 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 2260 /* Application of S */ 2261 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2262 /* Application of B_Ddelta */ 2263 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 2264 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2265 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2266 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 2272 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 2273 { 2274 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2275 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2276 PetscInt nvtxs; 2277 const PetscInt *xadj,*adjncy; 2278 Mat mat_adj; 2279 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 2280 PCBDDCGraph mat_graph =pcbddc->mat_graph; 2281 PetscErrorCode ierr; 2282 2283 PetscFunctionBegin; 2284 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 2285 if (!mat_graph->xadj) { 2286 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2287 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2288 if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ()\n"); 2289 /* Get adjacency into BDDC workspace */ 2290 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2291 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2292 if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ()\n"); 2293 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2294 } 2295 PetscFunctionReturn(0); 2296 } 2297 /* -------------------------------------------------------------------------- */ 2298 #undef __FUNCT__ 2299 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2300 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 2301 { 2302 PetscErrorCode ierr; 2303 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2304 PC_IS *pcis = (PC_IS*)(pc->data); 2305 const PetscScalar zero = 0.0; 2306 2307 PetscFunctionBegin; 2308 /* Application of PHI^T */ 2309 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2310 if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2311 2312 /* Scatter data of coarse_rhs */ 2313 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 2314 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2315 2316 /* Local solution on R nodes */ 2317 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2318 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2319 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2320 if (pcbddc->inexact_prec_type) { 2321 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2322 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2323 } 2324 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 2325 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2326 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2327 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 if (pcbddc->inexact_prec_type) { 2329 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2330 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2331 } 2332 2333 /* Coarse solution */ 2334 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2335 if (pcbddc->coarse_rhs) { 2336 if (pcbddc->CoarseNullSpace) { 2337 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,NULL);CHKERRQ(ierr); 2338 } 2339 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2340 if (pcbddc->CoarseNullSpace) { 2341 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 2342 } 2343 } 2344 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2345 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2346 2347 /* Sum contributions from two levels */ 2348 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2349 if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2350 PetscFunctionReturn(0); 2351 } 2352 /* -------------------------------------------------------------------------- */ 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 2355 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 2356 { 2357 PetscErrorCode ierr; 2358 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2359 2360 PetscFunctionBegin; 2361 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2362 if (pcbddc->local_auxmat1) { 2363 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 2364 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 /* -------------------------------------------------------------------------- */ 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2371 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2372 { 2373 PetscErrorCode ierr; 2374 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2375 2376 PetscFunctionBegin; 2377 switch (pcbddc->coarse_communications_type) { 2378 case SCATTERS_BDDC: 2379 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2380 break; 2381 case GATHERS_BDDC: 2382 break; 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 /* -------------------------------------------------------------------------- */ 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2389 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2390 { 2391 PetscErrorCode ierr; 2392 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2393 PetscScalar *array_to; 2394 PetscScalar *array_from; 2395 MPI_Comm comm; 2396 PetscInt i; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 2400 switch (pcbddc->coarse_communications_type) { 2401 case SCATTERS_BDDC: 2402 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2403 break; 2404 case GATHERS_BDDC: 2405 if (vec_from) VecGetArray(vec_from,&array_from); 2406 if (vec_to) VecGetArray(vec_to,&array_to); 2407 switch (pcbddc->coarse_problem_type) { 2408 case SEQUENTIAL_BDDC: 2409 if (smode == SCATTER_FORWARD) { 2410 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2411 if (vec_to) { 2412 if (imode == ADD_VALUES) { 2413 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2414 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2415 } 2416 } else { 2417 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2418 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2419 } 2420 } 2421 } 2422 } else { 2423 if (vec_from) { 2424 if (imode == ADD_VALUES) { 2425 printf("Scatter mode %d, insert mode %d for case %d not implemented!\n",smode,imode,pcbddc->coarse_problem_type); 2426 } 2427 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2428 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 2429 } 2430 } 2431 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2432 } 2433 break; 2434 case REPLICATED_BDDC: 2435 if (smode == SCATTER_FORWARD) { 2436 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 2437 if (imode == ADD_VALUES) { 2438 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2439 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2440 } 2441 } else { 2442 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2443 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2444 } 2445 } 2446 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 2447 if (imode == ADD_VALUES) { 2448 for (i=0;i<pcbddc->local_primal_size;i++) { 2449 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 2450 } 2451 } else { 2452 for (i=0;i<pcbddc->local_primal_size;i++) { 2453 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 2454 } 2455 } 2456 } 2457 break; 2458 case MULTILEVEL_BDDC: 2459 break; 2460 case PARALLEL_BDDC: 2461 break; 2462 } 2463 if (vec_from) VecRestoreArray(vec_from,&array_from); 2464 if (vec_to) VecRestoreArray(vec_to,&array_to); 2465 break; 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 /* -------------------------------------------------------------------------- */ 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 2472 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 2473 { 2474 PetscErrorCode ierr; 2475 PC_IS *pcis = (PC_IS*)(pc->data); 2476 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2477 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2478 PetscInt *nnz,*is_indices; 2479 PetscScalar *temp_quadrature_constraint; 2480 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 2481 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 2482 PetscInt n_constraints,n_vertices,size_of_constraint; 2483 PetscScalar quad_value; 2484 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 2485 PetscInt nnsp_size =0,nnsp_addone=0,temp_constraints,temp_start_ptr; 2486 IS *used_IS; 2487 MatType impMatType=MATSEQAIJ; 2488 PetscBLASInt Bs,Bt,lwork,lierr; 2489 PetscReal tol=1.0e-8; 2490 MatNullSpace nearnullsp; 2491 const Vec *nearnullvecs; 2492 Vec *localnearnullsp; 2493 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2494 PetscReal *rwork,*singular_vals; 2495 PetscBLASInt Bone=1,*ipiv; 2496 Vec temp_vec; 2497 Mat temp_mat; 2498 KSP temp_ksp; 2499 PC temp_pc; 2500 PetscInt s,start_constraint,dual_dofs; 2501 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2502 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2503 PetscBool boolforface,*change_basis; 2504 2505 /* some ugly conditional declarations */ 2506 #if defined(PETSC_MISSING_LAPACK_GESVD) 2507 PetscScalar dot_result; 2508 PetscScalar one=1.0,zero=0.0; 2509 PetscInt ii; 2510 PetscScalar *singular_vectors; 2511 PetscBLASInt *iwork,*ifail; 2512 PetscReal dummy_real,abs_tol; 2513 PetscBLASInt eigs_found; 2514 #if defined(PETSC_USE_COMPLEX) 2515 PetscScalar val1,val2; 2516 #endif 2517 #endif 2518 PetscBLASInt dummy_int; 2519 PetscScalar dummy_scalar; 2520 2521 PetscFunctionBegin; 2522 /* check if near null space is attached to global mat */ 2523 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2524 if (nearnullsp) { 2525 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2526 } else { /* if near null space is not provided it uses constants */ 2527 nnsp_has_cnst = PETSC_TRUE; 2528 use_nnsp_true = PETSC_TRUE; 2529 } 2530 if (nnsp_has_cnst) nnsp_addone = 1; 2531 2532 /* 2533 Evaluate maximum storage size needed by the procedure 2534 - temp_indices will contain start index of each constraint stored as follows 2535 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2536 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2537 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2538 */ 2539 2540 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2541 total_counts *= (nnsp_addone+nnsp_size); 2542 2543 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2544 2545 total_counts += n_vertices; 2546 2547 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2548 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2549 2550 total_counts = 0; 2551 max_size_of_constraint = 0; 2552 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++) { 2553 if (i<pcbddc->n_ISForEdges) used_IS = &pcbddc->ISForEdges[i]; 2554 else used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2555 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2556 total_counts += j; 2557 if (j>max_size_of_constraint) max_size_of_constraint=j; 2558 } 2559 total_counts *= (nnsp_addone+nnsp_size); 2560 total_counts += n_vertices; 2561 2562 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2563 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2564 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2565 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2566 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2567 2568 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 2569 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 2570 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2571 2572 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2573 rwork = 0; 2574 work = 0; 2575 singular_vals = 0; 2576 temp_basis = 0; 2577 correlation_mat = 0; 2578 if (!pcbddc->use_nnsp_true) { 2579 PetscScalar temp_work; 2580 #if defined(PETSC_MISSING_LAPACK_GESVD) 2581 /* POD */ 2582 PetscInt max_n; 2583 max_n = nnsp_addone+nnsp_size; 2584 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2585 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2586 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2587 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2588 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2589 #if defined(PETSC_USE_COMPLEX) 2590 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2591 #endif 2592 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2593 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2594 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2595 ierr = PetscBLASIntCast(max_n,&Bt);CHKERRQ(ierr); 2596 lwork =-1; 2597 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2598 #if !defined(PETSC_USE_COMPLEX) 2599 abs_tol=1.e-8; 2600 PetscStackCallBLAS("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr)); 2601 #else 2602 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2603 #endif 2604 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2605 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2606 #else /* on missing GESVD */ 2607 /* SVD */ 2608 PetscInt max_n,min_n; 2609 max_n = max_size_of_constraint; 2610 min_n = nnsp_addone+nnsp_size; 2611 if (max_size_of_constraint < (nnsp_addone+nnsp_size)) { 2612 min_n = max_size_of_constraint; 2613 max_n = nnsp_addone+nnsp_size; 2614 } 2615 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2616 #if defined(PETSC_USE_COMPLEX) 2617 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2618 #endif 2619 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2620 lwork =-1; 2621 ierr = PetscBLASIntCast(max_n,&Bs);CHKERRQ(ierr); 2622 ierr = PetscBLASIntCast(min_n,&Bt);CHKERRQ(ierr); 2623 dummy_int = Bs; 2624 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2625 #if !defined(PETSC_USE_COMPLEX) 2626 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2627 #else 2628 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2629 #endif 2630 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2631 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2632 #endif 2633 /* Allocate optimal workspace */ 2634 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2635 total_counts = (PetscInt)lwork; 2636 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2637 } 2638 /* get local part of global near null space vectors */ 2639 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2640 for (k=0; k<nnsp_size; k++) { 2641 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2642 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2643 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2644 } 2645 /* Now we can loop on constraining sets */ 2646 total_counts =0; 2647 temp_indices[0]=0; 2648 /* vertices */ 2649 PetscBool used_vertex; 2650 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2651 if (nnsp_has_cnst) { /* consider all vertices */ 2652 for (i=0; i<n_vertices; i++) { 2653 temp_indices_to_constraint[temp_indices[total_counts]] = is_indices[i]; 2654 temp_indices_to_constraint_B[temp_indices[total_counts]]= local_to_B[is_indices[i]]; 2655 temp_quadrature_constraint[temp_indices[total_counts]] = 1.0; 2656 temp_indices[total_counts+1] = temp_indices[total_counts]+1; 2657 change_basis[total_counts] = PETSC_FALSE; 2658 total_counts++; 2659 } 2660 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2661 for (i=0; i<n_vertices; i++) { 2662 used_vertex = PETSC_FALSE; 2663 k = 0; 2664 while (!used_vertex && k<nnsp_size) { 2665 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2666 if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2667 temp_indices_to_constraint[temp_indices[total_counts]] =is_indices[i]; 2668 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2669 temp_quadrature_constraint[temp_indices[total_counts]] =1.0; 2670 temp_indices[total_counts+1] =temp_indices[total_counts]+1; 2671 change_basis[total_counts] =PETSC_FALSE; 2672 total_counts++; 2673 used_vertex=PETSC_TRUE; 2674 } 2675 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2676 k++; 2677 } 2678 } 2679 } 2680 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2681 n_vertices = total_counts; 2682 2683 /* edges and faces */ 2684 for (i=0; i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces; i++) { 2685 if (i<pcbddc->n_ISForEdges) { 2686 used_IS = &pcbddc->ISForEdges[i]; 2687 boolforface = pcbddc->usechangeofbasis; 2688 } else { 2689 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2690 boolforface = pcbddc->usechangeonfaces; 2691 } 2692 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2693 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2694 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2695 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2696 if (nnsp_has_cnst) { 2697 temp_constraints++; 2698 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2699 for (j=0; j<size_of_constraint; j++) { 2700 temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j]; 2701 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2702 temp_quadrature_constraint[temp_indices[total_counts]+j] =quad_value; 2703 } 2704 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2705 change_basis[total_counts] =boolforface; 2706 total_counts++; 2707 } 2708 for (k=0; k<nnsp_size; k++) { 2709 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2710 for (j=0; j<size_of_constraint; j++) { 2711 temp_indices_to_constraint[temp_indices[total_counts]+j] =is_indices[j]; 2712 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2713 temp_quadrature_constraint[temp_indices[total_counts]+j] =array_vector[is_indices[j]]; 2714 } 2715 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2716 quad_value = 1.0; 2717 if (use_nnsp_true) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2718 ierr = PetscBLASIntCast(size_of_constraint,&Bs);CHKERRQ(ierr); 2719 PetscStackCallBLAS("BLASasum",quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone)); 2720 } 2721 if (quad_value > 0.0) { /* keep indices and values */ 2722 temp_constraints++; 2723 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2724 change_basis[total_counts] =boolforface; 2725 total_counts++; 2726 } 2727 } 2728 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2729 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2730 if (!use_nnsp_true) { 2731 ierr = PetscBLASIntCast(size_of_constraint,&Bs);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(temp_constraints,&Bt);CHKERRQ(ierr); 2733 2734 #if defined(PETSC_MISSING_LAPACK_GESVD) 2735 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2736 /* Store upper triangular part of correlation matrix */ 2737 for (j=0; j<temp_constraints; j++) { 2738 for (k=0; k<j+1; k++) { 2739 #if defined(PETSC_USE_COMPLEX) 2740 /* hand made complex dot product -> replace */ 2741 dot_result = 0.0; 2742 for (ii=0; ii<size_of_constraint; ii++) { 2743 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2744 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2745 dot_result += val1*PetscConj(val2); 2746 } 2747 #else 2748 PetscStackCallBLAS("BLASdot",dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone)); 2749 #endif 2750 correlation_mat[j*temp_constraints+k]=dot_result; 2751 } 2752 } 2753 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2754 #if !defined(PETSC_USE_COMPLEX) 2755 PetscStackCallBLAS("LAPACKsyevx",LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int,&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr)); 2756 #else 2757 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2758 #endif 2759 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2760 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2761 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2762 j=0; 2763 while (j < Bt && singular_vals[j] < tol) j++; 2764 total_counts=total_counts-j; 2765 if (j<temp_constraints) { 2766 for (k=j;k<Bt;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2767 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2768 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs)); 2769 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2770 /* copy POD basis into used quadrature memory */ 2771 for (k=0;k<Bt-j;k++) { 2772 for (ii=0;ii<size_of_constraint;ii++) { 2773 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2774 } 2775 } 2776 } 2777 2778 #else /* on missing GESVD */ 2779 PetscInt min_n = temp_constraints; 2780 if (min_n > size_of_constraint) min_n = size_of_constraint; 2781 dummy_int = Bs; 2782 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2783 #if !defined(PETSC_USE_COMPLEX) 2784 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2785 #else 2786 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2787 #endif 2788 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2789 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2790 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2791 j=0; 2792 while (j < min_n && singular_vals[min_n-j-1] < tol) j++; 2793 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2794 #endif 2795 } 2796 } 2797 2798 n_constraints =total_counts-n_vertices; 2799 local_primal_size = total_counts; 2800 /* set quantities in pcbddc data structure */ 2801 pcbddc->n_vertices = n_vertices; 2802 pcbddc->n_constraints = n_constraints; 2803 pcbddc->local_primal_size = local_primal_size; 2804 2805 /* Create constraint matrix */ 2806 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2807 /* so we need to set it up properly either with or without change of basis */ 2808 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2809 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2810 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2811 2812 /* compute a local numbering of constraints : vertices first then constraints */ 2813 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2814 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2815 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2816 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2817 2818 total_counts=0; 2819 2820 /* find vertices: subdomain corners plus dofs with basis changed */ 2821 for (i=0; i<local_primal_size; i++) { 2822 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2823 if (change_basis[i] || size_of_constraint == 1) { 2824 k=0; 2825 while (k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2826 k=k+1; 2827 } 2828 j = temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2829 2830 array_vector[j] = 1.0; 2831 aux_primal_numbering[total_counts] = j; 2832 aux_primal_permutation[total_counts] = total_counts; 2833 total_counts++; 2834 } 2835 } 2836 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2837 /* permute indices in order to have a sorted set of vertices */ 2838 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2839 /* nonzero structure */ 2840 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2841 for (i=0;i<total_counts;i++) nnz[i]=1; 2842 2843 j=total_counts; 2844 for (i=n_vertices; i<local_primal_size; i++) { 2845 if (!change_basis[i]) { 2846 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2847 j++; 2848 } 2849 } 2850 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2851 ierr = PetscFree(nnz);CHKERRQ(ierr); 2852 /* set values in constraint matrix */ 2853 for (i=0; i<total_counts; i++) { 2854 j = aux_primal_permutation[i]; 2855 k = aux_primal_numbering[j]; 2856 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2857 } 2858 for (i=n_vertices; i<local_primal_size; i++) { 2859 if (!change_basis[i]) { 2860 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2861 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2862 total_counts++; 2863 } 2864 } 2865 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2866 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2867 /* assembling */ 2868 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2869 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 2871 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2872 if (pcbddc->usechangeofbasis) { 2873 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2874 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2875 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2876 /* work arrays */ 2877 /* we need to reuse these arrays, so we free them */ 2878 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2879 ierr = PetscFree(work);CHKERRQ(ierr); 2880 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2881 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2882 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2883 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2884 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2885 2886 /* Overestimated nonzeros per row */ 2887 k=1; 2888 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2889 if (change_basis[i]) { 2890 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2891 if (k < size_of_constraint) k = size_of_constraint; 2892 2893 for (j=0;j<size_of_constraint;j++) { 2894 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2895 } 2896 } 2897 } 2898 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2899 ierr = PetscFree(nnz);CHKERRQ(ierr); 2900 /* Temporary array to store indices */ 2901 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2902 /* Set initial identity in the matrix */ 2903 for (i=0; i<pcis->n_B; i++) { 2904 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2905 } 2906 /* Now we loop on the constraints which need a change of basis */ 2907 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2908 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2909 temp_constraints = 0; 2910 if (pcbddc->n_vertices < local_primal_size) { 2911 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2912 } 2913 for (i=pcbddc->n_vertices; i<local_primal_size; i++) { 2914 if (change_basis[i]) { 2915 compute_submatrix = PETSC_FALSE; 2916 useksp = PETSC_FALSE; 2917 if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2918 temp_constraints++; 2919 if (i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2920 compute_submatrix = PETSC_TRUE; 2921 } 2922 } 2923 if (compute_submatrix) { 2924 if (temp_constraints > 1 || pcbddc->use_nnsp_true) useksp = PETSC_TRUE; 2925 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2926 if (useksp) { /* experimental */ 2927 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2928 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2929 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2930 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,NULL);CHKERRQ(ierr); 2931 } 2932 /* First _size_of_constraint-temp_constraints_ columns */ 2933 dual_dofs = size_of_constraint-temp_constraints; 2934 start_constraint = i+1-temp_constraints; 2935 for (s=0; s<dual_dofs; s++) { 2936 is_indices[0] = s; 2937 for (j=0;j<temp_constraints;j++) { 2938 for (k=0;k<temp_constraints;k++) { 2939 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2940 } 2941 work[j] = -temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2942 is_indices[j+1] = s+j+1; 2943 } 2944 Bt = temp_constraints; 2945 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2946 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr)); 2947 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2948 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2949 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2950 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2951 if (useksp) { 2952 /* temp mat with transposed rows and columns */ 2953 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2954 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2955 } 2956 } 2957 if (useksp) { 2958 /* last rows of temp_mat */ 2959 for (j=0;j<size_of_constraint;j++) is_indices[j] = j; 2960 2961 for (s=0;s<temp_constraints;s++) { 2962 k = s + dual_dofs; 2963 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2964 } 2965 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 ierr = MatGetVecs(temp_mat,&temp_vec,NULL);CHKERRQ(ierr); 2968 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2969 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2970 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2971 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2972 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2973 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2974 for (s=0; s<temp_constraints; s++) { 2975 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2976 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2977 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2978 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2979 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2980 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2981 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2982 /* last columns of change of basis matrix associated to new primal dofs */ 2983 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2984 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2985 } 2986 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2987 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2988 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2989 } else { 2990 /* last columns of change of basis matrix associated to new primal dofs */ 2991 for (s=0; s<temp_constraints; s++) { 2992 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2993 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2994 } 2995 } 2996 /* prepare for the next cycle */ 2997 temp_constraints = 0; 2998 if (i != local_primal_size -1) temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2999 } 3000 } 3001 } 3002 /* assembling */ 3003 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 ierr = PetscFree(ipiv);CHKERRQ(ierr); 3006 ierr = PetscFree(is_indices);CHKERRQ(ierr); 3007 } 3008 /* free workspace no longer needed */ 3009 ierr = PetscFree(rwork);CHKERRQ(ierr); 3010 ierr = PetscFree(work);CHKERRQ(ierr); 3011 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3012 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3013 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3014 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 3015 ierr = PetscFree(change_basis);CHKERRQ(ierr); 3016 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 3017 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 3018 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 3019 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 3020 #if defined(PETSC_MISSING_LAPACK_GESVD) 3021 ierr = PetscFree(iwork);CHKERRQ(ierr); 3022 ierr = PetscFree(ifail);CHKERRQ(ierr); 3023 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 3024 #endif 3025 for (k=0; k<nnsp_size; k++) { 3026 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3027 } 3028 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3029 PetscFunctionReturn(0); 3030 } 3031 /* -------------------------------------------------------------------------- */ 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "PCBDDCCoarseSetUp" 3034 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 3035 { 3036 PetscErrorCode ierr; 3037 PC_IS *pcis = (PC_IS*)(pc->data); 3038 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3039 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3040 Mat change_mat_all; 3041 IS is_R_local; 3042 IS is_V_local; 3043 IS is_C_local; 3044 IS is_aux1; 3045 IS is_aux2; 3046 VecType impVecType; 3047 MatType impMatType; 3048 PetscInt n_R =0; 3049 PetscInt n_D =0; 3050 PetscInt n_B =0; 3051 PetscScalar zero =0.0; 3052 PetscScalar one =1.0; 3053 PetscScalar m_one=-1.0; 3054 PetscScalar * array; 3055 PetscScalar *coarse_submat_vals; 3056 PetscInt *idx_R_local; 3057 PetscInt *idx_V_B; 3058 PetscScalar *coarsefunctions_errors; 3059 PetscScalar *constraints_errors; 3060 3061 /* auxiliary indices */ 3062 PetscInt i,j,k; 3063 3064 /* for verbose output of bddc */ 3065 PetscViewer viewer =pcbddc->dbg_viewer; 3066 PetscBool dbg_flag=pcbddc->dbg_flag; 3067 3068 /* for counting coarse dofs */ 3069 PetscInt n_vertices,n_constraints; 3070 PetscInt size_of_constraint; 3071 PetscInt *row_cmat_indices; 3072 PetscScalar *row_cmat_values; 3073 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 3074 3075 PetscFunctionBegin; 3076 /* Set Non-overlapping dimensions */ 3077 n_B = pcis->n_B; n_D = pcis->n - n_B; 3078 /* Set types for local objects needed by BDDC precondtioner */ 3079 impMatType = MATSEQDENSE; 3080 impVecType = VECSEQ; 3081 3082 /* get vertex indices from constraint matrix */ 3083 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 3084 n_vertices=0; 3085 for (i=0; i<pcbddc->local_primal_size; i++) { 3086 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3087 if (size_of_constraint == 1) { 3088 vertices[n_vertices]=row_cmat_indices[0]; 3089 n_vertices++; 3090 } 3091 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3092 } 3093 /* Set number of constraints */ 3094 n_constraints = pcbddc->local_primal_size-n_vertices; 3095 3096 /* vertices in boundary numbering */ 3097 if (n_vertices) { 3098 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 3099 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3100 for (i=0; i<n_vertices; i++) array[vertices[i]] = i; 3101 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3102 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 3105 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3106 for (i=0; i<n_vertices; i++) { 3107 j=0; 3108 while (array[j] != i) j++; 3109 idx_V_B[i]=j; 3110 } 3111 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3112 } 3113 3114 /* transform local matrices if needed */ 3115 if (pcbddc->usechangeofbasis) { 3116 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3117 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3118 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 3119 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3120 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3121 k=1; 3122 for (i=0;i<n_B;i++) { 3123 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 3124 nnz[is_indices[i]]=j; 3125 if (k < j) k = j; 3126 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 3127 } 3128 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3129 /* assemble change of basis matrix on the whole set of local dofs */ 3130 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 3131 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 3132 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3133 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 3134 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 3135 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3136 for (i=0; i<n_D; i++) { 3137 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3138 } 3139 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3140 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3141 for (i=0; i<n_B; i++) { 3142 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3143 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 3144 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 3145 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3146 } 3147 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3148 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3149 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 3150 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 3151 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 3152 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 3153 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 3154 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 3155 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 3156 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 3157 ierr = PetscFree(nnz);CHKERRQ(ierr); 3158 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 3159 } else { 3160 /* without change of basis, the local matrix is unchanged */ 3161 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 3162 3163 pcbddc->local_mat = matis->A; 3164 } 3165 /* Change global null space passed in by the user if change of basis has been performed */ 3166 if (pcbddc->NullSpace && pcbddc->usechangeofbasis) { 3167 ierr = PCBDDCAdaptNullSpace(pc);CHKERRQ(ierr); 3168 } 3169 3170 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3171 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 3172 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3173 for (i=0;i<n_vertices;i++) array[vertices[i]] = zero; 3174 ierr = PetscMalloc((pcis->n - n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 3175 for (i=0, n_R=0; i<pcis->n; i++) { 3176 if (array[i] == one) { 3177 idx_R_local[n_R] = i; 3178 n_R++; 3179 } 3180 } 3181 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3182 if (dbg_flag) { 3183 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3184 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3185 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3186 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 3188 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 3189 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3190 } 3191 3192 /* Allocate needed vectors */ 3193 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 3194 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 3195 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 3196 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 3197 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 3198 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3199 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3200 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 3201 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 3202 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3203 3204 /* Creating some index sets needed */ 3205 /* For submatrices */ 3206 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 3207 if (n_vertices) { 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 3209 } 3210 if (n_constraints) { 3211 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 3212 } 3213 3214 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3215 { 3216 PetscInt *aux_array1; 3217 PetscInt *aux_array2; 3218 PetscInt *idx_I_local; 3219 3220 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3221 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 3222 3223 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3224 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3225 for (i=0; i<n_D; i++) array[idx_I_local[i]] = 0; 3226 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3227 for (i=0, j=0; i<n_R; i++) { 3228 if (array[idx_R_local[i]] == one) { 3229 aux_array1[j] = i; 3230 j++; 3231 } 3232 } 3233 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3234 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3235 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3237 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3238 for (i=0, j=0; i<n_B; i++) { 3239 if (array[i] == one) { 3240 aux_array2[j] = i; j++; 3241 } 3242 } 3243 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3244 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 3245 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3246 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3247 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 3248 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3249 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3250 3251 if (pcbddc->inexact_prec_type || dbg_flag) { 3252 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3253 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3254 for (i=0, j=0; i<n_R; i++) { 3255 if (array[idx_R_local[i]] == zero) { 3256 aux_array1[j] = i; 3257 j++; 3258 } 3259 } 3260 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3261 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3262 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3263 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3264 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3265 } 3266 } 3267 3268 /* Creating PC contexts for local Dirichlet and Neumann problems */ 3269 { 3270 Mat A_RR; 3271 PC pc_temp; 3272 3273 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 3274 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3275 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3276 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 3277 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3278 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 3279 3280 /* default */ 3281 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3282 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3283 3284 /* Allow user's customization */ 3285 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3286 3287 /* umfpack interface has a bug when matrix dimension is zero */ 3288 if (!n_D) { 3289 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3290 } 3291 3292 /* Set Up KSP for Dirichlet problem of BDDC */ 3293 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3294 3295 /* set ksp_D into pcis data */ 3296 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3297 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3298 pcis->ksp_D = pcbddc->ksp_D; 3299 3300 /* Matrix for Neumann problem is A_RR -> we need to create it */ 3301 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3302 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3303 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3304 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 3305 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3306 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 3307 3308 /* default */ 3309 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3310 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3311 3312 /* Allow user's customization */ 3313 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3314 3315 /* umfpack interface has a bug when matrix dimension is zero */ 3316 if (!pcis->n) { 3317 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3318 } 3319 3320 /* Set Up KSP for Neumann problem of BDDC */ 3321 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3322 3323 /* check Dirichlet and Neumann solvers and adapt them is a nullspace correction is needed */ 3324 { 3325 Vec temp_vec; 3326 PetscReal value; 3327 PetscMPIInt use_exact,use_exact_reduced; 3328 3329 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 3330 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 3331 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3332 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 3333 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 3334 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3335 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3336 use_exact = 1; 3337 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 3338 3339 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3340 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 3341 if (dbg_flag) { 3342 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3343 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3344 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 3345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3346 } 3347 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) { 3348 ierr = PCBDDCAdaptLocalProblem(pc,pcis->is_I_local); 3349 } 3350 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 3351 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 3352 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3353 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 3354 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 3355 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3356 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3357 3358 use_exact = 1; 3359 if (PetscAbsReal(value) > 1.e-4) use_exact = 0; 3360 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3361 if (dbg_flag) { 3362 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3363 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3364 } 3365 if (pcbddc->NullSpace && !use_exact_reduced) { 3366 ierr = PCBDDCAdaptLocalProblem(pc,is_R_local); 3367 } 3368 } 3369 /* free Neumann problem's matrix */ 3370 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3371 } 3372 3373 /* Assemble all remaining stuff needed to apply BDDC */ 3374 { 3375 Mat A_RV,A_VR,A_VV; 3376 Mat M1; 3377 Mat C_CR; 3378 Mat AUXMAT; 3379 Vec vec1_C; 3380 Vec vec2_C; 3381 Vec vec1_V; 3382 Vec vec2_V; 3383 PetscInt *nnz; 3384 PetscInt *auxindices; 3385 PetscInt index; 3386 PetscScalar *array2; 3387 MatFactorInfo matinfo; 3388 3389 /* Allocating some extra storage just to be safe */ 3390 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3391 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 3392 for (i=0;i<pcis->n;i++) auxindices[i]=i; 3393 3394 /* some work vectors on vertices and/or constraints */ 3395 if (n_vertices) { 3396 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 3397 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 3398 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 3399 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 3400 } 3401 if (n_constraints) { 3402 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 3403 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 3404 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 3405 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 3406 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 3407 } 3408 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3409 if (n_constraints) { 3410 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3411 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 3412 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 3413 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,NULL);CHKERRQ(ierr); 3414 3415 /* Create Constraint matrix on R nodes: C_{CR} */ 3416 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3417 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 3418 3419 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 3420 for (i=0; i<n_constraints; i++) { 3421 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 3422 3423 /* Get row of constraint matrix in R numbering */ 3424 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3425 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3426 for (j=0;j<size_of_constraint;j++) array[row_cmat_indices[j]] = -row_cmat_values[j]; 3427 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3428 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3429 3430 /* Solve for row of constraint matrix in R numbering */ 3431 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3432 3433 /* Set values */ 3434 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3435 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3436 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3437 } 3438 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3439 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3440 3441 /* Assemble AUXMAT = (LUFactor)(-C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3442 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 3443 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 3444 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 3445 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 3446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3447 3448 /* Assemble explicitly M1 = (C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} needed in preproc */ 3449 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 3450 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 3451 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 3452 ierr = MatSeqDenseSetPreallocation(M1,NULL);CHKERRQ(ierr); 3453 for (i=0; i<n_constraints; i++) { 3454 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 3455 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 3456 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 3457 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 3458 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 3459 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 3460 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 3461 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3462 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 3463 } 3464 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3465 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3466 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3467 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 3468 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3469 3470 } 3471 3472 /* Get submatrices from subdomain matrix */ 3473 if (n_vertices) { 3474 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3475 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3476 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3477 } 3478 3479 /* Matrix of coarse basis functions (local) */ 3480 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3481 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 3482 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 3483 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,NULL);CHKERRQ(ierr); 3484 if (pcbddc->inexact_prec_type || dbg_flag) { 3485 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3486 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 3487 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 3488 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,NULL);CHKERRQ(ierr); 3489 } 3490 3491 if (dbg_flag) { 3492 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 3493 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 3494 } 3495 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3496 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 3497 3498 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3499 for (i=0; i<n_vertices; i++) { 3500 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 3501 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 3502 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 3503 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 3504 /* solution of saddle point problem */ 3505 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 3506 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3507 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 3508 if (n_constraints) { 3509 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 3510 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3511 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3512 } 3513 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3514 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3515 3516 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3517 /* coarse basis functions */ 3518 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3519 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3520 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3521 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3522 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3523 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3524 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3525 if (pcbddc->inexact_prec_type || dbg_flag) { 3526 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3527 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3528 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3529 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3530 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3531 } 3532 /* subdomain contribution to coarse matrix */ 3533 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3534 for (j=0; j<n_vertices; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; /* WARNING -> column major ordering */ 3535 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3536 if (n_constraints) { 3537 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3538 for (j=0; j<n_constraints; j++) coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; /* WARNING -> column major ordering */ 3539 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3540 } 3541 3542 if (dbg_flag) { 3543 /* assemble subdomain vector on nodes */ 3544 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3545 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3546 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3547 for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j]; 3548 array[vertices[i]] = one; 3549 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3550 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3551 3552 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3553 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3554 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3555 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3556 for (j=0;j<n_vertices;j++) array2[j]=array[j]; 3557 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3558 if (n_constraints) { 3559 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3560 for (j=0;j<n_constraints;j++) array2[j+n_vertices]=array[j]; 3561 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3562 } 3563 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3564 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3565 3566 /* check saddle point solution */ 3567 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3568 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3569 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3570 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3571 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3572 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3573 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3574 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3575 } 3576 } 3577 3578 for (i=0; i<n_constraints; i++) { 3579 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3580 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3581 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3582 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3583 3584 /* solution of saddle point problem */ 3585 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3586 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3587 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3588 if (n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3589 3590 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3591 /* coarse basis functions */ 3592 index=i+n_vertices; 3593 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3594 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3595 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3596 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3597 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3598 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3599 if (pcbddc->inexact_prec_type || dbg_flag) { 3600 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3601 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3602 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3603 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3604 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3605 } 3606 3607 /* subdomain contribution to coarse matrix */ 3608 if (n_vertices) { 3609 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3610 for (j=0; j<n_vertices; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j]; /* WARNING -> column major ordering */ 3611 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3612 } 3613 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3614 for (j=0; j<n_constraints; j++) coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j]; /* WARNING -> column major ordering */ 3615 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3616 3617 if (dbg_flag) { 3618 /* assemble subdomain vector on nodes */ 3619 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3620 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3621 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3622 for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j]; 3623 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3624 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3625 3626 /* assemble subdomain vector of lagrange multipliers */ 3627 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3628 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3629 if (n_vertices) { 3630 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3631 for (j=0;j<n_vertices;j++) array2[j]=-array[j]; 3632 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3633 } 3634 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3635 for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3636 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3637 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3638 3639 /* check saddle point solution */ 3640 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3641 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3642 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3643 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3644 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3645 array[index] = array[index]+m_one; /* shift by the identity matrix */ 3646 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3647 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3648 } 3649 } 3650 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3651 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3652 if (pcbddc->inexact_prec_type || dbg_flag) { 3653 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 } 3656 /* Checking coarse_sub_mat and coarse basis functios */ 3657 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3658 if (dbg_flag) { 3659 Mat coarse_sub_mat; 3660 Mat TM1,TM2,TM3,TM4; 3661 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3662 MatType checkmattype=MATSEQAIJ; 3663 PetscScalar value; 3664 3665 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3666 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3667 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3668 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3669 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3670 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3671 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3672 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3673 3674 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3675 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3676 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3677 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3678 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3679 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3680 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3682 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3683 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3684 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3685 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3686 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3687 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3688 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3689 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3690 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3691 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3692 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3693 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3694 for (i=0; i<pcbddc->local_primal_size; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3695 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3696 for (i=0; i<pcbddc->local_primal_size; i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3697 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3698 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3699 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3700 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3701 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3702 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3703 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3704 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3705 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3706 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3707 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3708 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3709 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3710 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3711 } 3712 /* free memory */ 3713 if (n_vertices) { 3714 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3715 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3716 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3717 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3718 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3719 } 3720 if (n_constraints) { 3721 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3722 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3723 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3724 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3725 } 3726 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3727 ierr = PetscFree(nnz);CHKERRQ(ierr); 3728 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3729 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3730 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3731 } 3732 /* free memory */ 3733 if (n_vertices) { 3734 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3735 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3736 } 3737 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3738 PetscFunctionReturn(0); 3739 } 3740 3741 /* -------------------------------------------------------------------------- */ 3742 3743 #undef __FUNCT__ 3744 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3745 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3746 { 3747 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3748 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3749 PC_IS *pcis = (PC_IS*)pc->data; 3750 MPI_Comm prec_comm; 3751 MPI_Comm coarse_comm; 3752 3753 /* common to all choiches */ 3754 PetscScalar *temp_coarse_mat_vals; 3755 PetscScalar *ins_coarse_mat_vals; 3756 PetscInt *ins_local_primal_indices; 3757 PetscMPIInt *localsizes2,*localdispl2; 3758 PetscMPIInt size_prec_comm; 3759 PetscMPIInt rank_prec_comm; 3760 PetscMPIInt active_rank=MPI_PROC_NULL; 3761 PetscMPIInt master_proc=0; 3762 PetscInt ins_local_primal_size; 3763 3764 /* specific to MULTILEVEL_BDDC */ 3765 PetscMPIInt *ranks_recv; 3766 PetscMPIInt count_recv=0; 3767 PetscMPIInt rank_coarse_proc_send_to; 3768 PetscMPIInt coarse_color = MPI_UNDEFINED; 3769 ISLocalToGlobalMapping coarse_ISLG; 3770 3771 /* some other variables */ 3772 PetscErrorCode ierr; 3773 MatType coarse_mat_type; 3774 PCType coarse_pc_type; 3775 KSPType coarse_ksp_type; 3776 PC pc_temp; 3777 PetscInt i,j,k; 3778 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3779 3780 /* verbose output viewer */ 3781 PetscViewer viewer = pcbddc->dbg_viewer; 3782 PetscBool dbg_flag = pcbddc->dbg_flag; 3783 3784 PetscInt offset,offset2; 3785 PetscMPIInt im_active,active_procs; 3786 PetscInt *dnz,*onz; 3787 3788 PetscBool setsym,issym=PETSC_FALSE; 3789 3790 PetscFunctionBegin; 3791 ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr); 3792 ins_local_primal_indices = 0; 3793 ins_coarse_mat_vals = 0; 3794 localsizes2 = 0; 3795 localdispl2 = 0; 3796 temp_coarse_mat_vals = 0; 3797 coarse_ISLG = 0; 3798 3799 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3800 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3801 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3802 3803 /* Assign global numbering to coarse dofs */ 3804 { 3805 PetscInt *auxlocal_primal; 3806 PetscInt *row_cmat_indices; 3807 PetscInt *aux_ordering; 3808 PetscInt *row_cmat_global_indices; 3809 PetscInt *dof_sizes,*dof_displs; 3810 PetscInt size_of_constraint; 3811 PetscBool *array_bool; 3812 PetscBool first_found; 3813 PetscInt first_index,old_index,s; 3814 PetscMPIInt mpi_local_primal_size; 3815 PetscScalar coarsesum,*array; 3816 3817 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3818 3819 /* Construct needed data structures for message passing */ 3820 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3821 j = 0; 3822 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3823 j = size_prec_comm; 3824 } 3825 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3826 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3827 /* Gather local_primal_size information for all processes */ 3828 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3829 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3830 } else { 3831 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3832 } 3833 pcbddc->replicated_primal_size = 0; 3834 for (i=0; i<j; i++) { 3835 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size; 3836 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3837 } 3838 3839 /* First let's count coarse dofs. 3840 This code fragment assumes that the number of local constraints per connected component 3841 is not greater than the number of nodes defined for the connected component 3842 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3843 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3844 j = 0; 3845 for (i=0; i<pcbddc->local_primal_size; i++) { 3846 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3847 if (j < size_of_constraint) j = size_of_constraint; 3848 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3849 } 3850 ierr = PetscMalloc(j*sizeof(PetscInt),&aux_ordering);CHKERRQ(ierr); 3851 ierr = PetscMalloc(j*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 3852 ierr = PetscMalloc(pcis->n*sizeof(PetscBool),&array_bool);CHKERRQ(ierr); 3853 for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_FALSE; 3854 3855 for (i=0;i<pcbddc->local_primal_size;i++) { 3856 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3857 for (j=0; j<size_of_constraint; j++) aux_ordering[j] = j; 3858 3859 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3860 ierr = PetscSortIntWithPermutation(size_of_constraint,row_cmat_global_indices,aux_ordering);CHKERRQ(ierr); 3861 for (j=0; j<size_of_constraint; j++) { 3862 k = row_cmat_indices[aux_ordering[j]]; 3863 if (!array_bool[k]) { 3864 array_bool[k] = PETSC_TRUE; 3865 auxlocal_primal[i] = k; 3866 break; 3867 } 3868 } 3869 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3870 } 3871 ierr = PetscFree(aux_ordering);CHKERRQ(ierr); 3872 ierr = PetscFree(array_bool);CHKERRQ(ierr); 3873 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3874 3875 /* Compute number of coarse dofs */ 3876 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3877 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3878 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]] = 1.0; 3879 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3880 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3881 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3882 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3883 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3884 pcbddc->coarse_size = (PetscInt)coarsesum; 3885 3886 /* Fill pcis->vec1_global with cumulative function for global numbering */ 3887 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3888 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 3889 k = 0; 3890 first_index = -1; 3891 first_found = PETSC_FALSE; 3892 for (i=0; i<s; i++) { 3893 if (!first_found && array[i] > 0.0) { 3894 first_found = PETSC_TRUE; 3895 first_index = i; 3896 } 3897 k += (PetscInt)array[i]; 3898 } 3899 j = (!rank_prec_comm ? size_prec_comm : 0); 3900 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 3901 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 3902 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3903 if (!rank_prec_comm) { 3904 dof_displs[0]=0; 3905 for (i=1;i<size_prec_comm;i++) dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3906 } 3907 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3908 if (first_found) { 3909 array[first_index] += k; 3910 old_index = first_index; 3911 for (i=first_index+1;i<s;i++) { 3912 if (array[i] > 0.0) { 3913 array[i] += array[old_index]; 3914 old_index = i; 3915 } 3916 } 3917 } 3918 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3919 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3920 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3921 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3922 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3923 for (i=0;i<pcbddc->local_primal_size;i++) pcbddc->local_primal_indices[i] = (PetscInt)array[auxlocal_primal[i]]-1; 3924 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3925 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3926 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3927 3928 if (dbg_flag) { 3929 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3930 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3931 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 3932 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3933 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3934 for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0; 3935 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3936 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3937 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3938 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3939 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3940 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3941 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3942 for (i=0;i<pcis->n;i++) { 3943 if (array[i] == 1.0) { 3944 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 3945 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 3946 } 3947 } 3948 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3949 for (i=0;i<pcis->n;i++) { 3950 if (array[i] > 0.0) array[i] = 1.0/array[i]; 3951 } 3952 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3953 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3954 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3955 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3956 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3957 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 3958 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3959 } 3960 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3961 } 3962 3963 if (dbg_flag) { 3964 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3965 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3966 } 3967 3968 im_active = 0; 3969 if (pcis->n) im_active = 1; 3970 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 3971 3972 /* adapt coarse problem type */ 3973 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3974 if (pcbddc->current_level < pcbddc->max_levels) { 3975 if ((active_procs/pcbddc->coarsening_ratio) < 2) { 3976 if (dbg_flag) { 3977 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3978 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3979 } 3980 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3981 } 3982 } else { 3983 if (dbg_flag) { 3984 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3985 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3986 } 3987 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3988 } 3989 } 3990 3991 switch (pcbddc->coarse_problem_type) { 3992 3993 case (MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3994 { 3995 /* we need additional variables */ 3996 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3997 MetisInt *metis_coarse_subdivision; 3998 MetisInt options[METIS_NOPTIONS]; 3999 PetscMPIInt size_coarse_comm,rank_coarse_comm; 4000 PetscMPIInt procs_jumps_coarse_comm; 4001 PetscMPIInt *coarse_subdivision; 4002 PetscMPIInt *total_count_recv; 4003 PetscMPIInt *total_ranks_recv; 4004 PetscMPIInt *displacements_recv; 4005 PetscMPIInt *my_faces_connectivity; 4006 PetscMPIInt *petsc_faces_adjncy; 4007 MetisInt *faces_adjncy; 4008 MetisInt *faces_xadj; 4009 PetscMPIInt *number_of_faces; 4010 PetscMPIInt *faces_displacements; 4011 PetscInt *array_int; 4012 PetscMPIInt my_faces =0; 4013 PetscMPIInt total_faces=0; 4014 PetscInt ranks_stretching_ratio; 4015 4016 /* define some quantities */ 4017 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4018 coarse_mat_type = MATIS; 4019 coarse_pc_type = PCBDDC; 4020 coarse_ksp_type = KSPRICHARDSON; 4021 4022 /* details of coarse decomposition */ 4023 n_subdomains = active_procs; 4024 n_parts = n_subdomains/pcbddc->coarsening_ratio; 4025 ranks_stretching_ratio = size_prec_comm/active_procs; 4026 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 4027 4028 #if 0 4029 PetscMPIInt *old_ranks; 4030 PetscInt *new_ranks,*jj,*ii; 4031 MatPartitioning mat_part; 4032 IS coarse_new_decomposition,is_numbering; 4033 PetscViewer viewer_test; 4034 MPI_Comm test_coarse_comm; 4035 PetscMPIInt test_coarse_color; 4036 Mat mat_adj; 4037 /* Create new communicator for coarse problem splitting the old one */ 4038 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 4039 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 4040 test_coarse_color = (im_active ? 0 : MPI_UNDEFINED); 4041 test_coarse_comm = MPI_COMM_NULL; 4042 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 4043 if (im_active) { 4044 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 4045 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 4046 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 4047 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 4048 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 4049 for (i=0; i<size_prec_comm; i++) 4050 new_ranks[i] = -1; 4051 for (i=0; i<n_subdomains; i++) 4052 new_ranks[old_ranks[i]] = i; 4053 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 4054 k = pcis->n_neigh-1; 4055 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 4056 ii[0]=0; 4057 ii[1]=k; 4058 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 4059 for (i=0; i<k; i++) 4060 jj[i]=new_ranks[pcis->neigh[i+1]]; 4061 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 4062 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr); 4063 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 4064 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 4065 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 4066 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 4067 printf("Setting Nparts %d\n",n_parts); 4068 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 4069 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 4070 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 4071 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 4072 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 4073 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 4074 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 4075 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 4076 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 4077 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 4078 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 4079 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 4080 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 4081 } 4082 #endif 4083 4084 /* build CSR graph of subdomains' connectivity */ 4085 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 4086 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 4087 for (i=1;i<pcis->n_neigh;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 4088 for (j=0;j<pcis->n_shared[i];j++) { 4089 array_int[pcis->shared[i][j]]+=1; 4090 } 4091 } 4092 for (i=1;i<pcis->n_neigh;i++) { 4093 for (j=0;j<pcis->n_shared[i];j++) { 4094 if (array_int[pcis->shared[i][j]] > 0) { 4095 my_faces++; 4096 break; 4097 } 4098 } 4099 } 4100 4101 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 4102 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 4103 my_faces=0; 4104 for (i=1;i<pcis->n_neigh;i++) { 4105 for (j=0;j<pcis->n_shared[i];j++) { 4106 if (array_int[pcis->shared[i][j]] > 0) { 4107 my_faces_connectivity[my_faces]=pcis->neigh[i]; 4108 my_faces++; 4109 break; 4110 } 4111 } 4112 } 4113 if (rank_prec_comm == master_proc) { 4114 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 4115 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 4116 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 4117 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 4118 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 4119 } 4120 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4121 if (rank_prec_comm == master_proc) { 4122 faces_xadj[0] = 0; 4123 faces_displacements[0] = 0; 4124 4125 j=0; 4126 for (i=1;i<size_prec_comm+1;i++) { 4127 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 4128 if (number_of_faces[i-1]) { 4129 j++; 4130 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 4131 } 4132 } 4133 } 4134 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4135 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 4136 ierr = PetscFree(array_int);CHKERRQ(ierr); 4137 if (rank_prec_comm == master_proc) { 4138 for (i=0; i<total_faces; i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 4139 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 4140 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 4141 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 4142 } 4143 4144 if (rank_prec_comm == master_proc) { 4145 PetscInt heuristic_for_metis=3; 4146 ncon =1; 4147 faces_nvtxs=n_subdomains; 4148 /* partition graoh induced by face connectivity */ 4149 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 4150 ierr = METIS_SetDefaultOptions(options); 4151 /* we need a contiguous partition of the coarse mesh */ 4152 options[METIS_OPTION_CONTIG]=1; 4153 options[METIS_OPTION_NITER] =30; 4154 if (pcbddc->coarsening_ratio > 1) { 4155 if (n_subdomains>n_parts*heuristic_for_metis) { 4156 options[METIS_OPTION_IPTYPE] =METIS_IPTYPE_EDGE; 4157 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 4158 4159 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 4160 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 4161 } else { 4162 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 4163 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 4164 } 4165 } else { 4166 for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i; 4167 } 4168 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 4169 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 4170 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 4171 4172 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 4173 for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 4174 for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 4175 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 4176 } 4177 4178 /* Create new communicator for coarse problem splitting the old one */ 4179 if (!(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts) { 4180 coarse_color=0; /* for communicator splitting */ 4181 active_rank =rank_prec_comm; /* for insertion of matrix values */ 4182 } 4183 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 4184 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 4185 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 4186 4187 if (coarse_color == 0) { 4188 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 4189 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 4190 } else { 4191 rank_coarse_comm = MPI_PROC_NULL; 4192 } 4193 4194 /* master proc take care of arranging and distributing coarse information */ 4195 if (rank_coarse_comm == master_proc) { 4196 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 4197 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 4198 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 4199 /* some initializations */ 4200 displacements_recv[0]=0; 4201 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4202 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 4203 for (j=0; j<size_coarse_comm; j++) { 4204 for (i=0; i<size_prec_comm; i++) { 4205 if (coarse_subdivision[i]==j) total_count_recv[j]++; 4206 } 4207 } 4208 /* displacements needed for scatterv of total_ranks_recv */ 4209 for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 4210 4211 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 4212 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4213 for (j=0; j<size_coarse_comm; j++) { 4214 for (i=0; i<size_prec_comm; i++) { 4215 if (coarse_subdivision[i]==j) { 4216 total_ranks_recv[displacements_recv[j]+total_count_recv[j]] = i; 4217 4218 total_count_recv[j] += 1; 4219 } 4220 } 4221 } 4222 /* for (j=0;j<size_coarse_comm;j++) { 4223 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 4224 for (i=0;i<total_count_recv[j];i++) { 4225 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 4226 } 4227 printf("\n"); 4228 } */ 4229 4230 /* identify new decomposition in terms of ranks in the old communicator */ 4231 for (i=0; i<n_subdomains; i++) { 4232 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 4233 } 4234 /* printf("coarse_subdivision in old end new ranks\n"); 4235 for (i=0;i<size_prec_comm;i++) { 4236 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 4237 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 4238 } else { 4239 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 4240 } 4241 } 4242 printf("\n"); */ 4243 } 4244 4245 /* Scatter new decomposition for send details */ 4246 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4247 /* Scatter receiving details to members of coarse decomposition */ 4248 if (coarse_color == 0) { 4249 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4250 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 4251 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4252 } 4253 4254 /* printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 4255 if (coarse_color == 0) { 4256 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 4257 for (i=0;i<count_recv;i++) 4258 printf("%d ",ranks_recv[i]); 4259 printf("\n"); 4260 } */ 4261 4262 if (rank_prec_comm == master_proc) { 4263 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 4264 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 4265 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 4266 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 4267 } 4268 break; 4269 } 4270 4271 case (REPLICATED_BDDC): 4272 4273 pcbddc->coarse_communications_type = GATHERS_BDDC; 4274 coarse_mat_type = MATSEQAIJ; 4275 coarse_pc_type = PCLU; 4276 coarse_ksp_type = KSPPREONLY; 4277 coarse_comm = PETSC_COMM_SELF; 4278 active_rank = rank_prec_comm; 4279 break; 4280 4281 case (PARALLEL_BDDC): 4282 4283 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4284 coarse_mat_type = MATMPIAIJ; 4285 coarse_pc_type = PCREDUNDANT; 4286 coarse_ksp_type = KSPPREONLY; 4287 coarse_comm = prec_comm; 4288 active_rank = rank_prec_comm; 4289 break; 4290 4291 case (SEQUENTIAL_BDDC): 4292 pcbddc->coarse_communications_type = GATHERS_BDDC; 4293 coarse_mat_type = MATSEQAIJ; 4294 coarse_pc_type = PCLU; 4295 coarse_ksp_type = KSPPREONLY; 4296 coarse_comm = PETSC_COMM_SELF; 4297 active_rank = master_proc; 4298 break; 4299 } 4300 4301 switch (pcbddc->coarse_communications_type) { 4302 4303 case(SCATTERS_BDDC): 4304 { 4305 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 4306 4307 IS coarse_IS; 4308 4309 if (pcbddc->coarsening_ratio == 1) { 4310 ins_local_primal_size = pcbddc->local_primal_size; 4311 ins_local_primal_indices = pcbddc->local_primal_indices; 4312 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4313 /* nonzeros */ 4314 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4315 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4316 for (i=0;i<ins_local_primal_size;i++) { 4317 dnz[i] = ins_local_primal_size; 4318 } 4319 } else { 4320 PetscMPIInt send_size; 4321 PetscMPIInt *send_buffer; 4322 PetscInt *aux_ins_indices; 4323 PetscInt ii,jj; 4324 MPI_Request *requests; 4325 4326 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4327 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 4328 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 4329 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 4330 pcbddc->replicated_primal_size = count_recv; 4331 j = 0; 4332 for (i=0;i<count_recv;i++) { 4333 pcbddc->local_primal_displacements[i] = j; 4334 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 4335 } 4336 pcbddc->local_primal_displacements[count_recv] = j; 4337 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4338 /* allocate auxiliary space */ 4339 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4340 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 4341 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 4342 /* allocate stuffs for message massing */ 4343 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 4344 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 4345 /* send indices to be inserted */ 4346 for (i=0;i<count_recv;i++) { 4347 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 4348 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 4349 } 4350 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4351 send_size = pcbddc->local_primal_size; 4352 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4353 for (i=0;i<send_size;i++) { 4354 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4355 } 4356 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4357 } 4358 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4359 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4360 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4361 } 4362 j = 0; 4363 for (i=0;i<count_recv;i++) { 4364 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 4365 localsizes2[i] = ii*ii; 4366 localdispl2[i] = j; 4367 j += localsizes2[i]; 4368 jj = pcbddc->local_primal_displacements[i]; 4369 /* it counts the coarse subdomains sharing the coarse node */ 4370 for (k=0;k<ii;k++) { 4371 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 4372 } 4373 } 4374 /* temp_coarse_mat_vals used to store matrix values to be received */ 4375 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4376 /* evaluate how many values I will insert in coarse mat */ 4377 ins_local_primal_size = 0; 4378 for (i=0;i<pcbddc->coarse_size;i++) { 4379 if (aux_ins_indices[i]) { 4380 ins_local_primal_size++; 4381 } 4382 } 4383 /* evaluate indices I will insert in coarse mat */ 4384 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4385 j = 0; 4386 for (i=0;i<pcbddc->coarse_size;i++) { 4387 if (aux_ins_indices[i]) { 4388 ins_local_primal_indices[j] = i; 4389 j++; 4390 } 4391 } 4392 /* processes partecipating in coarse problem receive matrix data from their friends */ 4393 for (i=0;i<count_recv;i++) { 4394 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 4395 } 4396 if (rank_coarse_proc_send_to != MPI_PROC_NULL) { 4397 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 4398 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4399 } 4400 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4401 /* nonzeros */ 4402 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4403 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4404 /* use aux_ins_indices to realize a global to local mapping */ 4405 j=0; 4406 for (i=0;i<pcbddc->coarse_size;i++) { 4407 if (aux_ins_indices[i]==0) { 4408 aux_ins_indices[i]=-1; 4409 } else { 4410 aux_ins_indices[i]=j; 4411 j++; 4412 } 4413 } 4414 for (i=0;i<count_recv;i++) { 4415 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 4416 for (k=0;k<j;k++) { 4417 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 4418 } 4419 } 4420 /* check */ 4421 for (i=0;i<ins_local_primal_size;i++) { 4422 if (dnz[i] > ins_local_primal_size) { 4423 dnz[i] = ins_local_primal_size; 4424 } 4425 } 4426 ierr = PetscFree(requests);CHKERRQ(ierr); 4427 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 4428 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4429 } 4430 /* create local to global mapping needed by coarse MATIS */ 4431 if (coarse_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 4432 coarse_comm = prec_comm; 4433 active_rank = rank_prec_comm; 4434 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 4435 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 4436 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 4437 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 4438 /* arrays for values insertion */ 4439 ins_local_primal_size = pcbddc->local_primal_size; 4440 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4441 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 4442 for (j=0;j<ins_local_primal_size;j++) { 4443 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 4444 for (i=0;i<ins_local_primal_size;i++) { 4445 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 4446 } 4447 } 4448 } 4449 break; 4450 } 4451 4452 case (GATHERS_BDDC): 4453 { 4454 PetscMPIInt mysize,mysize2; 4455 PetscMPIInt *send_buffer; 4456 4457 if (rank_prec_comm==active_rank) { 4458 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4459 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 4460 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4461 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4462 /* arrays for values insertion */ 4463 for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 4464 localdispl2[0]=0; 4465 for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 4466 j = 0; 4467 for (i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 4468 ierr = PetscMalloc (j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4469 } 4470 4471 mysize =pcbddc->local_primal_size; 4472 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 4473 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4474 for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4475 4476 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC) { 4477 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4478 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 4479 } else { 4480 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 4481 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 4482 } 4483 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4484 break; 4485 }/* switch on coarse problem and communications associated with finished */ 4486 } 4487 4488 /* Now create and fill up coarse matrix */ 4489 if (rank_prec_comm == active_rank) { 4490 4491 Mat matis_coarse_local_mat; 4492 4493 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4494 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 4495 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 4496 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 4497 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4498 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4499 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4500 } else { 4501 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 4502 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4503 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 4504 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 4505 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4506 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4507 } 4508 /* preallocation */ 4509 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4510 4511 PetscInt lrows,lcols; 4512 4513 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 4514 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 4515 4516 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4517 4518 Vec vec_dnz,vec_onz; 4519 PetscScalar *my_dnz,*my_onz,*array; 4520 PetscInt *mat_ranges,*row_ownership; 4521 PetscInt coarse_index_row,coarse_index_col,owner; 4522 4523 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 4524 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);CHKERRQ(ierr); 4525 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 4526 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 4527 4528 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 4529 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 4530 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4531 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4532 4533 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 4534 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 4535 for (i=0; i<size_prec_comm; i++) { 4536 for (j=mat_ranges[i]; j<mat_ranges[i+1]; j++) { 4537 row_ownership[j]=i; 4538 } 4539 } 4540 4541 for (i=0; i<pcbddc->local_primal_size; i++) { 4542 coarse_index_row = pcbddc->local_primal_indices[i]; 4543 owner = row_ownership[coarse_index_row]; 4544 for (j=i; j<pcbddc->local_primal_size; j++) { 4545 owner = row_ownership[coarse_index_row]; 4546 coarse_index_col = pcbddc->local_primal_indices[j]; 4547 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1]) { 4548 my_dnz[i] += 1.0; 4549 } else { 4550 my_onz[i] += 1.0; 4551 } 4552 if (i != j) { 4553 owner = row_ownership[coarse_index_col]; 4554 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1]) { 4555 my_dnz[j] += 1.0; 4556 } else { 4557 my_onz[j] += 1.0; 4558 } 4559 } 4560 } 4561 } 4562 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 4563 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 4564 if (pcbddc->local_primal_size) { 4565 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 4566 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 4567 } 4568 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 4569 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 4570 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 4571 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 4572 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 4573 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 4574 for (i=0; i<j; i++) dnz[i] = (PetscInt)array[i]; 4575 4576 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 4577 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 4578 for (i=0;i<j;i++) onz[i] = (PetscInt)array[i]; 4579 4580 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 4581 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 4582 ierr = PetscFree(my_onz);CHKERRQ(ierr); 4583 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 4584 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 4585 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 4586 } else { 4587 for (k=0;k<size_prec_comm;k++) { 4588 offset=pcbddc->local_primal_displacements[k]; 4589 offset2=localdispl2[k]; 4590 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4591 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4592 for (j=0;j<ins_local_primal_size;j++) { 4593 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4594 } 4595 for (j=0;j<ins_local_primal_size;j++) { 4596 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 4597 } 4598 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4599 } 4600 } 4601 4602 /* check */ 4603 for (i=0;i<lrows;i++) { 4604 if (dnz[i]>lcols) dnz[i]=lcols; 4605 if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols; 4606 } 4607 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr); 4608 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr); 4609 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4610 } else { 4611 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 4612 ierr = PetscFree(dnz);CHKERRQ(ierr); 4613 } 4614 4615 /* insert values */ 4616 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4617 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4618 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4619 if (pcbddc->coarsening_ratio == 1) { 4620 ins_coarse_mat_vals = coarse_submat_vals; 4621 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 4622 } else { 4623 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4624 for (k=0; k<pcbddc->replicated_primal_size; k++) { 4625 offset = pcbddc->local_primal_displacements[k]; 4626 offset2 = localdispl2[k]; 4627 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 4628 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4629 for (j=0; j<ins_local_primal_size; j++) { 4630 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4631 } 4632 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4633 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4634 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4635 } 4636 } 4637 ins_local_primal_indices = 0; 4638 ins_coarse_mat_vals = 0; 4639 } else { 4640 for (k=0; k<size_prec_comm; k++) { 4641 offset =pcbddc->local_primal_displacements[k]; 4642 offset2 =localdispl2[k]; 4643 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4644 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4645 for (j=0; j<ins_local_primal_size; j++) { 4646 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4647 } 4648 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4649 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4650 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4651 } 4652 ins_local_primal_indices = 0; 4653 ins_coarse_mat_vals = 0; 4654 } 4655 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 /* symmetry of coarse matrix */ 4658 if (issym) { 4659 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4660 } 4661 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 4662 } 4663 4664 /* create loc to glob scatters if needed */ 4665 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 4666 IS local_IS,global_IS; 4667 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 4668 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 4669 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4670 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 4671 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 4672 } 4673 4674 /* free memory no longer needed */ 4675 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 4676 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 4677 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 4678 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 4679 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 4680 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 4681 4682 /* Eval coarse null space */ 4683 if (pcbddc->NullSpace) { 4684 const Vec *nsp_vecs; 4685 PetscInt nsp_size,coarse_nsp_size; 4686 PetscBool nsp_has_cnst; 4687 PetscReal test_null; 4688 Vec *coarse_nsp_vecs; 4689 4690 coarse_nsp_size = 0; 4691 coarse_nsp_vecs = 0; 4692 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 4693 if (rank_prec_comm == active_rank) { 4694 ierr = PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);CHKERRQ(ierr); 4695 for (i=0; i<nsp_size+1; i++) { 4696 ierr = VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4697 } 4698 } 4699 if (nsp_has_cnst) { 4700 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4701 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4702 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4703 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4704 if (rank_prec_comm == active_rank) { 4705 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4706 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr); 4707 if (test_null > 1.0e-12 && pcbddc->dbg_flag) { 4708 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr); 4709 } 4710 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4711 coarse_nsp_size++; 4712 } 4713 } 4714 for (i=0; i<nsp_size; i++) { 4715 ierr = VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4716 ierr = VecScatterEnd (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4717 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4718 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4719 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4720 if (rank_prec_comm == active_rank) { 4721 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4722 ierr = VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);CHKERRQ(ierr); 4723 if (test_null > 1.0e-12 && pcbddc->dbg_flag) { 4724 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr); 4725 } 4726 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4727 coarse_nsp_size++; 4728 } 4729 } 4730 if (coarse_nsp_size > 0) { 4731 /* TODO orthonormalize vecs */ 4732 ierr = VecNormalize(coarse_nsp_vecs[0],NULL);CHKERRQ(ierr); 4733 ierr = MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 4734 for (i=0; i<nsp_size+1; i++) { 4735 ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4736 } 4737 } 4738 ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr); 4739 } 4740 4741 /* KSP for coarse problem */ 4742 if (rank_prec_comm == active_rank) { 4743 PetscBool isbddc=PETSC_FALSE; 4744 4745 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 4746 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4747 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4748 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 4749 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4750 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4751 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4752 /* Allow user's customization */ 4753 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 4754 /* Set Up PC for coarse problem BDDC */ 4755 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4756 i = pcbddc->current_level+1; 4757 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 4758 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4759 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4760 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 4761 if (pcbddc->CoarseNullSpace) { ierr = PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace);CHKERRQ(ierr); } 4762 if (dbg_flag) { 4763 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 4764 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4765 } 4766 } 4767 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4768 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4769 4770 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr); 4771 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4772 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4773 if (j == 1) { 4774 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4775 if (isbddc) { 4776 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 4777 } 4778 } 4779 } 4780 /* Check coarse problem if requested */ 4781 if (dbg_flag && rank_prec_comm == active_rank) { 4782 KSP check_ksp; 4783 PC check_pc; 4784 Vec check_vec; 4785 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 4786 KSPType check_ksp_type; 4787 4788 /* Create ksp object suitable for extreme eigenvalues' estimation */ 4789 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 4790 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4791 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4792 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4793 if (issym) check_ksp_type = KSPCG; 4794 else check_ksp_type = KSPGMRES; 4795 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 4796 } else { 4797 check_ksp_type = KSPPREONLY; 4798 } 4799 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4800 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4801 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4802 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4803 /* create random vec */ 4804 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 4805 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4806 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,NULL);CHKERRQ(ierr); } 4807 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4808 /* solve coarse problem */ 4809 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 4810 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,NULL);CHKERRQ(ierr); } 4811 /* check coarse problem residual error */ 4812 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 4813 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4814 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4815 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4816 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4817 /* get eigenvalue estimation if inexact */ 4818 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4819 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 4820 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 4821 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 4822 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 4823 } 4824 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 4825 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 4826 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4827 } 4828 if (dbg_flag) { ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } 4829 PetscFunctionReturn(0); 4830 } 4831 4832 #undef __FUNCT__ 4833 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 4834 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 4835 { 4836 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4837 PC_IS *pcis = (PC_IS*)pc->data; 4838 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4839 PCBDDCGraph mat_graph=pcbddc->mat_graph; 4840 PetscInt *is_indices,*auxis; 4841 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 4842 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 4843 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 4844 PetscBool same_set; 4845 MPI_Comm interface_comm; 4846 PetscBool use_faces = PETSC_FALSE,use_edges = PETSC_FALSE; 4847 const PetscInt *neumann_nodes; 4848 const PetscInt *dirichlet_nodes; 4849 IS used_IS,*custom_ISForDofs; 4850 PetscScalar *array; 4851 PetscScalar *array2; 4852 PetscViewer viewer=pcbddc->dbg_viewer; 4853 PetscInt *queue_in_global_numbering; 4854 4855 PetscFunctionBegin; 4856 ierr = PetscObjectGetComm((PetscObject)pc,&interface_comm);CHKERRQ(ierr); 4857 /* Setup local adjacency graph */ 4858 mat_graph->nvtxs=pcis->n; 4859 if (!mat_graph->xadj) NEUMANNCNT = 1; 4860 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4861 4862 i = mat_graph->nvtxs; 4863 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4864 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4865 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4866 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4867 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4868 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4869 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4870 4871 /* Setting dofs splitting in mat_graph->which_dof 4872 Get information about dofs' splitting if provided by the user 4873 Otherwise it assumes a constant block size */ 4874 vertex_size=0; 4875 if (!pcbddc->n_ISForDofs) { 4876 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4877 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4878 for (i=0; i<bs; i++) { 4879 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4880 } 4881 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4882 vertex_size=1; 4883 /* remove my references to IS objects */ 4884 for (i=0; i<bs; i++) { 4885 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4886 } 4887 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4888 } 4889 for (i=0; i<pcbddc->n_ISForDofs; i++) { 4890 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4891 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4892 for (j=0; j<k; j++) mat_graph->which_dof[is_indices[j]]=i; 4893 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4894 } 4895 /* use mat block size as vertex size if it has not yet set */ 4896 if (!vertex_size) { 4897 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4898 } 4899 4900 /* count number of neigh per node */ 4901 total_counts=0; 4902 for (i=1; i<pcis->n_neigh; i++) { 4903 s = pcis->n_shared[i]; 4904 total_counts += s; 4905 for (j=0;j<s;j++) mat_graph->count[pcis->shared[i][j]] += 1; 4906 } 4907 4908 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4909 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4910 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4911 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4912 if (used_IS) { 4913 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4914 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4915 for (i=0; i<neumann_bsize; i++) { 4916 iindex = neumann_nodes[i]; 4917 if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0) { 4918 mat_graph->count[iindex]+=1; 4919 total_counts++; 4920 array[iindex]=array[iindex]+1.0; 4921 } else if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4922 } 4923 } 4924 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4925 /* allocate space for storing the set of neighbours for each node */ 4926 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4927 if (mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4928 for (i=1; i<mat_graph->nvtxs; i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4929 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4930 for (i=1; i<pcis->n_neigh; i++) { 4931 s=pcis->n_shared[i]; 4932 for (j=0; j<s; j++) { 4933 k=pcis->shared[i][j]; 4934 4935 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4936 4937 mat_graph->count[k]+=1; 4938 } 4939 } 4940 /* Check consistency of Neumann nodes */ 4941 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4942 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4943 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4944 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4945 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4946 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4947 /* set -1 fake neighbour to mimic Neumann boundary */ 4948 if (used_IS) { 4949 for (i=0; i<neumann_bsize; i++) { 4950 iindex = neumann_nodes[i]; 4951 if (mat_graph->count[iindex] > NEUMANNCNT) { 4952 if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4953 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4954 4955 mat_graph->count[iindex]+=1; 4956 } 4957 } 4958 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4959 } 4960 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4961 /* sort set of sharing subdomains */ 4962 for (i=0;i<mat_graph->nvtxs;i++) { 4963 ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); 4964 } 4965 4966 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4967 for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE; 4968 nodes_touched=0; 4969 4970 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4971 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4972 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4973 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4974 if (used_IS) { 4975 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4976 if (dirichlet_bsize && matis->pure_neumann) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4977 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4978 for (i=0; i<dirichlet_bsize; i++) { 4979 iindex = dirichlet_nodes[i]; 4980 if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4981 if (array[iindex]>0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4982 mat_graph->touched[iindex] = PETSC_TRUE; 4983 mat_graph->where[iindex] = 0; 4984 nodes_touched++; 4985 array2[iindex] = array2[iindex]+1.0; 4986 } 4987 } 4988 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4989 } 4990 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4991 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4992 4993 /* Check consistency of Dirichlet nodes */ 4994 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4995 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4996 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4997 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4998 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4999 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5000 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5001 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5002 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5003 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5004 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5005 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5006 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5007 if (used_IS) { 5008 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 5009 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 5010 for (i=0; i<dirichlet_bsize; i++) { 5011 iindex=dirichlet_nodes[i]; 5012 if (array[iindex]>1.0 && array[iindex]!=array2[iindex]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 5013 } 5014 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 5015 } 5016 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5017 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5018 5019 for (i=0; i<mat_graph->nvtxs; i++) { 5020 if (!mat_graph->count[i]) { /* interior nodes */ 5021 mat_graph->touched[i] = PETSC_TRUE; 5022 mat_graph->where[i] = 0; 5023 nodes_touched++; 5024 } 5025 } 5026 mat_graph->ncmps = 0; 5027 5028 i=0; 5029 while (nodes_touched<mat_graph->nvtxs) { 5030 /* find first untouched node in local ordering */ 5031 while (mat_graph->touched[i]) i++; 5032 mat_graph->touched[i]=PETSC_TRUE; 5033 mat_graph->where[i] =where_values; 5034 nodes_touched++; 5035 /* now find all other nodes having the same set of sharing subdomains */ 5036 for (j=i+1; j<mat_graph->nvtxs; j++) { 5037 /* check for same number of sharing subdomains and dof number */ 5038 if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j]) { 5039 /* check for same set of sharing subdomains */ 5040 same_set=PETSC_TRUE; 5041 for (k=0; k<mat_graph->count[j]; k++) { 5042 if (mat_graph->neighbours_set[i][k] != mat_graph->neighbours_set[j][k]) { 5043 same_set=PETSC_FALSE; 5044 } 5045 } 5046 /* I found a friend of mine */ 5047 if (same_set) { 5048 mat_graph->where[j] = where_values; 5049 mat_graph->touched[j] = PETSC_TRUE; 5050 nodes_touched++; 5051 } 5052 } 5053 } 5054 where_values++; 5055 } 5056 where_values--; if (where_values<0) where_values=0; 5057 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5058 /* Find connected components defined on the shared interface */ 5059 if (where_values) { 5060 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);CHKERRQ(ierr); 5061 } 5062 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 5063 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 5064 for (i=0;i<where_values;i++) { 5065 /* We are not sure that on a given subset of the local interface, 5066 two connected components will be the same among sharing subdomains */ 5067 if (mat_graph->where_ncmps[i]>1) { 5068 adapt_interface=1; 5069 break; 5070 } 5071 } 5072 5073 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 5074 if (pcbddc->dbg_flag && adapt_interface_reduced) { 5075 ierr = PetscViewerASCIIPrintf(viewer,"Adapting interface\n");CHKERRQ(ierr); 5076 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5077 } 5078 if (where_values && adapt_interface_reduced) { 5079 PetscInt sum_requests=0,my_rank; 5080 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 5081 PetscInt temp_buffer_size,ins_val,global_where_counter; 5082 PetscInt *cum_recv_counts; 5083 PetscInt *where_to_nodes_indices; 5084 PetscInt *petsc_buffer; 5085 PetscMPIInt *recv_buffer; 5086 PetscMPIInt *recv_buffer_where; 5087 PetscMPIInt *send_buffer; 5088 PetscMPIInt size_of_send; 5089 PetscInt *sizes_of_sends; 5090 MPI_Request *send_requests; 5091 MPI_Request *recv_requests; 5092 PetscInt *where_cc_adapt; 5093 PetscInt **temp_buffer; 5094 PetscInt *nodes_to_temp_buffer_indices; 5095 PetscInt *add_to_where; 5096 PetscInt *aux_new_xadj,*new_xadj,*new_adjncy; 5097 5098 /* Retrict adjacency graph using information from connected components */ 5099 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&aux_new_xadj);CHKERRQ(ierr); 5100 for (i=0; i<mat_graph->nvtxs; i++) aux_new_xadj[i]=1; 5101 for (i=0;i<mat_graph->ncmps;i++) { 5102 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5103 for (j=0;j<k;j++) aux_new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]=k; 5104 } 5105 j = 0; 5106 for (i=0;i<mat_graph->nvtxs;i++) j += aux_new_xadj[i]; 5107 5108 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&new_xadj);CHKERRQ(ierr); 5109 ierr = PetscMalloc(j*sizeof(PetscInt),&new_adjncy);CHKERRQ(ierr); 5110 new_xadj[0]=0; 5111 for (i=0;i<mat_graph->nvtxs;i++) { 5112 new_xadj[i+1]=new_xadj[i]+aux_new_xadj[i]; 5113 if (aux_new_xadj[i]==1) new_adjncy[new_xadj[i]]=i; 5114 } 5115 ierr = PetscFree(aux_new_xadj);CHKERRQ(ierr); 5116 for (i=0; i<mat_graph->ncmps; i++) { 5117 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5118 for (j=0; j<k; j++) { 5119 ierr = PetscMemcpy(&new_adjncy[new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]],&mat_graph->queue[mat_graph->cptr[i]],k*sizeof(PetscInt));CHKERRQ(ierr); 5120 } 5121 } 5122 ierr = PCBDDCSetLocalAdjacencyGraph(pc,mat_graph->nvtxs,new_xadj,new_adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 5123 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5124 for (i=0; i<mat_graph->ncmps; i++) { 5125 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5126 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5127 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5128 } 5129 /* allocate some space */ 5130 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 5131 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 5132 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 5133 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 5134 /* first count how many neighbours per connected component I will receive from */ 5135 cum_recv_counts[0]=0; 5136 for (i=1; i<where_values+1; i++) { 5137 j=0; 5138 while (mat_graph->where[j] != i) j++; 5139 where_to_nodes_indices[i-1]=j; 5140 if (mat_graph->neighbours_set[j][0]!=-1) cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 5141 else cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; 5142 } 5143 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 5144 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 5145 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 5146 for (i=0; i<cum_recv_counts[where_values]; i++) { 5147 send_requests[i]=MPI_REQUEST_NULL; 5148 recv_requests[i]=MPI_REQUEST_NULL; 5149 } 5150 /* exchange with my neighbours the number of my connected components on the shared interface */ 5151 for (i=0; i<where_values; i++) { 5152 j = where_to_nodes_indices[i]; 5153 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5154 for (; k<mat_graph->count[j]; k++) { 5155 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5156 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5157 sum_requests++; 5158 } 5159 } 5160 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5161 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5162 /* determine the connected component I need to adapt */ 5163 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 5164 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5165 for (i=0; i<where_values; i++) { 5166 for (j=cum_recv_counts[i]; j<cum_recv_counts[i+1]; j++) { 5167 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 5168 if (mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1) { 5169 where_cc_adapt[i]=PETSC_TRUE; 5170 break; 5171 } 5172 } 5173 } 5174 buffer_size = 0; 5175 for (i=0; i<where_values; i++) { 5176 if (where_cc_adapt[i]) { 5177 for (j=i; j<mat_graph->ncmps; j++) { 5178 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5179 buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5180 } 5181 } 5182 } 5183 } 5184 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 5185 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 5186 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 5187 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 5188 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5189 5190 sum_requests = 0; 5191 start_of_send = 0; 5192 start_of_recv = cum_recv_counts[where_values]; 5193 for (i=0; i<where_values; i++) { 5194 if (where_cc_adapt[i]) { 5195 size_of_send=0; 5196 for (j=i; j<mat_graph->ncmps; j++) { 5197 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5198 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5199 size_of_send += 1; 5200 for (k=0; k<mat_graph->cptr[j+1]-mat_graph->cptr[j]; k++) { 5201 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 5202 } 5203 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5204 } 5205 } 5206 j = where_to_nodes_indices[i]; 5207 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5208 sizes_of_sends[i]=size_of_send; 5209 for (; k<mat_graph->count[j]; k++) { 5210 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5211 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5212 sum_requests++; 5213 } 5214 start_of_send+=size_of_send; 5215 } 5216 } 5217 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5218 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5219 5220 buffer_size=0; 5221 5222 for (k=0;k<sum_requests;k++) buffer_size += recv_buffer_where[start_of_recv+k]; 5223 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 5224 /* now exchange the data */ 5225 start_of_recv = 0; 5226 start_of_send = 0; 5227 sum_requests = 0; 5228 for (i=0; i<where_values; i++) { 5229 if (where_cc_adapt[i]) { 5230 size_of_send = sizes_of_sends[i]; 5231 5232 j = where_to_nodes_indices[i]; 5233 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5234 for (; k<mat_graph->count[j]; k++) { 5235 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5236 size_of_recv = recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 5237 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5238 start_of_recv+=size_of_recv; 5239 sum_requests++; 5240 } 5241 start_of_send+=size_of_send; 5242 } 5243 } 5244 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5245 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5246 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 5247 for (k=0;k<start_of_recv;k++) petsc_buffer[k]=(PetscInt)recv_buffer[k]; 5248 for (j=0;j<buffer_size;) { 5249 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 5250 k = petsc_buffer[j]+1; 5251 j += k; 5252 } 5253 sum_requests = cum_recv_counts[where_values]; 5254 start_of_recv = 0; 5255 5256 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5257 global_where_counter=0; 5258 for (i=0; i<where_values; i++) { 5259 if (where_cc_adapt[i]) { 5260 temp_buffer_size=0; 5261 /* find nodes on the shared interface we need to adapt */ 5262 for (j=0; j<mat_graph->nvtxs; j++) { 5263 if (mat_graph->where[j]==i+1) { 5264 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 5265 temp_buffer_size++; 5266 } else { 5267 nodes_to_temp_buffer_indices[j]=-1; 5268 } 5269 } 5270 5271 /* allocate some temporary space */ 5272 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 5273 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 5274 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 5275 for (j=1; j<temp_buffer_size; j++) { 5276 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 5277 } 5278 /* analyze contributions from neighbouring subdomains for i-th conn comp 5279 temp buffer structure: 5280 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 5281 3 neighs procs with structured connected components: 5282 neigh 0: [0 1 4], [2 3]; (2 connected components) 5283 neigh 1: [0 1], [2 3 4]; (2 connected components) 5284 neigh 2: [0 4], [1], [2 3]; (3 connected components) 5285 tempbuffer (row-oriented) should be filled as: 5286 [ 0, 0, 0; 5287 0, 0, 1; 5288 1, 1, 2; 5289 1, 1, 2; 5290 0, 1, 0; ]; 5291 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 5292 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 5293 */ 5294 for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 5295 ins_val=0; 5296 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 5297 for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 5298 for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 5299 temp_buffer[nodes_to_temp_buffer_indices[petsc_buffer[start_of_recv+buffer_size+k]]][j] = ins_val; 5300 } 5301 buffer_size+=k; 5302 ins_val++; 5303 } 5304 start_of_recv+=size_of_recv; 5305 sum_requests++; 5306 } 5307 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 5308 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 5309 for (j=0; j<temp_buffer_size; j++) { 5310 if (!add_to_where[j]) { /* found a new cc */ 5311 global_where_counter++; 5312 add_to_where[j]=global_where_counter; 5313 for (k=j+1; k<temp_buffer_size; k++) { /* check for other nodes in new cc */ 5314 same_set=PETSC_TRUE; 5315 for (s=0; s<cum_recv_counts[i+1]-cum_recv_counts[i]; s++) { 5316 if (temp_buffer[j][s]!=temp_buffer[k][s]) { 5317 same_set=PETSC_FALSE; 5318 break; 5319 } 5320 } 5321 if (same_set) add_to_where[k] = global_where_counter; 5322 } 5323 } 5324 } 5325 /* insert new data in where array */ 5326 temp_buffer_size=0; 5327 for (j=0;j<mat_graph->nvtxs;j++) { 5328 if (mat_graph->where[j]==i+1) { 5329 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 5330 temp_buffer_size++; 5331 } 5332 } 5333 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 5334 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 5335 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 5336 } 5337 } 5338 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5339 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 5340 ierr = PetscFree(send_requests);CHKERRQ(ierr); 5341 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 5342 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 5343 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 5344 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 5345 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 5346 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 5347 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 5348 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 5349 5350 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 5351 if (global_where_counter) { 5352 for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i]=PETSC_FALSE; 5353 global_where_counter=0; 5354 for (i=0;i<mat_graph->nvtxs;i++) { 5355 if (mat_graph->where[i] && !mat_graph->touched[i]) { 5356 global_where_counter++; 5357 for (j=i+1;j<mat_graph->nvtxs;j++) { 5358 if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 5359 mat_graph->where[j] = global_where_counter; 5360 mat_graph->touched[j] = PETSC_TRUE; 5361 } 5362 } 5363 mat_graph->where[i] = global_where_counter; 5364 mat_graph->touched[i] = PETSC_TRUE; 5365 } 5366 } 5367 where_values=global_where_counter; 5368 } 5369 if (global_where_counter) { 5370 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 5371 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 5372 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 5373 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5374 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 5375 } 5376 } /* Finished adapting interface */ 5377 5378 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5379 for (i=0; i<mat_graph->ncmps; i++) { 5380 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5381 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5382 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5383 } 5384 5385 PetscInt nfc = 0; 5386 PetscInt nec = 0; 5387 PetscInt nvc = 0; 5388 PetscBool twodim_flag = PETSC_FALSE; 5389 for (i=0; i<mat_graph->ncmps; i++) { 5390 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) { 5391 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) nfc++; /* 1 neigh Neumann fake included */ 5392 else nec++; /* note that nec will be zero in 2d */ 5393 } else { 5394 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5395 } 5396 } 5397 if (!nec) { /* we are in a 2d case -> no faces, only edges */ 5398 nec = nfc; 5399 nfc = 0; 5400 twodim_flag = PETSC_TRUE; 5401 } 5402 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 5403 k=0; 5404 for (i=0; i<mat_graph->ncmps; i++) { 5405 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5406 if (j > k) k=j; 5407 5408 if (j<=vertex_size) k+=vertex_size; 5409 } 5410 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 5411 if (!pcbddc->vertices_flag && !pcbddc->edges_flag) { 5412 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 5413 use_faces = PETSC_TRUE; 5414 } 5415 if (!pcbddc->vertices_flag && !pcbddc->faces_flag) { 5416 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 5417 use_edges = PETSC_TRUE; 5418 } 5419 nfc=0; 5420 nec=0; 5421 for (i=0; i<mat_graph->ncmps; i++) { 5422 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size) { 5423 for (j=0; j<mat_graph->cptr[i+1]-mat_graph->cptr[i]; j++) { 5424 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 5425 } 5426 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1) { 5427 if (twodim_flag) { 5428 if (use_edges) { 5429 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5430 nec++; 5431 } 5432 } else { 5433 if (use_faces) { 5434 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 5435 nfc++; 5436 } 5437 } 5438 } else { 5439 if (use_edges) { 5440 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5441 nec++; 5442 } 5443 } 5444 } 5445 } 5446 pcbddc->n_ISForFaces = nfc; 5447 pcbddc->n_ISForEdges = nec; 5448 5449 nvc = 0; 5450 if (!pcbddc->constraints_flag) { 5451 for (i=0; i<mat_graph->ncmps; i++) { 5452 if (mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size) { 5453 for (j = mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) { 5454 auxis[nvc]=mat_graph->queue[j]; 5455 nvc++; 5456 } 5457 } 5458 } 5459 } 5460 5461 /* sort vertex set (by local ordering) */ 5462 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 5463 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 5464 if (pcbddc->dbg_flag) { 5465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5466 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5467 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 5468 for (i=0; i<mat_graph->ncmps; i++) { 5469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 5470 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 5471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 5472 for (j=0; j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 5473 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]);CHKERRQ(ierr); 5474 } 5475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 5476 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++) { 5477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",mat_graph->queue[j],queue_in_global_numbering[j]);CHKERRQ(ierr); 5478 } 5479 } 5480 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 5481 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 5482 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 5483 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 5484 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5485 } 5486 ierr = PetscFree(auxis);CHKERRQ(ierr); 5487 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 5488 PetscFunctionReturn(0); 5489 } 5490 5491 /* -------------------------------------------------------------------------- */ 5492 5493 /* The following code has been adapted from function IsConnectedSubdomain contained 5494 in source file contig.c of METIS library (version 5.0.1) 5495 It finds connected components of each partition labeled from 1 to n_dist */ 5496 5497 #undef __FUNCT__ 5498 #define __FUNCT__ "PCBDDCFindConnectedComponents" 5499 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist) 5500 { 5501 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 5502 PetscInt *xadj, *adjncy, *where, *queue; 5503 PetscInt *cptr; 5504 PetscBool *touched; 5505 5506 PetscFunctionBegin; 5507 nvtxs = graph->nvtxs; 5508 xadj = graph->xadj; 5509 adjncy = graph->adjncy; 5510 where = graph->where; 5511 touched = graph->touched; 5512 queue = graph->queue; 5513 cptr = graph->cptr; 5514 5515 for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE; 5516 5517 cum_queue = 0; 5518 ncmps = 0; 5519 5520 for (n=0; n<n_dist; n++) { 5521 pid = n+1; /* partition labeled by 0 is discarded */ 5522 nleft = 0; 5523 for (i=0; i<nvtxs; i++) { 5524 if (where[i] == pid) nleft++; 5525 } 5526 for (i=0; i<nvtxs; i++) { 5527 if (where[i] == pid) break; 5528 } 5529 touched[i] = PETSC_TRUE; 5530 queue[cum_queue] = i; 5531 first = 0; last = 1; 5532 5533 cptr[ncmps] = cum_queue; /* This actually points to queue */ 5534 ncmps_pid = 0; 5535 5536 while (first != nleft) { 5537 if (first == last) { /* Find another starting vertex */ 5538 cptr[++ncmps] = first+cum_queue; 5539 ncmps_pid++; 5540 for (i=0; i<nvtxs; i++) { 5541 if (where[i] == pid && !touched[i]) break; 5542 } 5543 queue[cum_queue+last] = i; 5544 last++; 5545 touched[i] = PETSC_TRUE; 5546 } 5547 i = queue[cum_queue+first]; 5548 first++; 5549 for (j=xadj[i]; j<xadj[i+1]; j++) { 5550 k = adjncy[j]; 5551 if (where[k] == pid && !touched[k]) { 5552 queue[cum_queue+last] = k; 5553 last++; 5554 touched[k] = PETSC_TRUE; 5555 } 5556 } 5557 } 5558 cptr[++ncmps] = first+cum_queue; 5559 ncmps_pid++; 5560 cum_queue = cptr[ncmps]; 5561 graph->where_ncmps[n] = ncmps_pid; 5562 } 5563 graph->ncmps = ncmps; 5564 PetscFunctionReturn(0); 5565 } 5566