1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 Exact solvers: Solve local saddle point directly 5 - change prec_type to switch_inexact_prec_type 6 - add bool solve_exact_saddle_point slot to pdbddc data 7 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 8 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 9 - mind the problem with coarsening_factor 10 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 11 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 12 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 13 - Add levels' slot to bddc data structure and associated Set/Get functions 14 code refactoring: 15 - pick up better names for static functions 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 man pages 21 */ 22 23 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 24 Implementation of BDDC preconditioner based on: 25 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 31 /* -------------------------------------------------------------------------- */ 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCSetFromOptions_BDDC" 34 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 35 { 36 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 41 /* Verbose debugging of main data structures */ 42 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Some customization for default primal space */ 44 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 48 /* Coarse solver context */ 49 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 50 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 51 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 52 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 #undef __FUNCT__ 71 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 72 /*@ 73 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 74 75 Not collective 76 77 Input Parameters: 78 + pc - the preconditioning context 79 - CoarseProblemType - pick a better name and explain what this is 80 81 Level: intermediate 82 83 Notes: 84 Not collective but all procs must call with same arguments. 
85 86 .seealso: PCBDDC 87 @*/ 88 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 94 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 /* -------------------------------------------------------------------------- */ 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 101 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 108 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 109 pcbddc->DirichletBoundaries=DirichletBoundaries; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 115 /*@ 116 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 117 of Dirichlet boundaries for the global problem. 
118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 
161 162 Not collective 163 164 Input Parameters: 165 + pc - the preconditioning context 166 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 167 168 Level: intermediate 169 170 Notes: 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 /* -------------------------------------------------------------------------- */ 184 EXTERN_C_BEGIN 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 187 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 188 { 189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 190 191 PetscFunctionBegin; 192 *DirichletBoundaries = pcbddc->DirichletBoundaries; 193 PetscFunctionReturn(0); 194 } 195 EXTERN_C_END 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 198 /*@ 199 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 200 of Dirichlet boundaries for the global problem. 
201 202 Not collective 203 204 Input Parameters: 205 + pc - the preconditioning context 206 207 Output Parameters: 208 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 209 210 Level: intermediate 211 212 Notes: 213 214 .seealso: PCBDDC 215 @*/ 216 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 222 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 /* -------------------------------------------------------------------------- */ 226 EXTERN_C_BEGIN 227 #undef __FUNCT__ 228 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 229 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 230 { 231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 232 233 PetscFunctionBegin; 234 *NeumannBoundaries = pcbddc->NeumannBoundaries; 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 240 /*@ 241 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 242 of Neumann boundaries for the global problem. 
243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 
350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCSetUp_BDDC" 388 /* -------------------------------------------------------------------------- */ 389 /* 390 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 391 by setting data structures and options. 392 393 Input Parameter: 394 + pc - the preconditioner context 395 396 Application Interface Routine: PCSetUp() 397 398 Notes: 399 The interface routine PCSetUp() is not usually called directly by 400 the user, but instead is called by PCApply() if necessary. 
401 */ 402 PetscErrorCode PCSetUp_BDDC(PC pc) 403 { 404 PetscErrorCode ierr; 405 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 406 PC_IS *pcis = (PC_IS*)(pc->data); 407 408 PetscFunctionBegin; 409 if (!pc->setupcalled) { 410 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 411 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 412 Also, we decide to directly build the (same) Dirichlet problem */ 413 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 414 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 415 /* Set up all the "iterative substructuring" common block */ 416 ierr = PCISSetUp(pc);CHKERRQ(ierr); 417 /* Get stdout for dbg */ 418 if(pcbddc->dbg_flag) { 419 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 420 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 421 } 422 /* TODO MOVE CODE FRAGMENT */ 423 PetscInt im_active=0; 424 if(pcis->n) im_active = 1; 425 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 426 /* Analyze local interface */ 427 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 428 /* Set up local constraint matrix */ 429 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 430 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 431 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 432 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 433 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 434 } 435 PetscFunctionReturn(0); 436 } 437 438 /* -------------------------------------------------------------------------- */ 439 /* 440 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 441 442 Input Parameters: 443 . pc - the preconditioner context 444 . 
r - input vector (global) 445 446 Output Parameter: 447 . z - output vector (global) 448 449 Application Interface Routine: PCApply() 450 */ 451 #undef __FUNCT__ 452 #define __FUNCT__ "PCApply_BDDC" 453 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 454 { 455 PC_IS *pcis = (PC_IS*)(pc->data); 456 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 457 PetscErrorCode ierr; 458 const PetscScalar one = 1.0; 459 const PetscScalar m_one = -1.0; 460 461 /* This code is similar to that provided in nn.c for PCNN 462 NN interface preconditioner changed to BDDC 463 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 464 465 PetscFunctionBegin; 466 /* First Dirichlet solve */ 467 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 469 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 470 /* 471 Assembling right hand side for BDDC operator 472 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 473 - the interface part of the global vector z 474 */ 475 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 476 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 477 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 478 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 479 ierr = VecCopy(r,z);CHKERRQ(ierr); 480 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 481 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 482 483 /* 484 Apply interface preconditioner 485 Results are stored in: 486 - vec1_D (if needed, i.e. 
with prec_type = PETSC_TRUE) 487 - the interface part of the global vector z 488 */ 489 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 490 491 /* Second Dirichlet solve and assembling of output */ 492 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 494 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 495 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 496 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 497 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 498 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 499 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 500 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 501 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 502 503 PetscFunctionReturn(0); 504 505 } 506 /* -------------------------------------------------------------------------- */ 507 #undef __FUNCT__ 508 #define __FUNCT__ "PCDestroy_BDDC" 509 PetscErrorCode PCDestroy_BDDC(PC pc) 510 { 511 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 /* free data created by PCIS */ 516 ierr = PCISDestroy(pc);CHKERRQ(ierr); 517 /* free BDDC data */ 518 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 519 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 520 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 521 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 522 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 523 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 524 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 525 ierr = 
VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 526 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 527 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 528 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 529 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 530 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 531 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 532 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 533 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 534 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 535 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 536 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 537 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 538 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 539 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 540 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 541 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 542 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 543 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 544 PetscInt i; 545 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 546 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 547 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 548 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 549 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 550 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 551 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 552 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 553 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 554 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 555 ierr = 
PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 556 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 557 /* Free the private data structure that was hanging off the PC */ 558 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561 562 /* -------------------------------------------------------------------------- */ 563 /*MC 564 PCBDDC - Balancing Domain Decomposition by Constraints. 565 566 Options Database Keys: 567 . -pcbddc ??? - 568 569 Level: intermediate 570 571 Notes: The matrix used with this preconditioner must be of type MATIS 572 573 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 574 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 575 on the subdomains). 576 577 Options for the coarse grid preconditioner can be set with - 578 Options for the Dirichlet subproblem can be set with - 579 Options for the Neumann subproblem can be set with - 580 581 Contributed by Stefano Zampini 582 583 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 584 M*/ 585 EXTERN_C_BEGIN 586 #undef __FUNCT__ 587 #define __FUNCT__ "PCCreate_BDDC" 588 PetscErrorCode PCCreate_BDDC(PC pc) 589 { 590 PetscErrorCode ierr; 591 PC_BDDC *pcbddc; 592 PCBDDCGraph mat_graph; 593 594 PetscFunctionBegin; 595 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 596 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 597 pc->data = (void*)pcbddc; 598 599 /* create PCIS data structure */ 600 ierr = PCISCreate(pc);CHKERRQ(ierr); 601 602 /* BDDC specific */ 603 pcbddc->coarse_vec = 0; 604 pcbddc->coarse_rhs = 0; 605 pcbddc->coarse_ksp = 0; 606 pcbddc->coarse_phi_B = 0; 607 pcbddc->coarse_phi_D = 0; 608 pcbddc->vec1_P = 0; 609 pcbddc->vec1_R = 0; 610 pcbddc->vec2_R = 0; 611 pcbddc->local_auxmat1 = 0; 612 pcbddc->local_auxmat2 = 0; 613 pcbddc->R_to_B = 0; 614 pcbddc->R_to_D = 0; 615 pcbddc->ksp_D = 0; 616 pcbddc->ksp_R = 0; 617 pcbddc->local_primal_indices = 0; 618 pcbddc->prec_type = PETSC_FALSE; 619 pcbddc->NeumannBoundaries = 0; 620 pcbddc->ISForDofs = 0; 621 pcbddc->ISForVertices = 0; 622 pcbddc->n_ISForFaces = 0; 623 pcbddc->n_ISForEdges = 0; 624 pcbddc->ConstraintMatrix = 0; 625 pcbddc->use_nnsp_true = PETSC_FALSE; 626 pcbddc->local_primal_sizes = 0; 627 pcbddc->local_primal_displacements = 0; 628 pcbddc->replicated_local_primal_indices = 0; 629 pcbddc->replicated_local_primal_values = 0; 630 pcbddc->coarse_loc_to_glob = 0; 631 pcbddc->dbg_flag = PETSC_FALSE; 632 pcbddc->coarsening_ratio = 8; 633 634 /* allocate and initialize needed graph structure */ 635 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 636 pcbddc->mat_graph->xadj = 0; 637 pcbddc->mat_graph->adjncy = 0; 638 639 /* function pointers */ 640 pc->ops->apply = PCApply_BDDC; 641 pc->ops->applytranspose = 0; 642 pc->ops->setup = PCSetUp_BDDC; 643 pc->ops->destroy = PCDestroy_BDDC; 644 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 645 pc->ops->view = 0; 646 pc->ops->applyrichardson = 0; 647 pc->ops->applysymmetricleft = 0; 648 pc->ops->applysymmetricright = 0; 649 650 /* composing function */ 651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 652 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 653 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 654 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 656 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 658 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 660 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 661 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 662 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 664 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 /* -------------------------------------------------------------------------- */ 669 /* All static functions from now on */ 670 /* -------------------------------------------------------------------------- */ 671 #undef __FUNCT__ 672 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 673 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 674 { 675 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 676 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 677 PetscInt nvtxs,*xadj,*adjncy; 678 Mat mat_adj; 679 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 680 PCBDDCGraph mat_graph=pcbddc->mat_graph; 681 PetscErrorCode ierr; 682 683 PetscFunctionBegin; 684 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 685 if(!mat_graph->xadj) { 686 ierr = 
      MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
    /* Extract 0-based CSR structure of the local adjacency; flg_row reports whether the format was available */
    ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
    }
    /* Get adjacency into BDDC workspace */
    ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
    /* xadj/adjncy were deep-copied above (PETSC_COPY_VALUES), so they can be safely returned to the Mat */
    ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
    }
    ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
/*
   PCBDDCApplyInterfacePreconditioner - Applies the BDDC operator to the global vector z, in place.

   The action is the classical two-level BDDC sum:
     1. restrict z to the local interface and apply the partition of unity D;
     2. coarse correction through the coarse basis PHI (PHI^T gathers the coarse rhs,
        the coarse KSP solves it, PHI interpolates back);
     3. local correction obtained by solving a saddle-point problem on the R
        ("remaining", i.e. non-vertex) dofs via PCBDDCSolveSaddlePoint().
   When pcbddc->prec_type is set, interior (Dirichlet, "_D") dofs take part in both
   corrections as well; otherwise only boundary ("_B") dofs are touched.

   Input/Output Parameter:
.  z - global vector holding the residual on entry, the preconditioned residual on exit

   Note: the coarse scatter Begin/End pair brackets the local saddle-point solve so
   that coarse communication overlaps with local computation.
*/
static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z)
{
  PetscErrorCode ierr;
  PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
  PC_IS* pcis = (PC_IS*) (pc->data);  /* PC_BDDC has a PC_IS as its first member, so both casts of pc->data are valid */
  const PetscScalar zero = 0.0;

  PetscFunctionBegin;
  /* Get Local boundary and apply partition of unity */
  ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr);

  /* Application of PHI^T */
  ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
  if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }

  /* Scatter data of coarse_rhs */
  /* NOTE(review): when coarse_rhs is NULL (rank not owning coarse dofs) CHKERRQ checks the stale ierr,
     which is 0 here since every previous call was CHKERRQ'd */
  if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Local solution on R nodes (performed while the coarse scatter is in flight) */
  ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  if(pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if(pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* Coarse solution */
  ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /* Sum contributions from two levels */
  ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
  if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }

  /* Apply partition of unity and sum boundary values */
  ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecSet(z,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSolveSaddlePoint"
/*
   PCBDDCSolveSaddlePoint - Solves the local saddle-point problem on the R dofs.

   Input is pcbddc->vec1_R, output is pcbddc->vec2_R. The constrained solve is
   obtained from the unconstrained one by the correction
     vec2_R += local_auxmat2 * (local_auxmat1 * vec2_R)
   where local_auxmat1/2 encode the precomputed Schur-complement factors of the
   constraint equations (see their assembly in PCBDDCCoarseSetUp). The correction
   is skipped when there are no non-vertex constraints.
*/
static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  /* unconstrained Neumann solve on R dofs */
  ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
  if(pcbddc->n_constraints) {
    /* project the solution onto the constraint space */
    ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
    ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
/*
   PCBDDCScatterCoarseDataBegin - Starts the communication moving local primal data
   to/from the coarse problem layout.

   For the SCATTERS_BDDC strategy this posts a (possibly asynchronous) VecScatter;
   for the GATHERS_BDDC strategy all work is done in PCBDDCScatterCoarseDataEnd,
   so this is a no-op.
*/
static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  switch(pcbddc->coarse_communications_type){
    case SCATTERS_BDDC:
      ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      /* handled entirely in PCBDDCScatterCoarseDataEnd */
      break;
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
/*
   PCBDDCScatterCoarseDataEnd - Completes the communication started by
   PCBDDCScatterCoarseDataBegin.

   SCATTERS_BDDC: simply finishes the VecScatter.
   GATHERS_BDDC: performs collective communication on raw vector arrays,
   depending on the coarse problem type:
     - SEQUENTIAL_BDDC: coarse problem lives on rank 0 only; forward uses
       MPI_Gatherv to rank 0 (summing duplicated primal dofs there), reverse
       uses MPI_Scatterv from rank 0;
     - REPLICATED_BDDC: coarse problem replicated on all ranks; forward uses
       MPI_Allgatherv, reverse needs no communication since each rank already
       holds the full coarse vector and just picks out its own primal entries;
     - MULTILEVEL_BDDC / PARALLEL_BDDC: nothing to do here (these use the
       scatter path).
   vec_from/vec_to may be NULL on ranks not holding the corresponding data.

   NOTE(review): the VecGetArray/VecRestoreArray return codes are not CHKERRQ'd
   in the GATHERS branch.
*/
static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
  PetscScalar* array_to;
  PetscScalar* array_from;
  MPI_Comm comm=((PetscObject)pc)->comm;
  PetscInt i;

  PetscFunctionBegin;

  switch(pcbddc->coarse_communications_type){
    case SCATTERS_BDDC:
      ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      if(vec_from) VecGetArray(vec_from,&array_from);
      if(vec_to) VecGetArray(vec_to,&array_to);
      switch(pcbddc->coarse_problem_type){
        case SEQUENTIAL_BDDC:
          if(smode == SCATTER_FORWARD) {
            /* gather every rank's local primal values to rank 0 */
            ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
            if(vec_to) {
              /* accumulate duplicated primal dofs into the coarse rhs (ADD semantics) */
              for(i=0;i<pcbddc->replicated_primal_size;i++)
                array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
            }
          } else {
            /* reverse: rank 0 expands the coarse solution and scatters each rank its slice */
            if(vec_from)
              for(i=0;i<pcbddc->replicated_primal_size;i++)
                pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
            ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
          }
          break;
        case REPLICATED_BDDC:
          if(smode == SCATTER_FORWARD) {
            /* every rank receives all primal values and sums duplicates locally */
            ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
            for(i=0;i<pcbddc->replicated_primal_size;i++)
              array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
          } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
            for(i=0;i<pcbddc->local_primal_size;i++)
              array_to[i]=array_from[pcbddc->local_primal_indices[i]];
          }
          break;
        case MULTILEVEL_BDDC:
          break;
        case PARALLEL_BDDC:
          break;
      }
      if(vec_from) VecRestoreArray(vec_from,&array_from);
      if(vec_to) VecRestoreArray(vec_to,&array_to);
      break;
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
/* BDDC_USE_POD forces the eigenvalue (POD) path below even when LAPACK GESVD is
   available, by temporarily pretending GESVD is missing; the define is undone
   right after PCBDDCCreateConstraintMatrix via UNDEF_PETSC_MISSING_LAPACK_GESVD */
#ifdef BDDC_USE_POD
#if !defined(PETSC_MISSING_LAPACK_GESVD)
#define PETSC_MISSING_LAPACK_GESVD 1
#define UNDEF_PETSC_MISSING_LAPACK_GESVD 1
#endif
#endif

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
/*
   PCBDDCCreateConstraintMatrix - Builds pcbddc->ConstraintMatrix, the local
   (SEQAIJ) matrix whose rows are the primal constraints: one row per selected
   vertex (a single 1.0 entry) followed by one row per retained edge/face
   constraint (the near null space vectors, optionally orthonormalized per
   connected component via SVD or a POD-style eigen-decomposition depending on
   the availability of LAPACK GESVD).
   Also sets pcbddc->n_vertices, n_constraints and local_primal_size.
*/
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS* pcis = (PC_IS*)(pc->data);
  PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
  Mat_IS *matis = (Mat_IS*)pc->pmat->data;
  PetscInt *nnz,*vertices,*is_indices;
  PetscScalar *temp_quadrature_constraint;
  PetscInt *temp_indices,*temp_indices_to_constraint;
  PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint;
  PetscInt n_constraints,n_vertices,size_of_constraint;
  PetscReal quad_value;
  PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
  PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
  IS *used_IS;
  const MatType impMatType=MATSEQAIJ;
  PetscBLASInt Bs,Bt,lwork,lierr;
  PetscReal tol=1.0e-8;  /* drop tolerance for singular/eigen values of candidate constraints */
  MatNullSpace nearnullsp;
  const Vec *nearnullvecs;
  Vec *localnearnullsp;
  PetscScalar *work,*temp_basis,*array_vector,*correlation_mat;
  PetscReal *rwork,*singular_vals;
  PetscBLASInt Bone=1;
  /* some ugly conditional declarations */
#if defined(PETSC_MISSING_LAPACK_GESVD)
  PetscScalar dot_result;
  PetscScalar one=1.0,zero=0.0;
  PetscInt ii;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar val1,val2;
#endif
#else
  PetscBLASInt dummy_int;
  PetscScalar dummy_scalar;
#endif

  PetscFunctionBegin;
  /* check if near null space is attached to global mat */
  ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
  if (nearnullsp) {
    ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
  } else { /* if near null space is not provided it uses constants */
    nnsp_has_cnst = PETSC_TRUE;
    use_nnsp_true = PETSC_TRUE;
  }
  if(nnsp_has_cnst) {
    nnsp_addone = 1;  /* the constant vector counts as one extra near null space function */
  }
  /*
       Evaluate maximum storage size needed by the procedure
       - temp_indices will contain start index of each constraint stored as follows
       - temp_indices_to_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
       - temp_quadrature_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
  */

  /* worst case: every edge/face contributes (nnsp_addone+nnsp_size) constraints, plus one row per vertex */
  total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr);
  total_counts += n_vertices;
  ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
  total_counts = 0;
  max_size_of_constraint = 0;
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
    total_counts += j;
    if(j>max_size_of_constraint) max_size_of_constraint=j;
  }
  total_counts *= (nnsp_addone+nnsp_size);
  total_counts += n_vertices;
  ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
  /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
  rwork = 0;
  work = 0;
  singular_vals = 0;
  temp_basis = 0;
  correlation_mat = 0;
  if(!pcbddc->use_nnsp_true) {
    PetscScalar temp_work;
#if defined(PETSC_MISSING_LAPACK_GESVD)
    /* POD */
    PetscInt max_n;
    max_n = nnsp_addone+nnsp_size;
    /* using some techniques borrowed from Proper Orthogonal Decomposition */
    ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
    ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
    ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    Bt = PetscBLASIntCast(max_n);
    lwork=-1;
#if !defined(PETSC_USE_COMPLEX)
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
#else
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
#endif
#else /* on missing GESVD */
    /* SVD */
    PetscInt max_n,min_n;
    max_n = max_size_of_constraint;
    min_n = nnsp_addone+nnsp_size;
    if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
      min_n = max_size_of_constraint;
      max_n = nnsp_addone+nnsp_size;
    }
    ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    lwork=-1;
    Bs = PetscBLASIntCast(max_n);
    Bt = PetscBLASIntCast(min_n);
    dummy_int = Bs;
    /* GESVD may raise spurious FP exceptions; suppress trapping around the call */
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
#else
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
#endif
    if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
#endif
    /* Allocate optimal workspace (size returned by the lwork=-1 query in temp_work) */
    lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
    total_counts = (PetscInt)lwork;
    ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* get local part of global near null space vectors */
  ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  /* Now we can loop on constraining sets */
  total_counts=0;
  temp_indices[0]=0;
  /* vertices */
  PetscBool used_vertex;
  ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
  ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  if(nnsp_has_cnst) { /* consider all vertices */
    for(i=0;i<n_vertices;i++) {
      temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
      temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
      temp_indices[total_counts+1]=temp_indices[total_counts]+1;
      vertices[total_counts]=is_indices[i];
      total_counts++;
    }
  } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
    for(i=0;i<n_vertices;i++) {
      used_vertex=PETSC_FALSE;
      k=0;
      while(!used_vertex && k<nnsp_size) {
        ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
        if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) {
          temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
          temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
          temp_indices[total_counts+1]=temp_indices[total_counts]+1;
          vertices[total_counts]=is_indices[i];
          total_counts++;
          used_vertex=PETSC_TRUE;
        }
        ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
        k++;
      }
    }
  }
  ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  n_vertices=total_counts;  /* from now on n_vertices counts the vertices actually kept */
  /* edges and faces */
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
    temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
    ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
    ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    if(nnsp_has_cnst) {
      temp_constraints++;
      /* constant constraint, normalized so the row has unit 2-norm */
      quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
      }
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    for(k=0;k<nnsp_size;k++) {
      ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
      }
      ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      quad_value = 1.0;
      if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
        Bs = PetscBLASIntCast(size_of_constraint);
        quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone);
      }
      if ( quad_value > 0.0 ) { /* keep indices and values */
        temp_constraints++;
        temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
        total_counts++;
      }
    }
    ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */
    if(!use_nnsp_true) {

      Bs = PetscBLASIntCast(size_of_constraint);
      Bt = PetscBLASIntCast(temp_constraints);

#if defined(PETSC_MISSING_LAPACK_GESVD)
      /* POD path: orthonormalize via eigen-decomposition of the (Bt x Bt) correlation matrix */
      ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr);
      /* Store upper triangular part of correlation matrix */
      for(j=0;j<temp_constraints;j++) {
        for(k=0;k<j+1;k++) {
#if defined(PETSC_USE_COMPLEX)
          /* hand made complex dot product */
          dot_result = 0.0;
          for (ii=0; ii<size_of_constraint; ii++) {
            val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
            val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
            dot_result += val1*PetscConj(val2);
          }
#else
          dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
                                &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
#endif
          correlation_mat[j*temp_constraints+k]=dot_result;
        }
      }
#if !defined(PETSC_USE_COMPLEX)
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
#else
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
      /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
      j=0;
      while( j < Bt && singular_vals[j] < tol) j++;
      total_counts=total_counts-j;  /* discard the j constraints whose eigenvalue fell below tol */
      if(j<temp_constraints) {
        /* scale eigenvectors by 1/sqrt(lambda) so the resulting basis is orthonormal */
        for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
        BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
        /* copy POD basis into used quadrature memory */
        for(k=0;k<Bt-j;k++) {
          for(ii=0;ii<size_of_constraint;ii++) {
            temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
          }
        }
      }

#else /* on missing GESVD */

      /* SVD path: GESVD with jobu="O" overwrites the constraint block with its left singular vectors */
      PetscInt min_n = temp_constraints;
      if(min_n > size_of_constraint) min_n = size_of_constraint;
      dummy_int = Bs;
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
#else
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
      /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */
      j=0;
      while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
      total_counts = total_counts-(PetscInt)Bt+(min_n-j);

#endif
    }
  }
  n_constraints=total_counts-n_vertices;
  local_primal_size = total_counts;
  /* Assemble the (local_primal_size x pcis->n) constraint matrix, preallocated exactly per row */
  ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
  ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
  ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr);
  for(i=0;i<local_primal_size;i++) {
    nnz[i]=temp_indices[i+1]-temp_indices[i];
  }
  ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
  for(i=0;i<local_primal_size;i++) {
    size_of_constraint=temp_indices[i+1]-temp_indices[i];
    ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&i,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* set quantities in pcbddc data structure */
  pcbddc->n_vertices = n_vertices;
  pcbddc->n_constraints = n_constraints;
  pcbddc->local_primal_size = local_primal_size;
  /* free workspace no longer needed (PetscFree is a no-op on the pointers left at 0 above) */
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscFree(temp_basis);CHKERRQ(ierr);
  ierr = PetscFree(singular_vals);CHKERRQ(ierr);
  ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
  ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
  }
  ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* undo the temporary GESVD masking requested by BDDC_USE_POD (see above) */
#ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD
#undef PETSC_MISSING_LAPACK_GESVD
#endif
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
/*
   PCBDDCCoarseSetUp - Builds everything needed to apply the BDDC preconditioner:
   the R/V splitting of the local dofs (Dohrmann's notation: V = vertices,
   R = remaining dofs), the R_to_B/R_to_D scatters, the local Dirichlet (A_II)
   and Neumann (A_RR) KSP solvers, the auxiliary matrices implementing the
   constrained (saddle-point) local solves, and the coarse basis functions.
   Must be called after PCBDDCCreateConstraintMatrix has filled
   pcbddc->ConstraintMatrix and the primal sizes.
*/
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS* pcis = (PC_IS*)(pc->data);
  PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
  Mat_IS *matis = (Mat_IS*)pc->pmat->data;
  IS is_R_local;
  IS is_V_local;
  IS is_C_local;
  IS is_aux1;
  IS is_aux2;
  const VecType impVecType;
  const MatType impMatType;
  PetscInt n_R=0;
  PetscInt n_D=0;
  PetscInt n_B=0;
  PetscScalar zero=0.0;
  PetscScalar one=1.0;
  PetscScalar m_one=-1.0;
  PetscScalar* array;
  PetscScalar *coarse_submat_vals;
  PetscInt *idx_R_local;
  PetscInt *idx_V_B;
  PetscScalar *coarsefunctions_errors;
  PetscScalar *constraints_errors;
  /* auxiliary indices */
  PetscInt i,j;
  /* for verbose output of bddc */
  PetscViewer viewer=pcbddc->dbg_viewer;
  PetscBool dbg_flag=pcbddc->dbg_flag;
  /* for counting coarse dofs */
  PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints;
  PetscInt size_of_constraint;
  PetscInt *row_cmat_indices;
  PetscScalar *row_cmat_values;
  PetscInt *vertices;

  PetscFunctionBegin;
  /* Set Non-overlapping dimensions */
  n_B = pcis->n_B; n_D = pcis->n - n_B;

  /* get vertex indices from constraint matrix */
  /* a vertex is identified as a constraint row with exactly one nonzero */
  ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
  j=0;
  for(i=0;i<pcbddc->local_primal_size;i++) {
    ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr);
    if(size_of_constraint == 1) {
      vertices[j]=row_cmat_indices[0];
      j++;
    }
    ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
  /* marker trick: set every dof to one, zero out the vertices, collect the survivors as R */
  ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; }
  ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
  for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  if(dbg_flag) {
    ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }
  /* Allocate needed vectors */
  /* Set Mat type for local matrices needed by BDDC precondtioner */
  impMatType = MATSEQDENSE;
  impVecType = VECSEQ;
  ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr);
  ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr);
  ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
  ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr);
  ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr);
  ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);

  /* Creating some index sets needed */
  /* For submatrices */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr);
  if(n_vertices) {
    ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr);
  }
  if(n_constraints) {
    /* constraint rows follow the vertex rows inside ConstraintMatrix */
    ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr);
  }

  /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
  {
    PetscInt *aux_array1;
    PetscInt *aux_array2;

    ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
    ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);

    /* multiplicity trick: reverse-scatter of ones with ADD_VALUES followed by a forward
       scatter leaves in vec1_N the number of subdomains sharing each dof; entries > 1
       therefore identify interface (boundary) dofs */
    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } }
    ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
    ierr = PetscFree(aux_array1);CHKERRQ(ierr);
    ierr = PetscFree(aux_array2);CHKERRQ(ierr);
    ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
    ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);

    if(pcbddc->prec_type || dbg_flag ) {
      /* R_to_D maps the interior (multiplicity == 1) subset of R onto the Dirichlet dofs */
      ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } }
      ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr);
      ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
      ierr = PetscFree(aux_array1);CHKERRQ(ierr);
      ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
    }
  }

  /* vertices in boundary numbering */
  if(n_vertices) {
    /* write each vertex's ordinal into vec1_N, push it to boundary numbering, then
       locate each ordinal by linear search (n_vertices is expected to be small) */
    ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    for (i=0; i<n_vertices; i++) {
      j=0;
      while (array[j] != i ) {j++;}
      idx_V_B[i]=j;
    }
    ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
  }

  /* Creating PC contexts for local Dirichlet and Neumann problems */
  {
    Mat A_RR;
    PC pc_temp;
    /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */
    ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
    /* default: direct solve (PREONLY + LU), overridable via the dirichlet_ options prefix */
    ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
    /* Set Up KSP for Dirichlet problem of BDDC */
    ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
    if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF);
    /* Matrix for Neumann problem is A_RR -> we need to create it */
    ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
    ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
    /* default */
    ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
    /* Set Up KSP for Neumann problem of BDDC */
    ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
    if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF);
    /* check Dirichlet and Neumann solvers */
    /* debug: solve with a random rhs built from a known solution and report ||x - x_exact||_inf */
    if(pcbddc->dbg_flag) {
      Vec temp_vec;
      PetscScalar value;

      ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr);
      ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
      ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr);
      ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr);
      ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = VecDestroy(&temp_vec);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr);
      ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
      ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr);
      ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr);
      ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = VecDestroy(&temp_vec);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
    /* free Neumann problem's matrix */
    /* the KSP retains its own reference to A_RR, so this only drops ours */
    ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
  }

  /* Assemble all remaining stuff needed to apply BDDC */
  {
    Mat A_RV,A_VR,A_VV;
    Mat M1,M2;
    Mat C_CR;
    Mat AUXMAT;
    Vec vec1_C;
    Vec vec2_C;
    Vec vec1_V;
    Vec vec2_V;
    PetscInt *nnz;
    PetscInt *auxindices;
    PetscInt index;
    PetscScalar* array2;
    MatFactorInfo matinfo;

    /* Allocating some extra storage just to be safe */
    ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
    ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
    for(i=0;i<pcis->n;i++) {auxindices[i]=i;}

    /* some work vectors on vertices and/or constraints */
    if(n_vertices) {
      ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
      ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
      ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
      ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
    }
    if(pcbddc->n_constraints) {
      ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr);
      ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr);
      ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr);
      ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr);
      ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr);
    }
    /* Precompute stuffs needed for preprocessing and application of BDDC*/
    if(n_constraints) {
      /* some work vectors */
      ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr);

      /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
      /* column by column: solve A_RR x = -c_i where c_i is the i-th constraint row in R numbering */
      for(i=0;i<n_constraints;i++) {
        ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
        ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
        /* Get row of constraint matrix in R numbering */
        ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
        ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
        for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; }
        ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
        ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
        for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; }
        ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
        /* Solve for row of constraint matrix in R numbering */
        ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
        /* Set values */
        ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr);
        ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      /* Create Constraint matrix on R nodes: C_{CR} */
      ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
      ierr = ISDestroy(&is_C_local);CHKERRQ(ierr);

      /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
      ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr);
      ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr);
      ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr);
      ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);

      /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */
      /* built column by column: M1 e_i = -AUXMAT^{-1} e_i (sign flip undoes the minus in AUXMAT) */
      ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
      ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
      ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr);
      for(i=0;i<n_constraints;i++) {
        ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
        ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
        ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr);
        ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr);
        ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr);
        ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
      /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
      ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);

    }

    /* Get submatrices from subdomain matrix */
    if(n_vertices){
      ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
      /* Assemble M2 = A_RR^{-1}A_RV */
      ierr =
MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1515 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1516 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1517 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1518 for(i=0;i<n_vertices;i++) { 1519 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1520 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1521 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1522 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1523 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1524 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1525 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1526 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1527 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1528 } 1529 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1530 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1531 } 1532 1533 /* Matrix of coarse basis functions (local) */ 1534 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1535 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1536 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1537 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1538 if(pcbddc->prec_type || dbg_flag ) { 1539 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1540 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1541 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1542 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1543 } 1544 1545 if(dbg_flag) { 1546 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1547 ierr = PetscMalloc( 
pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1548 } 1549 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1550 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1551 1552 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1553 for(i=0;i<n_vertices;i++){ 1554 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1555 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1556 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1557 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1558 /* solution of saddle point problem */ 1559 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1560 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1561 if(n_constraints) { 1562 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1563 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1564 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1565 } 1566 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1567 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1568 1569 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1570 /* coarse basis functions */ 1571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1575 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1576 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1577 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1578 if( pcbddc->prec_type || dbg_flag ) { 1579 ierr = 
VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1581 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1582 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1583 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1584 } 1585 /* subdomain contribution to coarse matrix */ 1586 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1587 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 1588 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1589 if(n_constraints) { 1590 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1591 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 1592 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1593 } 1594 1595 if( dbg_flag ) { 1596 /* assemble subdomain vector on nodes */ 1597 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1598 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1599 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1600 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1601 array[ vertices[i] ] = one; 1602 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1603 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1604 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1605 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1606 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1607 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1608 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1609 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1610 if(n_constraints) { 1611 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1612 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1613 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1614 } 1615 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1616 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1617 /* check saddle point solution */ 1618 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1619 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1620 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1621 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1622 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1623 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1624 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1625 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1626 } 1627 } 1628 1629 for(i=0;i<n_constraints;i++){ 1630 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1631 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1632 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1633 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1634 /* solution of saddle point problem */ 1635 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1636 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1637 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1638 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1639 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1640 /* coarse 
basis functions */ 1641 index=i+n_vertices; 1642 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1643 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1646 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1647 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1648 if( pcbddc->prec_type || dbg_flag ) { 1649 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1652 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1653 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1654 } 1655 /* subdomain contribution to coarse matrix */ 1656 if(n_vertices) { 1657 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1658 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 1659 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1660 } 1661 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1662 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 1663 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1664 1665 if( dbg_flag ) { 1666 /* assemble subdomain vector on nodes */ 1667 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1668 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1669 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1670 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1671 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1672 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1673 /* assemble subdomain vector of lagrange multipliers */ 1674 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1675 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1676 if( n_vertices) { 1677 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1678 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1679 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1680 } 1681 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1682 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1683 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1684 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1685 /* check saddle point solution */ 1686 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1687 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1688 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1689 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1690 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1691 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1692 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1693 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1694 } 1695 } 1696 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 if( pcbddc->prec_type || dbg_flag ) { 1699 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1701 } 1702 /* Checking coarse_sub_mat and coarse basis functios */ 1703 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1704 if(dbg_flag) { 1705 
1706 Mat coarse_sub_mat; 1707 Mat TM1,TM2,TM3,TM4; 1708 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1709 const MatType checkmattype=MATSEQAIJ; 1710 PetscScalar value; 1711 1712 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1713 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1714 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1715 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1716 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1717 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1719 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1720 1721 /*PetscViewer view_out; 1722 PetscMPIInt myrank; 1723 char filename[256]; 1724 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 1725 sprintf(filename,"coarsesubmat_%04d.m",myrank); 1726 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 1727 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1728 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 1729 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 1730 1731 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1732 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1733 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1734 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1735 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1736 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 
1737 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1738 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1739 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1740 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1741 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1742 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1743 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1744 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1745 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1746 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1748 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1749 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1750 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1751 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1752 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1753 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1754 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1755 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1756 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1757 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1758 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1759 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1760 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1761 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1762 ierr = 
MatDestroy(&TM4);CHKERRQ(ierr); 1763 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1764 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1765 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1766 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1767 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1768 } 1769 1770 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1771 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1772 /* free memory */ 1773 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1774 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1775 ierr = PetscFree(nnz);CHKERRQ(ierr); 1776 if(n_vertices) { 1777 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1778 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1779 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1780 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1781 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1782 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1783 } 1784 if(pcbddc->n_constraints) { 1785 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1786 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1787 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1788 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1789 } 1790 } 1791 /* free memory */ 1792 if(n_vertices) { 1793 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1794 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1795 } 1796 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1797 1798 PetscFunctionReturn(0); 1799 } 1800 1801 /* -------------------------------------------------------------------------- */ 1802 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1805 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1806 { 1807 1808 1809 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1810 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1811 PC_IS *pcis = (PC_IS*)pc->data; 1812 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1813 MPI_Comm coarse_comm; 1814 1815 /* common to all choiches */ 1816 
PetscScalar *temp_coarse_mat_vals; 1817 PetscScalar *ins_coarse_mat_vals; 1818 PetscInt *ins_local_primal_indices; 1819 PetscMPIInt *localsizes2,*localdispl2; 1820 PetscMPIInt size_prec_comm; 1821 PetscMPIInt rank_prec_comm; 1822 PetscMPIInt active_rank=MPI_PROC_NULL; 1823 PetscMPIInt master_proc=0; 1824 PetscInt ins_local_primal_size; 1825 /* specific to MULTILEVEL_BDDC */ 1826 PetscMPIInt *ranks_recv; 1827 PetscMPIInt count_recv=0; 1828 PetscMPIInt rank_coarse_proc_send_to; 1829 PetscMPIInt coarse_color = MPI_UNDEFINED; 1830 ISLocalToGlobalMapping coarse_ISLG; 1831 /* some other variables */ 1832 PetscErrorCode ierr; 1833 const MatType coarse_mat_type; 1834 const PCType coarse_pc_type; 1835 const KSPType coarse_ksp_type; 1836 PC pc_temp; 1837 PetscInt i,j,k,bs; 1838 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1839 /* verbose output viewer */ 1840 PetscViewer viewer=pcbddc->dbg_viewer; 1841 PetscBool dbg_flag=pcbddc->dbg_flag; 1842 1843 PetscFunctionBegin; 1844 1845 ins_local_primal_indices = 0; 1846 ins_coarse_mat_vals = 0; 1847 localsizes2 = 0; 1848 localdispl2 = 0; 1849 temp_coarse_mat_vals = 0; 1850 coarse_ISLG = 0; 1851 1852 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1853 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1854 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1855 1856 /* Assign global numbering to coarse dofs */ 1857 { 1858 PetscScalar one=1.,zero=0.; 1859 PetscScalar *array; 1860 PetscMPIInt *auxlocal_primal; 1861 PetscMPIInt *auxglobal_primal; 1862 PetscMPIInt *all_auxglobal_primal; 1863 PetscMPIInt *all_auxglobal_primal_dummy; 1864 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1865 PetscInt *row_cmat_indices; 1866 PetscInt size_of_constraint; 1867 PetscScalar coarsesum; 1868 1869 /* Construct needed data structures for message passing */ 1870 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1871 ierr 
= PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1872 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1873 /* Gather local_primal_size information for all processes */ 1874 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1875 pcbddc->replicated_primal_size = 0; 1876 for (i=0; i<size_prec_comm; i++) { 1877 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1878 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1879 } 1880 if(rank_prec_comm == 0) { 1881 /* allocate some auxiliary space */ 1882 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1883 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1884 } 1885 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1886 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1887 1888 /* First let's count coarse dofs. 
1889 This code fragment assumes that the number of local constraints per connected component 1890 is not greater than the number of nodes defined for the connected component 1891 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1892 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1893 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1894 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1895 for(i=0;i<pcbddc->local_primal_size;i++) { 1896 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1897 for (j=0; j<size_of_constraint; j++) { 1898 k = row_cmat_indices[j]; 1899 if( array[k] == zero ) { 1900 array[k] = one; 1901 auxlocal_primal[i] = k; 1902 break; 1903 } 1904 } 1905 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1906 } 1907 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1908 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1909 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1910 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1911 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1912 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1914 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1915 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1916 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1917 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1918 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1919 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1920 pcbddc->coarse_size = (PetscInt) coarsesum; 1921 1922 /* Now assign them a global numbering */ 1923 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1924 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1925 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1926 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1927 1928 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1929 /* It implements a function similar to PetscSortRemoveDupsInt */ 1930 if(rank_prec_comm==0) { 1931 /* dummy argument since PetscSortMPIInt doesn't exist! */ 1932 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1933 k=1; 1934 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1935 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1936 if(j != all_auxglobal_primal[i] ) { 1937 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1938 k++; 1939 j=all_auxglobal_primal[i]; 1940 } 1941 } 1942 } else { 1943 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1944 } 1945 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. 
*/ 1946 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1947 1948 /* Now get global coarse numbering of local primal nodes */ 1949 for(i=0;i<pcbddc->local_primal_size;i++) { 1950 k=0; 1951 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1952 pcbddc->local_primal_indices[i]=k; 1953 } 1954 if(dbg_flag) { 1955 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1956 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 1957 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1958 } 1959 /* free allocated memory */ 1960 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1961 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1962 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1963 if(rank_prec_comm == 0) { 1964 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1965 } 1966 } 1967 1968 /* adapt coarse problem type */ 1969 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1970 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1971 1972 switch(pcbddc->coarse_problem_type){ 1973 1974 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 1975 { 1976 /* we need additional variables */ 1977 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1978 MetisInt *metis_coarse_subdivision; 1979 MetisInt options[METIS_NOPTIONS]; 1980 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1981 PetscMPIInt procs_jumps_coarse_comm; 1982 PetscMPIInt *coarse_subdivision; 1983 PetscMPIInt *total_count_recv; 1984 PetscMPIInt *total_ranks_recv; 1985 PetscMPIInt *displacements_recv; 1986 PetscMPIInt *my_faces_connectivity; 1987 PetscMPIInt *petsc_faces_adjncy; 1988 MetisInt *faces_adjncy; 1989 MetisInt *faces_xadj; 1990 PetscMPIInt *number_of_faces; 1991 PetscMPIInt *faces_displacements; 1992 PetscInt *array_int; 1993 PetscMPIInt my_faces=0; 1994 
PetscMPIInt total_faces=0; 1995 PetscInt ranks_stretching_ratio; 1996 1997 /* define some quantities */ 1998 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1999 coarse_mat_type = MATIS; 2000 coarse_pc_type = PCBDDC; 2001 coarse_ksp_type = KSPCHEBYSHEV; 2002 2003 /* details of coarse decomposition */ 2004 n_subdomains = pcbddc->active_procs; 2005 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2006 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 2007 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2008 2009 /*printf("Coarse algorithm details: \n"); 2010 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 2011 2012 /* build CSR graph of subdomains' connectivity through faces */ 2013 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2014 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2015 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2016 for(j=0;j<pcis->n_shared[i];j++){ 2017 array_int[ pcis->shared[i][j] ]+=1; 2018 } 2019 } 2020 for(i=1;i<pcis->n_neigh;i++){ 2021 for(j=0;j<pcis->n_shared[i];j++){ 2022 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2023 my_faces++; 2024 break; 2025 } 2026 } 2027 } 2028 2029 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2030 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2031 my_faces=0; 2032 for(i=1;i<pcis->n_neigh;i++){ 2033 for(j=0;j<pcis->n_shared[i];j++){ 2034 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2035 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2036 my_faces++; 2037 break; 2038 } 2039 } 2040 } 2041 if(rank_prec_comm == master_proc) { 2042 ierr = PetscMalloc 
(total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2043 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2044 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2045 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2046 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2047 } 2048 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2049 if(rank_prec_comm == master_proc) { 2050 faces_xadj[0]=0; 2051 faces_displacements[0]=0; 2052 j=0; 2053 for(i=1;i<size_prec_comm+1;i++) { 2054 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2055 if(number_of_faces[i-1]) { 2056 j++; 2057 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2058 } 2059 } 2060 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 2061 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 2062 } 2063 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2064 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2065 ierr = PetscFree(array_int);CHKERRQ(ierr); 2066 if(rank_prec_comm == master_proc) { 2067 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2068 /*printf("This is the face connectivity (actual ranks)\n"); 2069 for(i=0;i<n_subdomains;i++){ 2070 printf("proc %d is connected with \n",i); 2071 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2072 printf("%d ",faces_adjncy[j]); 2073 printf("\n"); 2074 }*/ 2075 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2076 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2077 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2078 } 2079 2080 if( rank_prec_comm == master_proc ) { 2081 2082 PetscInt 
heuristic_for_metis=3; 2083 2084 ncon=1; 2085 faces_nvtxs=n_subdomains; 2086 /* partition graph induced by face connectivity */ 2087 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2088 ierr = METIS_SetDefaultOptions(options); 2089 /* we need a contiguous partition of the coarse mesh */ 2090 options[METIS_OPTION_CONTIG]=1; 2091 options[METIS_OPTION_DBGLVL]=1; 2092 options[METIS_OPTION_NITER]=30; 2093 if(n_subdomains>n_parts*heuristic_for_metis) { 2094 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2095 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2096 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2097 } else { 2098 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2099 } 2100 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2101 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2102 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2103 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2104 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2105 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2106 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2107 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2108 } 2109 2110 /* Create new communicator for coarse problem splitting the old one */ 2111 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2112 coarse_color=0; /* for communicator splitting */ 2113 active_rank=rank_prec_comm; /* for 
insertion of matrix values */ 2114 } 2115 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2116 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2117 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2118 2119 if( coarse_color == 0 ) { 2120 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2121 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2122 /*printf("Details of coarse comm\n"); 2123 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2124 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 2125 } else { 2126 rank_coarse_comm = MPI_PROC_NULL; 2127 } 2128 2129 /* master proc takes care of arranging and distributing coarse information */ 2130 if(rank_coarse_comm == master_proc) { 2131 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2132 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2133 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 2134 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2135 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2136 /* some initializations */ 2137 displacements_recv[0]=0; 2138 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 2139 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2140 for(j=0;j<size_coarse_comm;j++) 2141 for(i=0;i<size_prec_comm;i++) 2142 if(coarse_subdivision[i]==j) 2143 total_count_recv[j]++; 2144 /* displacements needed for scatterv of total_ranks_recv */ 2145 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2146 /* Now fill 
properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2147 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2148 for(j=0;j<size_coarse_comm;j++) { 2149 for(i=0;i<size_prec_comm;i++) { 2150 if(coarse_subdivision[i]==j) { 2151 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2152 total_count_recv[j]+=1; 2153 } 2154 } 2155 } 2156 /*for(j=0;j<size_coarse_comm;j++) { 2157 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2158 for(i=0;i<total_count_recv[j];i++) { 2159 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2160 } 2161 printf("\n"); 2162 }*/ 2163 2164 /* identify new decomposition in terms of ranks in the old communicator */ 2165 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2166 /*printf("coarse_subdivision in old end new ranks\n"); 2167 for(i=0;i<size_prec_comm;i++) 2168 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2169 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2170 } else { 2171 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2172 } 2173 printf("\n");*/ 2174 } 2175 2176 /* Scatter new decomposition for send details */ 2177 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2178 /* Scatter receiving details to members of coarse decomposition */ 2179 if( coarse_color == 0) { 2180 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2181 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2182 ierr = 
MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2183 } 2184 2185 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2186 if(coarse_color == 0) { 2187 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2188 for(i=0;i<count_recv;i++) 2189 printf("%d ",ranks_recv[i]); 2190 printf("\n"); 2191 }*/ 2192 2193 if(rank_prec_comm == master_proc) { 2194 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2195 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2196 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 2197 free(coarse_subdivision); 2198 free(total_count_recv); 2199 free(total_ranks_recv); 2200 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2201 } 2202 break; 2203 } 2204 2205 case(REPLICATED_BDDC): 2206 2207 pcbddc->coarse_communications_type = GATHERS_BDDC; 2208 coarse_mat_type = MATSEQAIJ; 2209 coarse_pc_type = PCLU; 2210 coarse_ksp_type = KSPPREONLY; 2211 coarse_comm = PETSC_COMM_SELF; 2212 active_rank = rank_prec_comm; 2213 break; 2214 2215 case(PARALLEL_BDDC): 2216 2217 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2218 coarse_mat_type = MATMPIAIJ; 2219 coarse_pc_type = PCREDUNDANT; 2220 coarse_ksp_type = KSPPREONLY; 2221 coarse_comm = prec_comm; 2222 active_rank = rank_prec_comm; 2223 break; 2224 2225 case(SEQUENTIAL_BDDC): 2226 pcbddc->coarse_communications_type = GATHERS_BDDC; 2227 coarse_mat_type = MATSEQAIJ; 2228 coarse_pc_type = PCLU; 2229 coarse_ksp_type = KSPPREONLY; 2230 coarse_comm = PETSC_COMM_SELF; 2231 active_rank = master_proc; 2232 break; 2233 } 2234 2235 switch(pcbddc->coarse_communications_type){ 2236 2237 case(SCATTERS_BDDC): 2238 { 2239 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2240 2241 PetscMPIInt send_size; 2242 PetscInt *aux_ins_indices; 2243 PetscInt ii,jj; 2244 MPI_Request *requests; 2245 2246 /* allocate auxiliary space */ 2247 ierr = 
PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2248 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2249 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2250 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2251 /* allocate buffers for message passing */ 2252 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2253 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2254 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2255 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2256 /* fill up quantities */ 2257 j=0; 2258 for(i=0;i<count_recv;i++){ 2259 ii = ranks_recv[i]; 2260 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2261 localdispl2[i]=j; 2262 j+=localsizes2[i]; 2263 jj = pcbddc->local_primal_displacements[ii]; 2264 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 2265 } 2266 /*printf("aux_ins_indices 1\n"); 2267 for(i=0;i<pcbddc->coarse_size;i++) 2268 printf("%d ",aux_ins_indices[i]); 2269 printf("\n");*/ 2270 /* temp_coarse_mat_vals used to store temporarily received matrix values */ 2271 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2272 /* evaluate how many values I will insert in coarse mat */ 2273 ins_local_primal_size=0; 2274 for(i=0;i<pcbddc->coarse_size;i++) 2275 if(aux_ins_indices[i]) 2276 ins_local_primal_size++; 2277 /* evaluate indices I will insert in coarse mat */ 2278 ierr = PetscMalloc ( 
ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2279 j=0; 2280 for(i=0;i<pcbddc->coarse_size;i++) 2281 if(aux_ins_indices[i]) 2282 ins_local_primal_indices[j++]=i; 2283 /* use aux_ins_indices to realize a global to local mapping */ 2284 j=0; 2285 for(i=0;i<pcbddc->coarse_size;i++){ 2286 if(aux_ins_indices[i]==0){ 2287 aux_ins_indices[i]=-1; 2288 } else { 2289 aux_ins_indices[i]=j; 2290 j++; 2291 } 2292 } 2293 2294 /*printf("New details localsizes2 localdispl2\n"); 2295 for(i=0;i<count_recv;i++) 2296 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2297 printf("\n"); 2298 printf("aux_ins_indices 2\n"); 2299 for(i=0;i<pcbddc->coarse_size;i++) 2300 printf("%d ",aux_ins_indices[i]); 2301 printf("\n"); 2302 printf("ins_local_primal_indices\n"); 2303 for(i=0;i<ins_local_primal_size;i++) 2304 printf("%d ",ins_local_primal_indices[i]); 2305 printf("\n"); 2306 printf("coarse_submat_vals\n"); 2307 for(i=0;i<pcbddc->local_primal_size;i++) 2308 for(j=0;j<pcbddc->local_primal_size;j++) 2309 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2310 printf("\n");*/ 2311 2312 /* processes participating in coarse problem receive matrix data from their friends */ 2313 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2314 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2315 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2316 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2317 } 2318 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2319 2320 /*if(coarse_color == 0) { 2321 printf("temp_coarse_mat_vals\n"); 2322 for(k=0;k<count_recv;k++){ 2323 printf("---- %d ----\n",ranks_recv[k]); 2324 
for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2325 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2326 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2327 printf("\n"); 2328 } 2329 }*/ 2330 /* calculate data to insert in coarse mat */ 2331 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2332 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2333 2334 PetscMPIInt rr,kk,lps,lpd; 2335 PetscInt row_ind,col_ind; 2336 for(k=0;k<count_recv;k++){ 2337 rr = ranks_recv[k]; 2338 kk = localdispl2[k]; 2339 lps = pcbddc->local_primal_sizes[rr]; 2340 lpd = pcbddc->local_primal_displacements[rr]; 2341 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 2342 for(j=0;j<lps;j++){ 2343 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2344 for(i=0;i<lps;i++){ 2345 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2346 /*printf("%d %d\n",row_ind,col_ind);*/ 2347 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2348 } 2349 } 2350 } 2351 ierr = PetscFree(requests);CHKERRQ(ierr); 2352 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2353 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2354 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2355 2356 /* create local to global mapping needed by coarse MATIS */ 2357 { 2358 IS coarse_IS; 2359 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2360 coarse_comm = prec_comm; 2361 active_rank=rank_prec_comm; 2362 ierr = 
ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2363 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2364 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2365 } 2366 } 2367 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2368 /* arrays for values insertion */ 2369 ins_local_primal_size = pcbddc->local_primal_size; 2370 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2371 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2372 for(j=0;j<ins_local_primal_size;j++){ 2373 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2374 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2375 } 2376 } 2377 break; 2378 2379 } 2380 2381 case(GATHERS_BDDC): 2382 { 2383 2384 PetscMPIInt mysize,mysize2; 2385 2386 if(rank_prec_comm==active_rank) { 2387 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2388 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2389 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2390 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2391 /* arrays for values insertion */ 2392 ins_local_primal_size = pcbddc->coarse_size; 2393 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2394 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2395 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2396 localdispl2[0]=0; 2397 for(i=1;i<size_prec_comm;i++) 
localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2398 j=0; 2399 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2400 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2401 } 2402 2403 mysize=pcbddc->local_primal_size; 2404 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2405 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2406 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2407 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2408 } else { 2409 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2410 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2411 } 2412 2413 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2414 if(rank_prec_comm==active_rank) { 2415 PetscInt offset,offset2,row_ind,col_ind; 2416 for(j=0;j<ins_local_primal_size;j++){ 2417 ins_local_primal_indices[j]=j; 2418 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2419 } 2420 for(k=0;k<size_prec_comm;k++){ 2421 offset=pcbddc->local_primal_displacements[k]; 2422 offset2=localdispl2[k]; 2423 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2424 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2425 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2426 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2427 
ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2428 } 2429 } 2430 } 2431 } 2432 break; 2433 }/* switch on coarse problem and communications associated with finished */ 2434 } 2435 2436 /* Now create and fill up coarse matrix */ 2437 if( rank_prec_comm == active_rank ) { 2438 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2439 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2440 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2441 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2442 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2443 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2444 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2445 } else { 2446 Mat matis_coarse_local_mat; 2447 /* remind bs */ 2448 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2449 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2450 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2451 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2452 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2453 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2454 } 2455 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2456 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2457 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2458 
ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2459 2460 /* PetscViewer view_out; 2461 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 2462 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2463 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 2464 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2465 2466 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2467 /* Preconditioner for coarse problem */ 2468 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2469 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2470 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2471 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2472 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2473 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2474 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2475 /* Allow user's customization */ 2476 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2477 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2478 /* Set Up PC for coarse problem BDDC */ 2479 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2480 if(dbg_flag) { 2481 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2482 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2483 } 2484 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2485 } 2486 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2487 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2488 if(dbg_flag) { 2489 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2490 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2491 } 2492 } 2493 } 2494 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2495 IS local_IS,global_IS; 2496 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2497 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2498 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2499 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2500 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2501 } 2502 2503 2504 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2505 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2506 PetscScalar m_one=-1.0; 2507 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2508 const KSPType check_ksp_type=KSPGMRES; 2509 2510 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2511 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2512 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2513 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2514 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2515 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2516 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2517 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2518 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2519 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2520 if(dbg_flag) { 2521 kappa_2=lambda_max/lambda_min; 2522 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2523 ierr = 
VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2524 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2525 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2526 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2527 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2528 } 2529 /* restore coarse ksp to default values */ 2530 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2531 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2532 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2533 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2534 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2535 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2536 } 2537 2538 /* free data structures no longer needed */ 2539 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2540 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2541 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2542 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2543 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2544 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2545 2546 PetscFunctionReturn(0); 2547 } 2548 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2551 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2552 { 2553 2554 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2555 PC_IS *pcis = (PC_IS*)pc->data; 2556 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2557 
PCBDDCGraph mat_graph=pcbddc->mat_graph; 2558 PetscInt *queue_in_global_numbering; 2559 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2560 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 2561 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2562 PetscBool same_set; 2563 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2564 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2565 const PetscInt *neumann_nodes; 2566 const PetscInt *dirichlet_nodes; 2567 IS used_IS; 2568 PetscScalar *array; 2569 PetscScalar *array2; 2570 PetscViewer viewer=pcbddc->dbg_viewer; 2571 2572 PetscFunctionBegin; 2573 /* Setup local adjacency graph */ 2574 mat_graph->nvtxs=pcis->n; 2575 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 2576 i = mat_graph->nvtxs; 2577 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2578 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2579 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2580 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2581 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2582 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2583 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2584 2585 /* Setting dofs splitting in mat_graph->which_dof */ 2586 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2587 PetscInt *is_indices; 2588 PetscInt is_size; 2589 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2590 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2591 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2592 
for(j=0;j<is_size;j++) { 2593 mat_graph->which_dof[is_indices[j]]=i; 2594 } 2595 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2596 } 2597 /* use mat block size as vertex size */ 2598 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2599 } else { /* otherwise it assumes a constant block size */ 2600 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2601 for(i=0;i<mat_graph->nvtxs/bs;i++) { 2602 for(s=0;s<bs;s++) { 2603 mat_graph->which_dof[i*bs+s]=s; 2604 } 2605 } 2606 vertex_size=1; 2607 } 2608 /* count number of neigh per node */ 2609 total_counts=0; 2610 for(i=1;i<pcis->n_neigh;i++){ 2611 s=pcis->n_shared[i]; 2612 total_counts+=s; 2613 for(j=0;j<s;j++){ 2614 mat_graph->count[pcis->shared[i][j]] += 1; 2615 } 2616 } 2617 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2618 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 2619 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2620 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2621 if(used_IS) { 2622 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 2623 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 2624 for(i=0;i<neumann_bsize;i++){ 2625 iindex = neumann_nodes[i]; 2626 if(mat_graph->count[iindex] > 1 && array[iindex]==0.0){ 2627 mat_graph->count[iindex]+=1; 2628 total_counts++; 2629 array[iindex]=array[iindex]+1.0; 2630 } else if(array[iindex]>0.0) { 2631 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! 
Found duplicate node %d\n",iindex); 2632 } 2633 } 2634 } 2635 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2636 /* allocate space for storing the set of neighbours for each node */ 2637 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 2638 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 2639 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 2640 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2641 for(i=1;i<pcis->n_neigh;i++){ 2642 s=pcis->n_shared[i]; 2643 for(j=0;j<s;j++) { 2644 k=pcis->shared[i][j]; 2645 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2646 mat_graph->count[k]+=1; 2647 } 2648 } 2649 /* Check consistency of Neumann nodes */ 2650 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2651 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2652 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2653 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2654 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2655 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2656 /* set -1 fake neighbour to mimic Neumann boundary */ 2657 if(used_IS) { 2658 for(i=0;i<neumann_bsize;i++){ 2659 iindex = neumann_nodes[i]; 2660 if(mat_graph->count[iindex] > 1){ 2661 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 2662 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node 
%d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 2663 } 2664 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2665 mat_graph->count[iindex]+=1; 2666 } 2667 } 2668 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 2669 } 2670 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2671 /* sort set of sharing subdomains */ 2672 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 2673 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2674 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2675 nodes_touched=0; 2676 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 2677 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 2678 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2679 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 2680 if(used_IS) { 2681 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 2682 if(dirichlet_bsize && matis->pure_neumann) { 2683 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 2684 } 2685 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2686 for(i=0;i<dirichlet_bsize;i++){ 2687 iindex=dirichlet_nodes[i]; 2688 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 2689 if(array[iindex]>0.0) { 2690 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! 
Wrong node %d\n",iindex); 2691 } 2692 mat_graph->touched[iindex]=PETSC_TRUE; 2693 mat_graph->where[iindex]=0; 2694 nodes_touched++; 2695 array2[iindex]=array2[iindex]+1.0; 2696 } 2697 } 2698 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2699 } 2700 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2701 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 2702 /* Check consistency of Dirichlet nodes */ 2703 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2704 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2705 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2706 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2707 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2708 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2709 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2710 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2711 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2712 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2713 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2714 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2715 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 2716 if(used_IS) { 2717 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 2718 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2719 for(i=0;i<dirichlet_bsize;i++){ 2720 iindex=dirichlet_nodes[i]; 2721 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 2722 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC 
must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 2723 } 2724 } 2725 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2726 } 2727 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2728 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 2729 2730 for(i=0;i<mat_graph->nvtxs;i++){ 2731 if(!mat_graph->count[i]){ /* interior nodes */ 2732 mat_graph->touched[i]=PETSC_TRUE; 2733 mat_graph->where[i]=0; 2734 nodes_touched++; 2735 } 2736 } 2737 mat_graph->ncmps = 0; 2738 i=0; 2739 while(nodes_touched<mat_graph->nvtxs) { 2740 /* find first untouched node in local ordering */ 2741 while(mat_graph->touched[i]) i++; 2742 mat_graph->touched[i]=PETSC_TRUE; 2743 mat_graph->where[i]=where_values; 2744 nodes_touched++; 2745 /* now find all other nodes having the same set of sharing subdomains */ 2746 for(j=i+1;j<mat_graph->nvtxs;j++){ 2747 /* check for same number of sharing subdomains and dof number */ 2748 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2749 /* check for same set of sharing subdomains */ 2750 same_set=PETSC_TRUE; 2751 for(k=0;k<mat_graph->count[j];k++){ 2752 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 2753 same_set=PETSC_FALSE; 2754 } 2755 } 2756 /* I found a friend of mine */ 2757 if(same_set) { 2758 mat_graph->where[j]=where_values; 2759 mat_graph->touched[j]=PETSC_TRUE; 2760 nodes_touched++; 2761 } 2762 } 2763 } 2764 where_values++; 2765 } 2766 where_values--; if(where_values<0) where_values=0; 2767 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2768 /* Find connected components defined on the shared interface */ 2769 if(where_values) { 2770 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2771 /* For consistency among 
neughbouring procs, I need to sort (by global ordering) each connected component */ 2772 for(i=0;i<mat_graph->ncmps;i++) { 2773 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2774 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2775 } 2776 } 2777 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2778 for(i=0;i<where_values;i++) { 2779 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2780 if(mat_graph->where_ncmps[i]>1) { 2781 adapt_interface=1; 2782 break; 2783 } 2784 } 2785 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2786 if(pcbddc->dbg_flag && adapt_interface_reduced) { 2787 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 2788 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2789 } 2790 if(where_values && adapt_interface_reduced) { 2791 2792 2793 PetscInt sum_requests=0,my_rank; 2794 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2795 PetscInt temp_buffer_size,ins_val,global_where_counter; 2796 PetscInt *cum_recv_counts; 2797 PetscInt *where_to_nodes_indices; 2798 PetscInt *petsc_buffer; 2799 PetscMPIInt *recv_buffer; 2800 PetscMPIInt *recv_buffer_where; 2801 PetscMPIInt *send_buffer; 2802 PetscMPIInt size_of_send; 2803 PetscInt *sizes_of_sends; 2804 MPI_Request *send_requests; 2805 MPI_Request *recv_requests; 2806 PetscInt *where_cc_adapt; 2807 PetscInt **temp_buffer; 2808 PetscInt *nodes_to_temp_buffer_indices; 2809 PetscInt *add_to_where; 2810 2811 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2812 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2813 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2814 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2815 /* first count how many neighbours per connected component I will receive from */ 2816 cum_recv_counts[0]=0; 2817 for(i=1;i<where_values+1;i++){ 2818 j=0; 2819 while(mat_graph->where[j] != i) j++; 2820 where_to_nodes_indices[i-1]=j; 2821 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2822 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 2823 } 2824 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2825 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2826 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2827 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2828 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2829 for(i=0;i<cum_recv_counts[where_values];i++) { 2830 send_requests[i]=MPI_REQUEST_NULL; 2831 recv_requests[i]=MPI_REQUEST_NULL; 2832 } 2833 /* exchange with my neighbours the number of my connected components on the shared interface */ 2834 for(i=0;i<where_values;i++){ 2835 j=where_to_nodes_indices[i]; 2836 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 2837 for(;k<mat_graph->count[j];k++){ 2838 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2839 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2840 sum_requests++; 2841 } 2842 } 2843 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2844 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2845 /* determine the connected component I need to adapt */ 2846 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2847 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2848 for(i=0;i<where_values;i++){ 2849 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2850 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2851 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2852 where_cc_adapt[i]=PETSC_TRUE; 2853 break; 2854 } 2855 } 2856 } 2857 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2858 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2859 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2860 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2861 sum_requests=0; 2862 start_of_send=0; 2863 start_of_recv=cum_recv_counts[where_values]; 2864 for(i=0;i<where_values;i++) { 2865 if(where_cc_adapt[i]) { 2866 size_of_send=0; 2867 for(j=i;j<mat_graph->ncmps;j++) { 2868 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 
1 to where_values included */ 2869 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2870 size_of_send+=1; 2871 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2872 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2873 } 2874 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2875 } 2876 } 2877 j = where_to_nodes_indices[i]; 2878 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 2879 for(;k<mat_graph->count[j];k++){ 2880 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2881 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2882 sum_requests++; 2883 } 2884 sizes_of_sends[i]=size_of_send; 2885 start_of_send+=size_of_send; 2886 } 2887 } 2888 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2889 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2890 buffer_size=0; 2891 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2892 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2893 /* now exchange the data */ 2894 start_of_recv=0; 2895 start_of_send=0; 2896 sum_requests=0; 2897 for(i=0;i<where_values;i++) { 2898 if(where_cc_adapt[i]) { 2899 size_of_send = sizes_of_sends[i]; 2900 j = where_to_nodes_indices[i]; 2901 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 2902 for(;k<mat_graph->count[j];k++){ 2903 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2904 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2905 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2906 start_of_recv+=size_of_recv; 2907 sum_requests++; 2908 } 2909 start_of_send+=size_of_send; 2910 } 2911 } 2912 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2913 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2914 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2915 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2916 for(j=0;j<buffer_size;) { 2917 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2918 k=petsc_buffer[j]+1; 2919 j+=k; 2920 } 2921 sum_requests=cum_recv_counts[where_values]; 2922 start_of_recv=0; 2923 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2924 global_where_counter=0; 2925 for(i=0;i<where_values;i++){ 2926 if(where_cc_adapt[i]){ 2927 temp_buffer_size=0; 2928 /* find nodes on the shared interface we need to adapt */ 2929 for(j=0;j<mat_graph->nvtxs;j++){ 2930 if(mat_graph->where[j]==i+1) { 2931 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2932 temp_buffer_size++; 2933 } else { 2934 nodes_to_temp_buffer_indices[j]=-1; 2935 } 2936 } 2937 /* allocate some temporary space */ 2938 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2939 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2940 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2941 for(j=1;j<temp_buffer_size;j++){ 2942 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2943 } 2944 /* analyze contributions from neighbouring subdomains for i-th conn comp 2945 temp buffer structure: 2946 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2947 3 neighs procs with structured connected components: 2948 neigh 0: [0 1 4], [2 3]; (2 connected components) 2949 neigh 1: [0 1], [2 3 4]; (2 connected components) 2950 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2951 tempbuffer (row-oriented) should be filled as: 2952 [ 0, 0, 0; 2953 0, 0, 1; 2954 1, 1, 2; 2955 1, 1, 2; 2956 0, 1, 0; ]; 2957 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
2958 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2959 */ 2960 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2961 ins_val=0; 2962 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2963 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2964 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2965 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2966 } 2967 buffer_size+=k; 2968 ins_val++; 2969 } 2970 start_of_recv+=size_of_recv; 2971 sum_requests++; 2972 } 2973 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2974 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2975 for(j=0;j<temp_buffer_size;j++){ 2976 if(!add_to_where[j]){ /* found a new cc */ 2977 global_where_counter++; 2978 add_to_where[j]=global_where_counter; 2979 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2980 same_set=PETSC_TRUE; 2981 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2982 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2983 same_set=PETSC_FALSE; 2984 break; 2985 } 2986 } 2987 if(same_set) add_to_where[k]=global_where_counter; 2988 } 2989 } 2990 } 2991 /* insert new data in where array */ 2992 temp_buffer_size=0; 2993 for(j=0;j<mat_graph->nvtxs;j++){ 2994 if(mat_graph->where[j]==i+1) { 2995 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2996 temp_buffer_size++; 2997 } 2998 } 2999 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 3000 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 3001 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 3002 } 3003 } 3004 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3005 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 3006 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 3007 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 3008 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 3009 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 3010 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 3011 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3012 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 3013 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 3014 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 3015 if(global_where_counter) { 3016 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 3017 global_where_counter=0; 3018 for(i=0;i<mat_graph->nvtxs;i++){ 3019 if(mat_graph->where[i] && !mat_graph->touched[i]) { 3020 global_where_counter++; 3021 for(j=i+1;j<mat_graph->nvtxs;j++){ 3022 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 3023 mat_graph->where[j]=global_where_counter; 3024 mat_graph->touched[j]=PETSC_TRUE; 3025 } 3026 } 3027 mat_graph->where[i]=global_where_counter; 3028 mat_graph->touched[i]=PETSC_TRUE; 3029 } 3030 } 3031 where_values=global_where_counter; 3032 } 3033 if(global_where_counter) { 3034 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3035 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3036 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3037 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3038 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3039 for(i=0;i<mat_graph->ncmps;i++) { 3040 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3041 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
3042 } 3043 } 3044 } /* Finished adapting interface */ 3045 PetscInt nfc=0; 3046 PetscInt nec=0; 3047 PetscInt nvc=0; 3048 PetscBool twodim_flag=PETSC_FALSE; 3049 for (i=0; i<mat_graph->ncmps; i++) { 3050 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3051 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 3052 nfc++; 3053 } else { /* note that nec will be zero in 2d */ 3054 nec++; 3055 } 3056 } else { 3057 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3058 } 3059 } 3060 3061 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 3062 nec = nfc; 3063 nfc = 0; 3064 twodim_flag = PETSC_TRUE; 3065 } 3066 /* allocate IS arrays for faces, edges. Vertices need a single index set. 3067 Reusing space allocated in mat_graph->where for creating IS objects */ 3068 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 3069 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 3070 use_faces=PETSC_TRUE; 3071 } 3072 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 3073 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 3074 use_edges=PETSC_TRUE; 3075 } 3076 nfc=0; 3077 nec=0; 3078 for (i=0; i<mat_graph->ncmps; i++) { 3079 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3080 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3081 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3082 } 3083 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3084 if(twodim_flag) { 3085 if(use_edges) { 3086 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3087 nec++; 3088 } 3089 } else { 3090 if(use_faces) { 3091 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 3092 nfc++; 3093 } 3094 } 3095 } else { 3096 if(use_edges) { 3097 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 
3098 nec++; 3099 } 3100 } 3101 } 3102 } 3103 pcbddc->n_ISForFaces=nfc; 3104 pcbddc->n_ISForEdges=nec; 3105 nvc=0; 3106 if( !pcbddc->constraints_flag ) { 3107 for (i=0; i<mat_graph->ncmps; i++) { 3108 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3109 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3110 mat_graph->where[nvc]=mat_graph->queue[j]; 3111 nvc++; 3112 } 3113 } 3114 } 3115 } 3116 /* sort vertex set (by local ordering) */ 3117 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3118 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3119 3120 if(pcbddc->dbg_flag) { 3121 3122 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3123 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3124 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3125 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3126 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3127 for(i=0;i<mat_graph->nvtxs;i++) { 3128 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3129 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3130 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3131 } 3132 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3133 }*/ 3134 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", 
mat_graph->ncmps);CHKERRQ(ierr); 3135 for(i=0;i<mat_graph->ncmps;i++) { 3136 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3137 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3138 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 3139 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 3140 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 3141 } 3142 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 3143 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3144 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 3145 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 3146 } 3147 } 3148 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3149 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3150 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3151 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3152 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3153 } 3154 3155 /* Free graph structure */ 3156 if(mat_graph->nvtxs){ 3157 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3158 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3159 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3160 } 3161 3162 PetscFunctionReturn(0); 3163 3164 } 3165 3166 /* 
-------------------------------------------------------------------------- */

/* The following code has been adapted from function IsConnectedSubdomain contained
   in source file contig.c of METIS library (version 5.0.1)
   It finds connected components of each partition labeled from 1 to n_dist */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
/*
   PCBDDCFindConnectedComponents - Computes the connected components of each
   partition of the local graph via breadth-first search.

   Input Parameters:
+  graph  - the local graph (CSR adjacency in xadj/adjncy); graph->where[i]
            holds the partition label of vertex i, with label 0 meaning
            "discarded" (interior/Dirichlet nodes)
-  n_dist - number of partitions to analyze; labels 1..n_dist are processed

   Output (stored in graph):
+  graph->queue          - vertices grouped by connected component
.  graph->cptr           - component pointers into queue (component i occupies
                           queue[cptr[i]..cptr[i+1]-1])
.  graph->where_ncmps[n] - number of connected components found in partition n+1
-  graph->ncmps          - total number of connected components over all partitions

   Notes:
   graph->touched is used as BFS workspace and is reset on entry.
*/
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
{
  PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
  PetscInt *xadj, *adjncy, *where, *queue;
  PetscInt *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  /* reset BFS workspace */
  for (i=0; i<nvtxs; i++)
    touched[i] = PETSC_FALSE;

  cum_queue=0;
  ncmps=0;

  for(n=0; n<n_dist; n++) {
    pid = n+1;  /* partition labeled by 0 is discarded */
    /* count vertices belonging to this partition */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        nleft++;
    }
    /* Guard against an empty partition: without this check the seed search
       below would fall through with i == nvtxs and the writes to touched[i]
       and queue[cum_queue] would be out of bounds (heap corruption). */
    if (!nleft) {
      graph->where_ncmps[n] = 0;
      continue;
    }
    /* seed the BFS with the first vertex of this partition */
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1;
    cptr[ncmps] = cum_queue;  /* This actually points to queue */
    ncmps_pid = 0;
    while (first != nleft) {
      if (first == last) { /* current component exhausted: find another starting vertex */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i])
            break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      /* pop the next frontier vertex and enqueue its untouched neighbours
         belonging to the same partition */
      i = queue[cum_queue+first];
      first++;
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    /* close the last component of this partition */
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue=cptr[ncmps];
    graph->where_ncmps[n] = ncmps_pid;
  }
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}
