/* TODOLIST
   DofSplitting and DM attached to pc.
   Exact solvers: solve the local saddle point directly for very hard problems
    - change prec_type to switch_inexact_prec_type
    - add bool solve_exact_saddle_point slot to pcbddc data
   Inexact solvers: global preconditioner application is ready; ask the developers (Jed?) how best to implement Dohrmann's approach (PCSHELL?)
   Change how the coarse problem is handled (PCBDDCSetCoarseEnvironment):
     - mind the problem with coarsening_factor
     - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
     - remove coarse enums and allow use of PCBDDCGetCoarseKSP
     - remove metis dependency -> use MatPartitioning for multilevel -> assemble serial adjacency in ManageLocalBoundaries?
     - add a levels slot to the bddc data structure and associated Set/Get functions
   Code refactoring:
     - pick better names for static functions
   Check log_summary for leaks (currently: 1 Vec per level)
   Change options structure:
     - insert BDDC into the MG framework?
   Provide other ops? Ask the developers.
   Remove all unused printf calls.
   Remove // comments and adhere to the PETSc coding standards.
   Write man pages.
*/

/* ----------------------------------------------------------------------------------------------------------------------------------------------
   Implementation of the BDDC preconditioner based on:
   C. Dohrmann, "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications, Volume 14, Issue 2, pages 149-168, March 2007
   ---------------------------------------------------------------------------------------------------------------------------------------------- */

#include "bddc.h" /*I "petscpc.h" I*/  /* includes for fortran wrappers */
#include <petscblaslapack.h>

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_BDDC"
PetscErrorCode PCSetFromOptions_BDDC(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;
  /* the order of the choices must match the CoarseProblemType enum defined in bddc.h */
  static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""};

  PetscFunctionBegin;
  ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr);
  /* Verbose debugging of main data structures */
  ierr = PetscOptionsBool("-pc_bddc_check_all","Verbose (debugging) output for PCBDDC","none",pcbddc->dbg_flag,&pcbddc->dbg_flag,PETSC_NULL);CHKERRQ(ierr);
  /* Some customization of the default primal space */
  ierr = PetscOptionsBool("-pc_bddc_vertices_only","Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag,&pcbddc->vertices_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_faces_only","Use faces only in coarse space (i.e. discard edges)","none",pcbddc->faces_flag,&pcbddc->faces_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_edges_only","Use edges only in coarse space (i.e. discard faces)","none",pcbddc->edges_flag,&pcbddc->edges_flag,PETSC_NULL);CHKERRQ(ierr);
  /* Coarse solver context */
  ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr);
  /* Two different applications of BDDC to the whole set of dofs, internal and interface */
  ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
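
/*
   Usage sketch (illustrative only, not part of this file): with a MATIS operator A and
   a KSP named ksp, BDDC is typically selected programmatically as

     ierr = KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);

   so that KSPSetFromOptions() picks up the options declared above, e.g.
   -pc_bddc_coarse_problem_type multilevel -pc_bddc_coarsening_ratio 4.
   The variable names and option values here are examples (and PCBDDC is assumed to be
   the type name declared in petscpc.h); they are not taken from this file.
*/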

/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC"
static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  pcbddc->coarse_problem_type = CPT;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetCoarseProblemType"
/*@
 PCBDDCSetCoarseProblemType - Set the coarse problem type in PCBDDC.

   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  CPT - the coarse problem type (TODO: pick a better name and explain what this is)

   Level: intermediate

   Notes:
   Not collective, but all processes must call this routine.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC"
static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
  ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr);
  pcbddc->DirichletBoundaries = DirichletBoundaries;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDirichletBoundaries"
/*@
 PCBDDCSetDirichletBoundaries - Set the index set defining the subdomain part of
 the Dirichlet boundaries for the global problem.
125 126 Not collective 127 128 Input Parameters: 129 + pc - the preconditioning context 130 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 131 132 Level: intermediate 133 134 Notes: 135 136 .seealso: PCBDDC 137 @*/ 138 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 139 { 140 PetscErrorCode ierr; 141 142 PetscFunctionBegin; 143 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 144 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 /* -------------------------------------------------------------------------- */ 148 EXTERN_C_BEGIN 149 #undef __FUNCT__ 150 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 151 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 154 PetscErrorCode ierr; 155 156 PetscFunctionBegin; 157 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 158 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 159 pcbddc->NeumannBoundaries=NeumannBoundaries; 160 PetscFunctionReturn(0); 161 } 162 EXTERN_C_END 163 164 /* -------------------------------------------------------------------------- */ 165 #undef __FUNCT__ 166 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 167 /*@ 168 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 169 Neumann boundaries for the global problem. 170 171 Not collective 172 173 Input Parameters: 174 + pc - the preconditioning context 175 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 176 177 Level: intermediate 178 179 Notes: 180 181 .seealso: PCBDDC 182 @*/ 183 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 184 { 185 PetscErrorCode ierr; 186 187 PetscFunctionBegin; 188 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 189 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 /* -------------------------------------------------------------------------- */ 193 EXTERN_C_BEGIN 194 #undef __FUNCT__ 195 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 196 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 197 { 198 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 199 200 PetscFunctionBegin; 201 *NeumannBoundaries = pcbddc->NeumannBoundaries; 202 PetscFunctionReturn(0); 203 } 204 EXTERN_C_END 205 206 /* -------------------------------------------------------------------------- */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 209 /*@ 210 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 211 Neumann boundaries for the global problem. 
212 213 Not collective 214 215 Input Parameters: 216 + pc - the preconditioning context 217 218 Output Parameters: 219 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 220 221 Level: intermediate 222 223 Notes: 224 225 .seealso: PCBDDC 226 @*/ 227 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 228 { 229 PetscErrorCode ierr; 230 231 PetscFunctionBegin; 232 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 233 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 /* -------------------------------------------------------------------------- */ 237 EXTERN_C_BEGIN 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 240 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 241 { 242 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 243 244 PetscFunctionBegin; 245 *DirichletBoundaries = pcbddc->DirichletBoundaries; 246 PetscFunctionReturn(0); 247 } 248 EXTERN_C_END 249 250 /* -------------------------------------------------------------------------- */ 251 #undef __FUNCT__ 252 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 253 /*@ 254 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part of 255 Dirichlet boundaries for the global problem. 256 257 Not collective 258 259 Input Parameters: 260 + pc - the preconditioning context 261 262 Output Parameters: 263 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 264 265 Level: intermediate 266 267 Notes: 268 269 .seealso: PCBDDC 270 @*/ 271 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 272 { 273 PetscErrorCode ierr; 274 275 PetscFunctionBegin; 276 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 277 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 /* -------------------------------------------------------------------------- */ 282 EXTERN_C_BEGIN 283 #undef __FUNCT__ 284 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 285 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 286 { 287 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 288 PetscInt i; 289 PetscErrorCode ierr; 290 291 PetscFunctionBegin; 292 /* Destroy ISs if they were already set */ 293 for(i=0;i<pcbddc->n_ISForDofs;i++) { 294 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 295 } 296 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 297 298 /* allocate space then copy ISs */ 299 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 300 for(i=0;i<n_is;i++) { 301 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 302 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 303 } 304 pcbddc->n_ISForDofs=n_is; 305 306 PetscFunctionReturn(0); 307 } 308 EXTERN_C_END 309 310 /* -------------------------------------------------------------------------- */ 311 #undef __FUNCT__ 312 #define __FUNCT__ "PCBDDCSetDofsSplitting" 313 /*@ 314 PCBDDCSetDofsSplitting - Set index set defining how dofs are splitted. 315 316 Not collective 317 318 Input Parameters: 319 + pc - the preconditioning context 320 - n - number of index sets defining dofs spltting 321 - IS[] - array of IS describing dofs splitting 322 323 Level: intermediate 324 325 Notes: 326 Sequential ISs are copied, the user must destroy the array of IS passed in. 
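
   Example (illustrative only, not taken from this source): for a subdomain problem
   with two interlaced fields (e.g. displacement and pressure) the splitting could be
   built as

      IS fields[2];
      ierr = ISCreateStride(PETSC_COMM_SELF,n_local/2,0,2,&fields[0]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n_local/2,1,2,&fields[1]);CHKERRQ(ierr);
      ierr = PCBDDCSetDofsSplitting(pc,2,fields);CHKERRQ(ierr);

   where n_local is the number of local dofs; the ISs can then be destroyed by the
   caller since they are copied internally.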

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_BDDC"
/* -------------------------------------------------------------------------- */
/*
   PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
                  by setting data structures and options.

   Input Parameter:
+  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
PetscErrorCode PCSetUp_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis   = (PC_IS*)(pc->data);
  PetscInt       im_active = 0;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    /* For BDDC we need a local "Neumann" problem different from the one defined in PCISSetUp,
       so we set the PCIS Neumann solver to PCNONE in order to avoid unneeded computation;
       we also choose to build the (same) Dirichlet problem directly */
    ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr);
    ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr);
    /* Set up all the "iterative substructuring" common block */
    ierr = PCISSetUp(pc);CHKERRQ(ierr);
    /* Get stdout for debugging output */
    if (pcbddc->dbg_flag) {
      ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
    }
    /* Count active processes (TODO: move this code fragment) */
    if (pcis->n) im_active = 1;
    ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    /* Analyze local interface */
    ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr);
    /* Set up local constraint matrix */
    ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr);
    /* Create the coarse and local data structures used to evaluate the action of the preconditioner */
    ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr);
    /* Processes only formally involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */
    if (!pcis->n_neigh) pcis->ISLocalToGlobalMappingGetInfoWasCalled = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}

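/*
   Outline of the preconditioner application implemented below (summary comment added
   for readability; see PCApply_BDDC and the static routines that follow for the exact
   operations):

     1) a first Dirichlet solve on the interior dofs condenses the residual onto the
        interface;
     2) the interface preconditioner combines a coarse correction (through the coarse
        basis functions and the coarse KSP) with local corrections on the R (non-vertex)
        dofs, averaged with the partition of unity;
     3) a second Dirichlet solve extends the interface correction back to the interior
        dofs and the global output vector is assembled.

   With pcbddc->prec_type == PETSC_TRUE the interior dofs are carried through step 2)
   as well (the M_3 variant of the reference article); otherwise the default M_2
   variant is applied.
*/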

/* -------------------------------------------------------------------------- */
/*
   PCApply_BDDC - Applies the BDDC preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
*/
#undef __FUNCT__
#define __FUNCT__ "PCApply_BDDC"
PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
{
  PC_IS             *pcis   = (PC_IS*)(pc->data);
  PC_BDDC           *pcbddc = (PC_BDDC*)(pc->data);
  PetscErrorCode    ierr;
  const PetscScalar one   = 1.0;
  const PetscScalar m_one = -1.0;

/* This code is similar to that provided in nn.c for PCNN, with the NN interface
   preconditioner replaced by BDDC. Support for the M_3 preconditioner of the
   reference article has been added (active when pcbddc->prec_type == PETSC_TRUE). */

  PetscFunctionBegin;
  /* First Dirichlet solve */
  ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
  /*
    Assemble the right-hand side for the BDDC operator:
    - vec1_D for the Dirichlet part (if needed, i.e. prec_type == PETSC_TRUE)
    - the interface part of the global vector z
  */
  ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
  ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
  ierr = VecCopy(r,z);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /*
    Apply the interface preconditioner
    Results are stored in:
    - vec1_D (if needed, i.e. with prec_type == PETSC_TRUE)
    - the interface part of the global vector z
  */
  ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr);

  /* Second Dirichlet solve and assembling of the output */
  ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); }
  ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = VecAXPY(pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); }
  ierr = VecAXPY(pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface.
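
   Summary (comment added for readability): the routine applies the partition of unity
   pcis->D to the interface residual, builds the coarse right-hand side through the
   transposed coarse basis functions coarse_phi_B/coarse_phi_D, solves the local
   saddle point problems on the R dofs and the coarse problem, and then averages the
   local and coarse contributions back onto the interface with pcis->D.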
464 465 */ 466 #undef __FUNCT__ 467 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 468 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 469 { 470 PetscErrorCode ierr; 471 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 472 PC_IS* pcis = (PC_IS*) (pc->data); 473 const PetscScalar zero = 0.0; 474 475 PetscFunctionBegin; 476 /* Get Local boundary and apply partition of unity */ 477 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 480 481 /* Application of PHI^T */ 482 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 483 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 484 485 /* Scatter data of coarse_rhs */ 486 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 487 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 489 /* Local solution on R nodes */ 490 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 491 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 492 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 493 if(pcbddc->prec_type) { 494 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 495 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 } 497 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 498 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 500 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 if(pcbddc->prec_type) { 502 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 503 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 504 } 505 506 /* Coarse solution */ 507 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 508 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 509 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 512 /* Sum contributions from two levels */ 513 /* Apply partition of unity and sum boundary values */ 514 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 515 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 516 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 517 ierr = VecSet(z,zero);CHKERRQ(ierr); 518 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 519 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 
520 521 PetscFunctionReturn(0); 522 } 523 524 525 /* -------------------------------------------------------------------------- */ 526 /* 527 PCBDDCSolveSaddlePoint 528 529 */ 530 #undef __FUNCT__ 531 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 532 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 533 { 534 PetscErrorCode ierr; 535 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 536 537 PetscFunctionBegin; 538 539 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 540 if(pcbddc->n_constraints) { 541 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 542 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 543 } 544 545 PetscFunctionReturn(0); 546 } 547 /* -------------------------------------------------------------------------- */ 548 /* 549 PCBDDCScatterCoarseDataBegin 550 551 */ 552 #undef __FUNCT__ 553 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 554 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 555 { 556 PetscErrorCode ierr; 557 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 558 559 PetscFunctionBegin; 560 561 switch(pcbddc->coarse_communications_type){ 562 case SCATTERS_BDDC: 563 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 564 break; 565 case GATHERS_BDDC: 566 break; 567 } 568 PetscFunctionReturn(0); 569 570 } 571 /* -------------------------------------------------------------------------- */ 572 /* 573 PCBDDCScatterCoarseDataEnd 574 575 */ 576 #undef __FUNCT__ 577 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 578 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 579 { 580 PetscErrorCode ierr; 581 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 582 PetscScalar* array_to; 583 PetscScalar* array_from; 584 MPI_Comm comm=((PetscObject)pc)->comm; 585 PetscInt i; 586 587 PetscFunctionBegin; 588 589 switch(pcbddc->coarse_communications_type){ 590 case SCATTERS_BDDC: 591 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 592 break; 593 case GATHERS_BDDC: 594 if(vec_from) VecGetArray(vec_from,&array_from); 595 if(vec_to) VecGetArray(vec_to,&array_to); 596 switch(pcbddc->coarse_problem_type){ 597 case SEQUENTIAL_BDDC: 598 if(smode == SCATTER_FORWARD) { 599 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 600 if(vec_to) { 601 for(i=0;i<pcbddc->replicated_primal_size;i++) 602 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 603 } 604 } else { 605 if(vec_from) 606 for(i=0;i<pcbddc->replicated_primal_size;i++) 607 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 608 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 609 } 610 break; 611 case REPLICATED_BDDC: 612 if(smode == SCATTER_FORWARD) { 613 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 614 for(i=0;i<pcbddc->replicated_primal_size;i++) 615 
array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 616 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 617 for(i=0;i<pcbddc->local_primal_size;i++) 618 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 619 } 620 break; 621 case MULTILEVEL_BDDC: 622 break; 623 case PARALLEL_BDDC: 624 break; 625 } 626 if(vec_from) VecRestoreArray(vec_from,&array_from); 627 if(vec_to) VecRestoreArray(vec_to,&array_to); 628 break; 629 } 630 PetscFunctionReturn(0); 631 632 } 633 634 /* -------------------------------------------------------------------------- */ 635 /* 636 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 637 that was created with PCCreate_BDDC(). 638 639 Input Parameter: 640 . pc - the preconditioner context 641 642 Application Interface Routine: PCDestroy() 643 */ 644 #undef __FUNCT__ 645 #define __FUNCT__ "PCDestroy_BDDC" 646 PetscErrorCode PCDestroy_BDDC(PC pc) 647 { 648 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 649 PetscErrorCode ierr; 650 651 PetscFunctionBegin; 652 /* free data created by PCIS */ 653 ierr = PCISDestroy(pc);CHKERRQ(ierr); 654 /* free BDDC data */ 655 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 656 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 657 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 658 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 659 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 660 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 661 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 662 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 663 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 664 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 665 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 666 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 667 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 668 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 669 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 670 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 671 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 672 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 673 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 674 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 675 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 676 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 677 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 678 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 679 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 680 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 681 PetscInt i; 682 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 683 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 684 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 685 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 686 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 687 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 688 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 689 /* Free the private data structure that was hanging off the PC */ 690 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 /* 
-------------------------------------------------------------------------- */ 695 /*MC 696 PCBDDC - Balancing Domain Decomposition by Constraints. 697 698 Options Database Keys: 699 . -pcbddc ??? - 700 701 Level: intermediate 702 703 Notes: The matrix used with this preconditioner must be of type MATIS 704 705 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 706 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 707 on the subdomains). 708 709 Options for the coarse grid preconditioner can be set with - 710 Options for the Dirichlet subproblem can be set with - 711 Options for the Neumann subproblem can be set with - 712 713 Contributed by Stefano Zampini 714 715 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 716 M*/ 717 EXTERN_C_BEGIN 718 #undef __FUNCT__ 719 #define __FUNCT__ "PCCreate_BDDC" 720 PetscErrorCode PCCreate_BDDC(PC pc) 721 { 722 PetscErrorCode ierr; 723 PC_BDDC *pcbddc; 724 725 PetscFunctionBegin; 726 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 727 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 728 pc->data = (void*)pcbddc; 729 /* create PCIS data structure */ 730 ierr = PCISCreate(pc);CHKERRQ(ierr); 731 /* BDDC specific */ 732 pcbddc->coarse_vec = 0; 733 pcbddc->coarse_rhs = 0; 734 pcbddc->coarse_ksp = 0; 735 pcbddc->coarse_phi_B = 0; 736 pcbddc->coarse_phi_D = 0; 737 pcbddc->vec1_P = 0; 738 pcbddc->vec1_R = 0; 739 pcbddc->vec2_R = 0; 740 pcbddc->local_auxmat1 = 0; 741 pcbddc->local_auxmat2 = 0; 742 pcbddc->R_to_B = 0; 743 pcbddc->R_to_D = 0; 744 pcbddc->ksp_D = 0; 745 pcbddc->ksp_R = 0; 746 pcbddc->local_primal_indices = 0; 747 pcbddc->prec_type = PETSC_FALSE; 748 pcbddc->NeumannBoundaries = 0; 749 pcbddc->ISForDofs = 0; 750 pcbddc->ISForVertices = 0; 751 pcbddc->n_ISForFaces = 0; 752 pcbddc->n_ISForEdges = 0; 753 pcbddc->ConstraintMatrix = 0; 754 pcbddc->use_nnsp_true = PETSC_FALSE; 755 pcbddc->local_primal_sizes = 0; 756 pcbddc->local_primal_displacements = 0; 757 pcbddc->replicated_local_primal_indices = 0; 758 pcbddc->replicated_local_primal_values = 0; 759 pcbddc->coarse_loc_to_glob = 0; 760 pcbddc->dbg_flag = PETSC_FALSE; 761 pcbddc->coarsening_ratio = 8; 762 /* function pointers */ 763 pc->ops->apply = PCApply_BDDC; 764 pc->ops->applytranspose = 0; 765 pc->ops->setup = PCSetUp_BDDC; 766 pc->ops->destroy = PCDestroy_BDDC; 767 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 768 pc->ops->view = 0; 769 pc->ops->applyrichardson = 0; 770 pc->ops->applysymmetricleft = 0; 771 pc->ops->applysymmetricright = 0; 772 /* composing function */ 773 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 774 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 776 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 777 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 778 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 780 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 782 
PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 783 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 784 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 EXTERN_C_END 788 789 /* -------------------------------------------------------------------------- */ 790 /* 791 Create C matrix [I 0; 0 const] 792 */ 793 #ifdef BDDC_USE_POD 794 #if !defined(PETSC_MISSING_LAPACK_GESVD) 795 #define PETSC_MISSING_LAPACK_GESVD 1 796 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 797 #endif 798 #endif 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 802 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 803 { 804 PetscErrorCode ierr; 805 PC_IS* pcis = (PC_IS*)(pc->data); 806 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 807 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 808 PetscInt *nnz,*vertices,*is_indices; 809 PetscScalar *temp_quadrature_constraint; 810 PetscInt *temp_indices,*temp_indices_to_constraint; 811 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 812 PetscInt n_constraints,n_vertices,size_of_constraint; 813 PetscReal quad_value; 814 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 815 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 816 IS *used_IS; 817 const MatType impMatType=MATSEQAIJ; 818 PetscBLASInt Bs,Bt,lwork,lierr; 819 PetscReal tol=1.0e-8; 820 MatNullSpace nearnullsp; 821 const Vec *nearnullvecs; 822 Vec *localnearnullsp; 823 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 824 PetscReal *rwork,*singular_vals; 825 PetscBLASInt Bone=1; 826 /* some ugly conditional declarations */ 827 #if defined(PETSC_MISSING_LAPACK_GESVD) 828 PetscScalar dot_result; 829 PetscScalar one=1.0,zero=0.0; 830 PetscInt ii; 831 #if defined(PETSC_USE_COMPLEX) 832 PetscScalar val1,val2; 833 #endif 834 #else 835 PetscBLASInt dummy_int; 836 PetscScalar dummy_scalar; 837 #endif 838 839 PetscFunctionBegin; 840 /* check if near null space is attached to global mat */ 841 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 842 if (nearnullsp) { 843 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 844 } else { /* if near null space is not provided it uses constants */ 845 nnsp_has_cnst = PETSC_TRUE; 846 use_nnsp_true = PETSC_TRUE; 847 } 848 if(nnsp_has_cnst) { 849 nnsp_addone = 1; 850 } 851 /* 852 Evaluate maximum storage size needed by the procedure 853 - temp_indices will contain start index of each constraint stored as follows 854 - temp_indices_to_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 855 - temp_quadrature_constraint[temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 856 */ 857 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 858 total_counts *= (nnsp_addone+nnsp_size); 859 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 860 total_counts = 0; 861 max_size_of_constraint = 0; 862 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 863 if(i<pcbddc->n_ISForEdges){ 864 used_IS = &pcbddc->ISForEdges[i]; 865 } else { 866 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 867 } 868 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 869 total_counts += j; 870 if(j>max_size_of_constraint) max_size_of_constraint=j; 871 } 872 total_counts *= (nnsp_addone+nnsp_size); 873 ierr = 
PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 874 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 875 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 876 rwork = 0; 877 work = 0; 878 singular_vals = 0; 879 temp_basis = 0; 880 correlation_mat = 0; 881 if(!pcbddc->use_nnsp_true) { 882 PetscScalar temp_work; 883 #if defined(PETSC_MISSING_LAPACK_GESVD) 884 /* POD */ 885 PetscInt max_n; 886 max_n = nnsp_addone+nnsp_size; 887 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 888 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 889 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 890 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 891 #if defined(PETSC_USE_COMPLEX) 892 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 893 #endif 894 /* now we evaluate the optimal workspace using query with lwork=-1 */ 895 Bt = PetscBLASIntCast(max_n); 896 lwork=-1; 897 #if !defined(PETSC_USE_COMPLEX) 898 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 899 #else 900 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 901 #endif 902 #else /* on missing GESVD */ 903 /* SVD */ 904 PetscInt max_n,min_n; 905 max_n = max_size_of_constraint; 906 min_n = nnsp_addone+nnsp_size; 907 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 908 min_n = max_size_of_constraint; 909 max_n = nnsp_addone+nnsp_size; 910 } 911 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 912 #if defined(PETSC_USE_COMPLEX) 913 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 914 #endif 915 /* now we evaluate the optimal workspace using query with lwork=-1 */ 916 lwork=-1; 917 Bs = PetscBLASIntCast(max_n); 918 Bt = PetscBLASIntCast(min_n); 919 dummy_int = Bs; 920 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 921 #if !defined(PETSC_USE_COMPLEX) 922 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 923 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 924 #else 925 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 926 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 927 #endif 928 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 930 #endif 931 /* Allocate optimal workspace */ 932 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 933 total_counts = (PetscInt)lwork; 934 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 935 } 936 /* get local part of global near null space vectors */ 937 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 938 for(k=0;k<nnsp_size;k++) { 939 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 940 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 } 943 /* Now we can loop on constraining sets */ 944 total_counts=0; 945 temp_indices[0]=0; 946 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 947 
if(i<pcbddc->n_ISForEdges){ 948 used_IS = &pcbddc->ISForEdges[i]; 949 } else { 950 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 951 } 952 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 953 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 954 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 955 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 956 if(nnsp_has_cnst) { 957 temp_constraints++; 958 quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint); 959 for(j=0;j<size_of_constraint;j++) { 960 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 961 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 962 } 963 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 964 total_counts++; 965 } 966 for(k=0;k<nnsp_size;k++) { 967 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 968 for(j=0;j<size_of_constraint;j++) { 969 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 970 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 971 } 972 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 973 quad_value = 1.0; 974 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 975 Bs = PetscBLASIntCast(size_of_constraint); 976 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 977 } 978 if ( quad_value > 0.0 ) { /* keep indices and values */ 979 temp_constraints++; 980 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 981 total_counts++; 982 } 983 } 984 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 985 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 986 if(!use_nnsp_true) { 987 988 Bs = PetscBLASIntCast(size_of_constraint); 989 Bt = PetscBLASIntCast(temp_constraints); 990 991 #if defined(PETSC_MISSING_LAPACK_GESVD) 992 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 993 /* Store upper triangular part of correlation matrix */ 994 for(j=0;j<temp_constraints;j++) { 995 for(k=0;k<j+1;k++) { 996 #if defined(PETSC_USE_COMPLEX) 997 /* hand made complex dot product */ 998 dot_result = 0.0; 999 for (ii=0; ii<size_of_constraint; ii++) { 1000 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 1001 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 1002 dot_result += val1*PetscConj(val2); 1003 } 1004 #else 1005 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 1006 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 1007 #endif 1008 correlation_mat[j*temp_constraints+k]=dot_result; 1009 } 1010 } 1011 #if !defined(PETSC_USE_COMPLEX) 1012 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 1013 #else 1014 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 1015 #endif 1016 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 1017 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 1018 j=0; 1019 while( j < Bt && singular_vals[j] < tol) j++; 1020 
total_counts=total_counts-j; 1021 if(j<temp_constraints) { 1022 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 1023 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 1024 /* copy POD basis into used quadrature memory */ 1025 for(k=0;k<Bt-j;k++) { 1026 for(ii=0;ii<size_of_constraint;ii++) { 1027 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 1028 } 1029 } 1030 } 1031 1032 #else /* on missing GESVD */ 1033 1034 PetscInt min_n = temp_constraints; 1035 if(min_n > size_of_constraint) min_n = size_of_constraint; 1036 dummy_int = Bs; 1037 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1038 #if !defined(PETSC_USE_COMPLEX) 1039 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1040 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1041 #else 1042 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1043 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1044 #endif 1045 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1046 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1047 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1048 j=0; 1049 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1050 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1051 1052 #endif 1053 } 1054 } 1055 n_constraints=total_counts; 1056 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1057 local_primal_size = n_vertices+n_constraints; 1058 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1059 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1060 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1061 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1062 for(i=0;i<n_vertices;i++) { nnz[i]= 1; } 1063 for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; } 1064 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1065 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1066 for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); } 1067 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1068 for(i=0;i<n_constraints;i++) { 1069 j=i+n_vertices; 1070 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1071 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1072 } 1073 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1074 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1075 /* set quantities in pcbddc data structure */ 1076 pcbddc->n_vertices = n_vertices; 1077 pcbddc->n_constraints = n_constraints; 1078 pcbddc->local_primal_size = n_vertices+n_constraints; 1079 /* free workspace no longer needed */ 1080 ierr = PetscFree(rwork);CHKERRQ(ierr); 1081 ierr = PetscFree(work);CHKERRQ(ierr); 1082 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1083 
ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1084 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1085 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1086 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1087 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1088 ierr = PetscFree(nnz);CHKERRQ(ierr); 1089 for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); } 1090 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1094 #undef PETSC_MISSING_LAPACK_GESVD 1095 #endif 1096 1097 /* -------------------------------------------------------------------------- */ 1098 /* 1099 PCBDDCCoarseSetUp - 1100 */ 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "PCBDDCCoarseSetUp" 1103 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1104 { 1105 PetscErrorCode ierr; 1106 1107 PC_IS* pcis = (PC_IS*)(pc->data); 1108 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1110 IS is_R_local; 1111 IS is_V_local; 1112 IS is_C_local; 1113 IS is_aux1; 1114 IS is_aux2; 1115 const VecType impVecType; 1116 const MatType impMatType; 1117 PetscInt n_R=0; 1118 PetscInt n_D=0; 1119 PetscInt n_B=0; 1120 PetscScalar zero=0.0; 1121 PetscScalar one=1.0; 1122 PetscScalar m_one=-1.0; 1123 PetscScalar* array; 1124 PetscScalar *coarse_submat_vals; 1125 PetscInt *idx_R_local; 1126 PetscInt *idx_V_B; 1127 PetscScalar *coarsefunctions_errors; 1128 PetscScalar *constraints_errors; 1129 /* auxiliary indices */ 1130 PetscInt s,i,j,k; 1131 /* for verbose output of bddc */ 1132 PetscViewer viewer=pcbddc->dbg_viewer; 1133 PetscBool dbg_flag=pcbddc->dbg_flag; 1134 /* for counting coarse dofs */ 1135 PetscScalar coarsesum; 1136 PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints; 1137 PetscInt size_of_constraint; 1138 PetscInt *row_cmat_indices; 1139 PetscScalar *row_cmat_values; 1140 const PetscInt *vertices; 1141 1142 PetscFunctionBegin; 1143 /* Set Non-overlapping dimensions */ 1144 n_B = pcis->n_B; n_D = pcis->n - n_B; 1145 ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr); 1146 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1147 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1148 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1149 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1150 1151 for(i=0;i<n_constraints;i++) { 1152 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1153 for (j=0; j<size_of_constraint; j++) { 1154 k = row_cmat_indices[j]; 1155 if( array[k] == zero ) { 1156 array[k] = one; 1157 break; 1158 } 1159 } 1160 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1161 } 1162 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1163 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1164 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1166 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = VecScatterEnd 
(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1169 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1170 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1171 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1172 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1174 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1175 pcbddc->coarse_size = (PetscInt) coarsesum; 1176 1177 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1178 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1179 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1180 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1181 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1182 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1183 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1184 if(dbg_flag) { 1185 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1186 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1188 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1189 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1191 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1192 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1194 } 1195 /* Allocate needed vectors */ 1196 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1197 impMatType = MATSEQDENSE; 1198 impVecType = VECSEQ; 1199 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1200 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1201 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1202 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1203 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1204 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1205 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1206 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1207 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1208 1209 /* Creating some index sets needed */ 1210 /* For submatrices */ 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1212 if(n_vertices) { 1213 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1214 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1215 } 1216 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1217 /* For VecScatters 
pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1218 { 1219 PetscInt *aux_array1; 1220 PetscInt *aux_array2; 1221 PetscScalar value; 1222 1223 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1224 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1225 1226 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1227 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1233 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1234 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 1235 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1236 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1237 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1238 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1239 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1240 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1241 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1242 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1243 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1244 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1245 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1246 1247 if(pcbddc->prec_type || dbg_flag ) { 1248 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1249 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1250 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1251 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1252 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1253 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1254 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1255 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1256 } 1257 1258 /* Check scatters */ 1259 if(dbg_flag) { 1260 1261 Vec vec_aux; 1262 1263 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1264 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1265 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1266 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1267 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1268 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1269 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1270 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1273 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1274 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1275 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1277 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1278 1279 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1280 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1281 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1282 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1283 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1284 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1286 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1287 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1288 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1289 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1290 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1291 1292 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1293 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 1294 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1295 1296 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1297 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1298 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1299 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1300 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1301 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1303 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1304 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1305 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1306 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1307 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1308 1309 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1310 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1311 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1312 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1313 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1316 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317 ierr = 
VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1318 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1320 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1321 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1322 1323 } 1324 } 1325 1326 /* vertices in boundary numbering */ 1327 if(n_vertices) { 1328 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1329 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1330 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1331 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1332 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1333 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1335 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1336 for (i=0; i<n_vertices; i++) { 1337 s=0; 1338 while (array[s] != i ) {s++;} 1339 idx_V_B[i]=s; 1340 } 1341 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1342 } 1343 1344 1345 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1346 { 1347 Mat A_RR; 1348 PC pc_temp; 1349 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1350 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1351 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1352 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1353 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1354 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1355 /* default */ 1356 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1357 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1358 /* Allow user's customization */ 1359 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1360 /* Set Up KSP for Dirichlet problem of BDDC */ 1361 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1362 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1363 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1364 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1365 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1366 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1367 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1368 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1369 /* default */ 1370 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1371 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1372 /* Allow user's customization */ 1373 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1374 /* Set Up KSP for Neumann problem of BDDC */ 1375 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1376 /* check Dirichlet and Neumann solvers */ 1377 if(pcbddc->dbg_flag) { 1378 Vec temp_vec; 1379 PetscScalar value; 1380 1381 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1382 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1383 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1384 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1385 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1386 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 
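      /* Note on the two checks in this debugging block: a random vector is multiplied by
         A_II (resp. A_RR) and the product is solved back with ksp_D (resp. ksp_R), so the
         infinity norms printed below measure the accuracy of the local Dirichlet and
         Neumann solvers. */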
1387 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1388 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1389 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1390 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1391 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1392 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1393 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1394 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1395 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1396 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1397 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1398 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1399 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1400 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1401 } 1402 /* free Neumann problem's matrix */ 1403 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1404 } 1405 1406 /* Assemble all remaining stuff needed to apply BDDC */ 1407 { 1408 Mat A_RV,A_VR,A_VV; 1409 Mat M1,M2; 1410 Mat C_CR; 1411 Mat AUXMAT; 1412 Vec vec1_C; 1413 Vec vec2_C; 1414 Vec vec1_V; 1415 Vec vec2_V; 1416 PetscInt *nnz; 1417 PetscInt *auxindices; 1418 PetscInt index; 1419 PetscScalar* array2; 1420 MatFactorInfo matinfo; 1421 1422 /* Allocating some extra storage just to be safe */ 1423 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1424 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1425 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1426 1427 /* some work vectors on vertices and/or constraints */ 1428 if(n_vertices) { 1429 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1430 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1431 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1432 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1433 } 1434 if(pcbddc->n_constraints) { 1435 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1436 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1437 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1438 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1439 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1440 } 1441 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1442 if(n_constraints) { 1443 /* some work vectors */ 1444 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1445 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1446 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1447 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1448 1449 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1450 for(i=0;i<n_constraints;i++) { 1451 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1452 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1453 /* Get row of constraint matrix in R numbering */ 1454 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1455 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1456 
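        /* The loop below spreads the constraint row (given in local node numbering) into the
           work vector with a sign change and restricts it to the R dofs; the solve with
           ksp_R further down then produces column i of -A_RR^{-1} C_CR^T, which is inserted
           into local_auxmat2. */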
for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1457 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1458 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1459 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1460 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1461 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1462 /* Solve for row of constraint matrix in R numbering */ 1463 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1464 /* Set values */ 1465 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1466 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1468 } 1469 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1470 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1471 1472 /* Create Constraint matrix on R nodes: C_{CR} */ 1473 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1474 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1475 1476 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1477 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1478 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1479 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1480 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1481 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1482 1483 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1484 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1485 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1486 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1487 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1488 for(i=0;i<n_constraints;i++) { 1489 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1490 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1491 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1492 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1493 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1494 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1495 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1496 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1497 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1498 } 1499 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1501 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1502 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1503 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1504 1505 } 1506 1507 /* Get submatrices from subdomain matrix */ 1508 if(n_vertices){ 1509 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1510 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1511 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1512 /* Assemble M2 = A_RR^{-1}A_RV */ 1513 ierr = 
MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1514 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1515 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1516 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1517 for(i=0;i<n_vertices;i++) { 1518 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1519 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1520 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1521 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1522 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1523 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1524 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1525 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1526 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1527 } 1528 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1529 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1530 } 1531 1532 /* Matrix of coarse basis functions (local) */ 1533 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1534 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1535 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1536 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1537 if(pcbddc->prec_type || dbg_flag ) { 1538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1539 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1540 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1541 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1542 } 1543 1544 if(dbg_flag) { 1545 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1546 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1547 } 1548 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1549 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1550 1551 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1552 for(i=0;i<n_vertices;i++){ 1553 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1554 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1555 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1556 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1557 /* solution of saddle point problem */ 1558 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1559 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1560 if(n_constraints) { 1561 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1562 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1563 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1564 } 1565 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1566 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1567 1568 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1569 /* coarse basis functions */ 1570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterEnd 
(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1574 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1575 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1576 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1577 if( pcbddc->prec_type || dbg_flag ) { 1578 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1581 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1582 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1583 } 1584 /* subdomain contribution to coarse matrix */ 1585 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1586 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1587 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1588 if(n_constraints) { 1589 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1590 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1591 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1592 } 1593 1594 if( dbg_flag ) { 1595 /* assemble subdomain vector on nodes */ 1596 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1597 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1598 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1599 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1600 array[ vertices[i] ] = one; 1601 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1602 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1603 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1604 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1605 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1606 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1607 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1608 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1609 if(n_constraints) { 1610 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1611 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1612 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1613 } 1614 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1615 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1616 /* check saddle point solution */ 1617 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1618 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1619 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1620 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1621 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1622 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1623 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1624 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1625 } 1626 } 1627 1628 for(i=0;i<n_constraints;i++){ 1629 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1630 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1631 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1632 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1633 /* solution of saddle point problem */ 1634 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1635 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1636 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1637 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1638 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1639 /* coarse basis functions */ 1640 index=i+n_vertices; 1641 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1642 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1645 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1646 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1647 if( pcbddc->prec_type || dbg_flag ) { 1648 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1649 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1651 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1652 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1653 } 1654 /* subdomain contribution to coarse matrix */ 1655 if(n_vertices) { 1656 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1657 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1658 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1659 } 1660 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1661 for(j=0;j<n_constraints;j++) 
{coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1662 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1663 1664 if( dbg_flag ) { 1665 /* assemble subdomain vector on nodes */ 1666 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1667 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1668 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1669 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1670 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1671 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1672 /* assemble subdomain vector of lagrange multipliers */ 1673 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1674 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1675 if( n_vertices) { 1676 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1677 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1678 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1679 } 1680 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1681 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1682 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1683 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1684 /* check saddle point solution */ 1685 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1686 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1687 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1688 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1689 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1690 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1691 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1692 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1693 } 1694 } 1695 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1696 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 if( pcbddc->prec_type || dbg_flag ) { 1698 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 } 1701 /* Checking coarse_sub_mat and coarse basis functios */ 1702 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1703 if(dbg_flag) { 1704 1705 Mat coarse_sub_mat; 1706 Mat TM1,TM2,TM3,TM4; 1707 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1708 const MatType checkmattype=MATSEQAIJ; 1709 PetscScalar value; 1710 1711 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1712 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1713 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1714 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1715 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1716 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1718 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1719 1720 ierr = 
PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1721 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1722 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1723 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1724 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1725 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1726 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1727 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1728 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1729 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1730 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1731 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1732 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1733 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1734 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1735 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1736 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1737 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1738 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1739 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1740 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1741 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1742 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1743 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1744 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1745 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1746 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1747 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1748 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1749 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1750 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1751 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1752 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1753 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1754 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1755 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1756 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1757 } 1758 1759 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1760 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1761 /* free memory */ 1762 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1763 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1764 ierr = PetscFree(nnz);CHKERRQ(ierr); 1765 if(n_vertices) { 1766 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1767 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1768 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1769 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1770 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1771 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1772 } 1773 if(pcbddc->n_constraints) { 1774 ierr = 
VecDestroy(&vec1_C);CHKERRQ(ierr); 1775 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1776 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1777 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1778 } 1779 } 1780 /* free memory */ 1781 if(n_vertices) { 1782 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1783 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1784 } 1785 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1786 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1787 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1788 1789 PetscFunctionReturn(0); 1790 } 1791 1792 /* -------------------------------------------------------------------------- */ 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1796 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1797 { 1798 1799 1800 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1801 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1802 PC_IS *pcis = (PC_IS*)pc->data; 1803 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1804 MPI_Comm coarse_comm; 1805 1806 /* common to all choiches */ 1807 PetscScalar *temp_coarse_mat_vals; 1808 PetscScalar *ins_coarse_mat_vals; 1809 PetscInt *ins_local_primal_indices; 1810 PetscMPIInt *localsizes2,*localdispl2; 1811 PetscMPIInt size_prec_comm; 1812 PetscMPIInt rank_prec_comm; 1813 PetscMPIInt active_rank=MPI_PROC_NULL; 1814 PetscMPIInt master_proc=0; 1815 PetscInt ins_local_primal_size; 1816 /* specific to MULTILEVEL_BDDC */ 1817 PetscMPIInt *ranks_recv; 1818 PetscMPIInt count_recv=0; 1819 PetscMPIInt rank_coarse_proc_send_to; 1820 PetscMPIInt coarse_color = MPI_UNDEFINED; 1821 ISLocalToGlobalMapping coarse_ISLG; 1822 /* some other variables */ 1823 PetscErrorCode ierr; 1824 const MatType coarse_mat_type; 1825 const PCType coarse_pc_type; 1826 const KSPType coarse_ksp_type; 1827 PC pc_temp; 1828 PetscInt i,j,k,bs; 1829 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1830 /* verbose output viewer */ 1831 PetscViewer viewer=pcbddc->dbg_viewer; 1832 PetscBool dbg_flag=pcbddc->dbg_flag; 1833 1834 PetscFunctionBegin; 1835 1836 ins_local_primal_indices = 0; 1837 ins_coarse_mat_vals = 0; 1838 localsizes2 = 0; 1839 localdispl2 = 0; 1840 temp_coarse_mat_vals = 0; 1841 coarse_ISLG = 0; 1842 1843 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1844 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1845 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1846 1847 /* Assign global numbering to coarse dofs */ 1848 { 1849 PetscScalar one=1.,zero=0.; 1850 PetscScalar *array; 1851 PetscMPIInt *auxlocal_primal; 1852 PetscMPIInt *auxglobal_primal; 1853 PetscMPIInt *all_auxglobal_primal; 1854 PetscMPIInt *all_auxglobal_primal_dummy; 1855 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1856 PetscInt *vertices,*row_cmat_indices; 1857 PetscInt size_of_constraint; 1858 1859 /* Construct needed data structures for message passing */ 1860 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1861 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1862 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1863 /* Gather local_primal_size information for all processes */ 1864 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1865 pcbddc->replicated_primal_size = 0; 1866 for (i=0; i<size_prec_comm; 
i++) { 1867 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1868 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1869 } 1870 if(rank_prec_comm == 0) { 1871 /* allocate some auxiliary space */ 1872 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1873 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1874 } 1875 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1876 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1877 1878 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1879 This code fragment assumes that the number of local constraints per connected component 1880 is not greater than the number of nodes defined for the connected component 1881 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1882 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1883 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1884 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1885 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1886 for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1887 array[ vertices[i] ] = one; 1888 auxlocal_primal[i] = vertices[i]; 1889 } 1890 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1891 for(i=0;i<pcbddc->n_constraints;i++) { 1892 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1893 for (j=0; j<size_of_constraint; j++) { 1894 k = row_cmat_indices[j]; 1895 if( array[k] == zero ) { 1896 array[k] = one; 1897 auxlocal_primal[i+pcbddc->n_vertices] = k; 1898 break; 1899 } 1900 } 1901 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1902 } 1903 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1904 1905 /* Now assign them a global numbering */ 1906 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1907 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1908 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1909 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1910 1911 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1912 /* It implements a function similar to PetscSortRemoveDupsInt */ 1913 if(rank_prec_comm==0) { 1914 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1915 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1916 k=1; 1917 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1918 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1919 if(j != all_auxglobal_primal[i] ) { 1920 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1921 k++; 1922 j=all_auxglobal_primal[i]; 1923 } 1924 } 1925 } else { 1926 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1927 } 1928 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1929 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1930 1931 /* Now get global coarse numbering of local primal nodes */ 1932 for(i=0;i<pcbddc->local_primal_size;i++) { 1933 k=0; 1934 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1935 pcbddc->local_primal_indices[i]=k; 1936 } 1937 if(dbg_flag) { 1938 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1939 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1940 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1941 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1942 for(i=0;i<pcbddc->local_primal_size;i++) { 1943 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1944 } 1945 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1946 } 1947 /* free allocated memory */ 1948 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1949 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1950 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1951 if(rank_prec_comm == 0) { 1952 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1953 } 1954 } 1955 1956 /* adapt coarse problem type */ 1957 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1958 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1959 1960 switch(pcbddc->coarse_problem_type){ 1961 1962 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1963 { 1964 /* we need additional variables */ 1965 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1966 MetisInt *metis_coarse_subdivision; 1967 MetisInt options[METIS_NOPTIONS]; 1968 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1969 PetscMPIInt procs_jumps_coarse_comm; 1970 PetscMPIInt *coarse_subdivision; 1971 PetscMPIInt *total_count_recv; 1972 PetscMPIInt *total_ranks_recv; 1973 PetscMPIInt *displacements_recv; 1974 PetscMPIInt *my_faces_connectivity; 1975 PetscMPIInt *petsc_faces_adjncy; 1976 MetisInt *faces_adjncy; 1977 MetisInt *faces_xadj; 1978 PetscMPIInt *number_of_faces; 1979 PetscMPIInt *faces_displacements; 1980 PetscInt *array_int; 1981 PetscMPIInt my_faces=0; 1982 PetscMPIInt total_faces=0; 1983 PetscInt ranks_stretching_ratio; 1984 1985 /* define some quantities */ 1986 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1987 coarse_mat_type = MATIS; 1988 coarse_pc_type = PCBDDC; 1989 coarse_ksp_type = KSPCHEBYSHEV; 1990 1991 /* details of coarse decomposition */ 1992 n_subdomains = pcbddc->active_procs; 1993 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1994 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1995 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1996 1997 
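      /* Roughly speaking: every pcbddc->coarsening_ratio fine subdomains are merged into one
         coarse subdomain (n_parts coarse elements in total); since only pcbddc->active_procs
         of the size_prec_comm ranks own a fine subdomain, coarse data is later sent to every
         procs_jumps_coarse_comm-th rank. For example (hypothetical numbers), with 64 active
         ranks and a coarsening ratio of 8, ranks 0,8,...,56 would host the 8 coarse subdomains. */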
printf("Coarse algorithm details: \n"); 1998 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1999 2000 /* build CSR graph of subdomains' connectivity through faces */ 2001 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2002 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2003 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2004 for(j=0;j<pcis->n_shared[i];j++){ 2005 array_int[ pcis->shared[i][j] ]+=1; 2006 } 2007 } 2008 for(i=1;i<pcis->n_neigh;i++){ 2009 for(j=0;j<pcis->n_shared[i];j++){ 2010 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2011 my_faces++; 2012 break; 2013 } 2014 } 2015 } 2016 //printf("I found %d faces.\n",my_faces); 2017 2018 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2019 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2020 my_faces=0; 2021 for(i=1;i<pcis->n_neigh;i++){ 2022 for(j=0;j<pcis->n_shared[i];j++){ 2023 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2024 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2025 my_faces++; 2026 break; 2027 } 2028 } 2029 } 2030 if(rank_prec_comm == master_proc) { 2031 //printf("I found %d total faces.\n",total_faces); 2032 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2033 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2034 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2035 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2036 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2037 } 2038 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2039 if(rank_prec_comm == master_proc) { 2040 faces_xadj[0]=0; 2041 faces_displacements[0]=0; 2042 j=0; 2043 for(i=1;i<size_prec_comm+1;i++) { 2044 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2045 if(number_of_faces[i-1]) { 2046 j++; 2047 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2048 } 2049 } 2050 printf("The J I count is %d and should be %d\n",j,n_subdomains); 2051 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 2052 } 2053 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2054 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2055 ierr = PetscFree(array_int);CHKERRQ(ierr); 2056 if(rank_prec_comm == master_proc) { 2057 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2058 printf("This is the face connectivity (actual ranks)\n"); 2059 for(i=0;i<n_subdomains;i++){ 2060 printf("proc %d is connected with \n",i); 2061 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2062 printf("%d ",faces_adjncy[j]); 2063 printf("\n"); 2064 } 2065 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2066 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2067 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2068 } 2069 2070 if( rank_prec_comm == master_proc ) { 2071 2072 PetscInt heuristic_for_metis=3; 2073 2074 ncon=1; 2075 
faces_nvtxs=n_subdomains; 2076 /* partition graoh induced by face connectivity */ 2077 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2078 ierr = METIS_SetDefaultOptions(options); 2079 /* we need a contiguous partition of the coarse mesh */ 2080 options[METIS_OPTION_CONTIG]=1; 2081 options[METIS_OPTION_DBGLVL]=1; 2082 options[METIS_OPTION_NITER]=30; 2083 //options[METIS_OPTION_NCUTS]=1; 2084 printf("METIS PART GRAPH\n"); 2085 if(n_subdomains>n_parts*heuristic_for_metis) { 2086 printf("Using Kway\n"); 2087 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2088 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2089 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2090 } else { 2091 printf("Using Recursive\n"); 2092 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2093 } 2094 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2095 printf("Partition done!\n"); 2096 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2097 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2098 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2099 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2100 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2101 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2102 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2103 } 2104 2105 /* Create new communicator for coarse problem splitting the old one */ 2106 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2107 coarse_color=0; //for communicator splitting 2108 active_rank=rank_prec_comm; //for insertion of matrix values 2109 } 2110 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2111 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2112 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2113 2114 if( coarse_color == 0 ) { 2115 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2116 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2117 printf("Details of coarse comm\n"); 2118 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2119 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2120 } else { 2121 rank_coarse_comm = MPI_PROC_NULL; 2122 } 2123 2124 /* master proc take care of arranging and distributing coarse informations */ 2125 if(rank_coarse_comm == master_proc) { 2126 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2127 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2128 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2129 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2130 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2131 /* some initializations */ 2132 displacements_recv[0]=0; 
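        /* Bookkeeping on the master process: for each coarse rank j, total_count_recv[j] counts
           how many fine subdomains the METIS partition assigned to it, and total_ranks_recv
           lists their ranks in prec_comm, packed contiguously according to displacements_recv;
           these arrays are scattered below so each coarse process knows which processes will
           send it matrix data. */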
2133 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2134 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2135 for(j=0;j<size_coarse_comm;j++) 2136 for(i=0;i<size_prec_comm;i++) 2137 if(coarse_subdivision[i]==j) 2138 total_count_recv[j]++; 2139 /* displacements needed for scatterv of total_ranks_recv */ 2140 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2141 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2142 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2143 for(j=0;j<size_coarse_comm;j++) { 2144 for(i=0;i<size_prec_comm;i++) { 2145 if(coarse_subdivision[i]==j) { 2146 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2147 total_count_recv[j]+=1; 2148 } 2149 } 2150 } 2151 for(j=0;j<size_coarse_comm;j++) { 2152 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2153 for(i=0;i<total_count_recv[j];i++) { 2154 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2155 } 2156 printf("\n"); 2157 } 2158 2159 /* identify new decomposition in terms of ranks in the old communicator */ 2160 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2161 printf("coarse_subdivision in old end new ranks\n"); 2162 for(i=0;i<size_prec_comm;i++) 2163 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2164 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2165 } else { 2166 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2167 } 2168 printf("\n"); 2169 } 2170 2171 /* Scatter new decomposition for send details */ 2172 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2173 /* Scatter receiving details to members of coarse decomposition */ 2174 if( coarse_color == 0) { 2175 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2176 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2177 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2178 } 2179 2180 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2181 //if(coarse_color == 0) { 2182 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2183 // for(i=0;i<count_recv;i++) 2184 // printf("%d ",ranks_recv[i]); 2185 // printf("\n"); 2186 //} 2187 2188 if(rank_prec_comm == master_proc) { 2189 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2190 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2191 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2192 free(coarse_subdivision); 2193 free(total_count_recv); 2194 free(total_ranks_recv); 2195 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2196 } 2197 break; 2198 } 2199 2200 case(REPLICATED_BDDC): 2201 2202 pcbddc->coarse_communications_type = GATHERS_BDDC; 2203 coarse_mat_type = MATSEQAIJ; 2204 coarse_pc_type = PCLU; 2205 coarse_ksp_type = KSPPREONLY; 2206 coarse_comm = PETSC_COMM_SELF; 2207 active_rank = rank_prec_comm; 2208 break; 2209 
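    /* PARALLEL_BDDC keeps the coarse matrix distributed (MATMPIAIJ) over the whole
       preconditioner communicator and preconditions it with PCREDUNDANT, while
       SEQUENTIAL_BDDC below gathers it on the master process and factors it there with LU. */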
  case(PARALLEL_BDDC):

    pcbddc->coarse_communications_type = SCATTERS_BDDC;
    coarse_mat_type = MATMPIAIJ;
    coarse_pc_type = PCREDUNDANT;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = prec_comm;
    active_rank = rank_prec_comm;
    break;

  case(SEQUENTIAL_BDDC):
    pcbddc->coarse_communications_type = GATHERS_BDDC;
    coarse_mat_type = MATSEQAIJ;
    coarse_pc_type = PCLU;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = PETSC_COMM_SELF;
    active_rank = master_proc;
    break;
  }

  switch(pcbddc->coarse_communications_type){

  case(SCATTERS_BDDC):
    {
      if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

        PetscMPIInt send_size;
        PetscInt *aux_ins_indices;
        PetscInt ii,jj;
        MPI_Request *requests;

        /* allocate auxiliary space */
        ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
        ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
        ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
        /* allocate buffers for message passing */
        ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
        for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* fill message sizes and displacements */
        j=0;
        for(i=0;i<count_recv;i++){
          ii = ranks_recv[i];
          localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
          localdispl2[i]=j;
          j+=localsizes2[i];
          jj = pcbddc->local_primal_displacements[ii];
          for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* counts the coarse subdomains sharing each coarse node */
        }
        //printf("aux_ins_indices 1\n");
        //for(i=0;i<pcbddc->coarse_size;i++)
        //  printf("%d ",aux_ins_indices[i]);
        //printf("\n");
        /* temp_coarse_mat_vals temporarily stores the received matrix values */
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
        /* count how many rows/columns this process will insert into the coarse matrix */
        ins_local_primal_size=0;
        for(i=0;i<pcbddc->coarse_size;i++)
          if(aux_ins_indices[i])
            ins_local_primal_size++;
        /* evaluate the indices this process will insert into the coarse matrix */
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++)
          if(aux_ins_indices[i])
            ins_local_primal_indices[j++]=i;
        /* reuse aux_ins_indices as a global-to-local mapping */
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++){
          if(aux_ins_indices[i]==0){
            aux_ins_indices[i]=-1;
          } else {
            aux_ins_indices[i]=j;
            j++;
          }
        }

        //printf("New details localsizes2 localdispl2\n");
        //for(i=0;i<count_recv;i++)
        //  printf("(%d %d) ",localsizes2[i],localdispl2[i]);
        //printf("\n");
        //printf("aux_ins_indices 2\n");
        //for(i=0;i<pcbddc->coarse_size;i++)
        //  printf("%d ",aux_ins_indices[i]);
2296 //printf("\n"); 2297 //printf("ins_local_primal_indices\n"); 2298 //for(i=0;i<ins_local_primal_size;i++) 2299 // printf("%d ",ins_local_primal_indices[i]); 2300 //printf("\n"); 2301 //printf("coarse_submat_vals\n"); 2302 //for(i=0;i<pcbddc->local_primal_size;i++) 2303 // for(j=0;j<pcbddc->local_primal_size;j++) 2304 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2305 //printf("\n"); 2306 2307 /* processes partecipating in coarse problem receive matrix data from their friends */ 2308 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2309 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2310 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2311 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2312 } 2313 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2314 2315 //if(coarse_color == 0) { 2316 // printf("temp_coarse_mat_vals\n"); 2317 // for(k=0;k<count_recv;k++){ 2318 // printf("---- %d ----\n",ranks_recv[k]); 2319 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2320 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2321 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2322 // printf("\n"); 2323 // } 2324 //} 2325 /* calculate data to insert in coarse mat */ 2326 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2327 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2328 2329 PetscMPIInt rr,kk,lps,lpd; 2330 PetscInt row_ind,col_ind; 2331 for(k=0;k<count_recv;k++){ 2332 rr = ranks_recv[k]; 2333 kk = localdispl2[k]; 2334 lps = pcbddc->local_primal_sizes[rr]; 2335 lpd = pcbddc->local_primal_displacements[rr]; 2336 //printf("Inserting the following indices (received from %d)\n",rr); 2337 for(j=0;j<lps;j++){ 2338 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2339 for(i=0;i<lps;i++){ 2340 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2341 //printf("%d %d\n",row_ind,col_ind); 2342 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2343 } 2344 } 2345 } 2346 ierr = PetscFree(requests);CHKERRQ(ierr); 2347 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2348 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2349 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2350 2351 /* create local to global mapping needed by coarse MATIS */ 2352 { 2353 IS coarse_IS; 2354 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2355 coarse_comm = prec_comm; 2356 active_rank=rank_prec_comm; 2357 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2358 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2359 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2360 } 2361 } 2362 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2363 /* arrays for values 
insertion */ 2364 ins_local_primal_size = pcbddc->local_primal_size; 2365 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2366 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2367 for(j=0;j<ins_local_primal_size;j++){ 2368 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2369 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2370 } 2371 } 2372 break; 2373 2374 } 2375 2376 case(GATHERS_BDDC): 2377 { 2378 2379 PetscMPIInt mysize,mysize2; 2380 2381 if(rank_prec_comm==active_rank) { 2382 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2383 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2384 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2385 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2386 /* arrays for values insertion */ 2387 ins_local_primal_size = pcbddc->coarse_size; 2388 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2389 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2390 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2391 localdispl2[0]=0; 2392 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2393 j=0; 2394 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2395 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2396 } 2397 2398 mysize=pcbddc->local_primal_size; 2399 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2400 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2401 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2402 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2403 } else { 2404 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2405 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2406 } 2407 2408 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2409 if(rank_prec_comm==active_rank) { 2410 PetscInt offset,offset2,row_ind,col_ind; 2411 for(j=0;j<ins_local_primal_size;j++){ 2412 ins_local_primal_indices[j]=j; 2413 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2414 } 2415 for(k=0;k<size_prec_comm;k++){ 2416 offset=pcbddc->local_primal_displacements[k]; 2417 offset2=localdispl2[k]; 2418 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2419 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2420 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2421 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2422 
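/* the received block from subdomain k is its local coarse matrix stored column major
   (j runs over its columns, i over its rows); its (i,j) entry is accumulated below into
   position (row_ind,col_ind) of the dense buffer, which is itself column major with
   leading dimension pcbddc->coarse_size, so that entries of coarse dofs shared by
   several subdomains are summed up */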
ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2423 } 2424 } 2425 } 2426 } 2427 break; 2428 }//switch on coarse problem and communications associated with finished 2429 } 2430 2431 /* Now create and fill up coarse matrix */ 2432 if( rank_prec_comm == active_rank ) { 2433 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2434 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2435 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2436 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2437 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2438 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2439 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2440 } else { 2441 Mat matis_coarse_local_mat; 2442 /* remind bs */ 2443 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2444 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2445 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2446 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2447 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2448 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2449 } 2450 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2451 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2452 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2453 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2454 2455 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2456 /* Preconditioner for coarse problem */ 2457 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2458 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2459 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2460 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2461 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2462 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2463 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2464 /* Allow user's customization */ 2465 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2466 /* Set Up PC for coarse problem BDDC */ 2467 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2468 if(dbg_flag) { 2469 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2470 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2471 } 2472 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2473 } 2474 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2475 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2476 if(dbg_flag) { 2477 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level 
set------------------------\n");CHKERRQ(ierr); 2478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2479 } 2480 } 2481 } 2482 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2483 IS local_IS,global_IS; 2484 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2485 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2486 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2487 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2488 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2489 } 2490 2491 2492 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2493 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2494 PetscScalar m_one=-1.0; 2495 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2496 const KSPType check_ksp_type=KSPGMRES; 2497 2498 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2499 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2500 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2501 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2502 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2503 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2504 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2505 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2506 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2507 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2508 if(dbg_flag) { 2509 kappa_2=lambda_max/lambda_min; 2510 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2511 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2512 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2513 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2514 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2515 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2516 } 2517 /* restore coarse ksp to default values */ 2518 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2519 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2520 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2521 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2522 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2523 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2524 } 2525 2526 /* free data structures no longer needed */ 2527 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2528 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2529 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2530 if(localsizes2) { ierr = 
PetscFree(localsizes2);CHKERRQ(ierr);} 2531 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2532 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2533 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2539 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2540 { 2541 2542 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2543 PC_IS *pcis = (PC_IS*)pc->data; 2544 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2545 PCBDDCGraph mat_graph; 2546 Mat mat_adj; 2547 PetscInt **neighbours_set; 2548 PetscInt *queue_in_global_numbering; 2549 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2550 PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size; 2551 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2552 PetscBool same_set,flg_row; 2553 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2554 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2555 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2556 const PetscInt *neumann_nodes; 2557 const PetscInt *dirichlet_nodes; 2558 IS used_IS; 2559 2560 PetscFunctionBegin; 2561 /* allocate and initialize needed graph structure */ 2562 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2563 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2564 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2565 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2566 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2567 i = mat_graph->nvtxs; 2568 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2569 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2570 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2571 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2572 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2573 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2574 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2575 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2576 2577 /* Setting dofs splitting in mat_graph->which_dof */ 2578 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2579 PetscInt *is_indices; 2580 PetscInt is_size; 2581 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2582 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2583 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2584 for(j=0;j<is_size;j++) { 2585 mat_graph->which_dof[is_indices[j]]=i; 2586 } 2587 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2588 } 2589 /* use mat block size as vertex size */ 2590 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2591 } else { /* otherwise it assumes a constant block size */ 2592 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2593 for(i=0;i<mat_graph->nvtxs/bs;i++) { 2594 for(s=0;s<bs;s++) { 2595 mat_graph->which_dof[i*bs+s]=s; 2596 } 
2597 } 2598 vertex_size=1; 2599 } 2600 /* count number of neigh per node */ 2601 total_counts=0; 2602 for(i=1;i<pcis->n_neigh;i++){ 2603 s=pcis->n_shared[i]; 2604 total_counts+=s; 2605 for(j=0;j<s;j++){ 2606 mat_graph->count[pcis->shared[i][j]] += 1; 2607 } 2608 } 2609 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2610 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 2611 if(used_IS) { 2612 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 2613 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 2614 for(i=0;i<neumann_bsize;i++){ 2615 iindex = neumann_nodes[i]; 2616 if(mat_graph->count[iindex] > 1){ 2617 mat_graph->count[iindex]+=1; 2618 total_counts++; 2619 } 2620 } 2621 } 2622 /* allocate space for storing the set of neighbours of each node */ 2623 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2624 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); } 2625 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2626 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2627 for(i=1;i<pcis->n_neigh;i++){ 2628 s=pcis->n_shared[i]; 2629 for(j=0;j<s;j++) { 2630 k=pcis->shared[i][j]; 2631 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2632 mat_graph->count[k]+=1; 2633 } 2634 } 2635 /* set -1 fake neighbour to mimic Neumann boundary */ 2636 if(used_IS) { 2637 for(i=0;i<neumann_bsize;i++){ 2638 iindex = neumann_nodes[i]; 2639 if(mat_graph->count[iindex] > 1){ 2640 neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2641 mat_graph->count[iindex]+=1; 2642 } 2643 } 2644 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 2645 } 2646 /* sort set of sharing subdomains (needed for comparison below) */ 2647 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2648 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2649 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 2650 if(used_IS) { 2651 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 2652 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2653 for(i=0;i<dirichlet_bsize;i++){ 2654 mat_graph->count[dirichlet_nodes[i]]=0; 2655 } 2656 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 2657 } 2658 for(i=0;i<mat_graph->nvtxs;i++){ 2659 if(!mat_graph->count[i]){ /* interior nodes */ 2660 mat_graph->touched[i]=PETSC_TRUE; 2661 mat_graph->where[i]=0; 2662 nodes_touched++; 2663 } 2664 } 2665 mat_graph->ncmps = 0; 2666 while(nodes_touched<mat_graph->nvtxs) { 2667 /* find first untouched node in local ordering */ 2668 i=0; 2669 while(mat_graph->touched[i]) i++; 2670 mat_graph->touched[i]=PETSC_TRUE; 2671 mat_graph->where[i]=where_values; 2672 nodes_touched++; 2673 /* now find all other nodes having the same set of sharing subdomains */ 2674 for(j=i+1;j<mat_graph->nvtxs;j++){ 2675 /* check for same number of sharing subdomains and dof number */ 2676 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2677 /* check for same set of sharing subdomains */ 2678 same_set=PETSC_TRUE; 2679 for(k=0;k<mat_graph->count[j];k++){ 2680 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2681 same_set=PETSC_FALSE; 2682 } 2683 } 2684 /* I found a friend of mine */ 2685 if(same_set) { 2686 
          mat_graph->where[j]=where_values;
          mat_graph->touched[j]=PETSC_TRUE;
          nodes_touched++;
        }
      }
    }
    where_values++;
  }
  where_values--; if(where_values<0) where_values=0;
  ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
  /* Find connected components defined on the shared interface */
  if(where_values) {
    ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);CHKERRQ(ierr);
    /* For consistency among neighbouring procs, each connected component must be sorted by global ordering */
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
      ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
    }
  }
  /* check consistency of connected components among neighbouring subdomains, and adapt them if needed */
  for(i=0;i<where_values;i++) {
    /* We are not sure that two connected components will be the same among subdomains sharing a subset of the local interface */
    if(mat_graph->where_ncmps[i]>1) {
      adapt_interface=1;
      break;
    }
  }
  ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr);
  if(where_values && adapt_interface_reduced) {

    PetscInt    sum_requests=0;
    PetscMPIInt my_rank;
    PetscInt    buffer_size,start_of_recv,size_of_recv,start_of_send;
    PetscInt    temp_buffer_size,ins_val,global_where_counter;
    PetscInt    *cum_recv_counts;
    PetscInt    *where_to_nodes_indices;
    PetscInt    *petsc_buffer;
    PetscMPIInt *recv_buffer;
    PetscMPIInt *recv_buffer_where;
    PetscMPIInt *send_buffer;
    PetscMPIInt size_of_send;
    PetscInt    *sizes_of_sends;
    MPI_Request *send_requests;
    MPI_Request *recv_requests;
    PetscInt    *where_cc_adapt;
    PetscInt    **temp_buffer;
    PetscInt    *nodes_to_temp_buffer_indices;
    PetscInt    *add_to_where;

    ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr);
    ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr);
    /* first count how many neighbours per connected component I will receive from */
    cum_recv_counts[0]=0;
    for(i=1;i<where_values+1;i++){
      j=0;
      while(mat_graph->where[j] != i) j++;
      where_to_nodes_indices[i-1]=j;
      /* sends/recvs to/from self are not wanted, so the process does not count itself */
      if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; }
      else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; }
    }
    buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs;
    ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr);
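    /* A note on the exchange performed below: cum_recv_counts[i] is the running sum of
       the number of neighbouring processes of interface subsets 1..i, so
       cum_recv_counts[where_values] is the total number of point-to-point messages this
       process posts (one send and one receive per neighbour per subset) in each round.
       recv_buffer_where stores, in its first half, the number of connected components
       found by each neighbour and, in its second half, the sizes of the serialized
       component lists received afterwards. */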
for(i=0;i<cum_recv_counts[where_values];i++) { 2755 send_requests[i]=MPI_REQUEST_NULL; 2756 recv_requests[i]=MPI_REQUEST_NULL; 2757 } 2758 /* exchange with my neighbours the number of my connected components on the shared interface */ 2759 for(i=0;i<where_values;i++){ 2760 j=where_to_nodes_indices[i]; 2761 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2762 for(;k<mat_graph->count[j];k++){ 2763 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2764 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2765 sum_requests++; 2766 } 2767 } 2768 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2769 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2770 /* determine the connected component I need to adapt */ 2771 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2772 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2773 for(i=0;i<where_values;i++){ 2774 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2775 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2776 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2777 where_cc_adapt[i]=PETSC_TRUE; 2778 break; 2779 } 2780 } 2781 } 2782 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2783 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2784 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2785 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2786 sum_requests=0; 2787 start_of_send=0; 2788 start_of_recv=cum_recv_counts[where_values]; 2789 for(i=0;i<where_values;i++) { 2790 if(where_cc_adapt[i]) { 2791 size_of_send=0; 2792 for(j=i;j<mat_graph->ncmps;j++) { 2793 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 2794 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2795 size_of_send+=1; 2796 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2797 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2798 } 2799 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2800 } 2801 } 2802 j = where_to_nodes_indices[i]; 2803 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2804 for(;k<mat_graph->count[j];k++){ 2805 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2806 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2807 sum_requests++; 2808 } 2809 sizes_of_sends[i]=size_of_send; 2810 start_of_send+=size_of_send; 2811 } 2812 } 2813 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2814 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2815 buffer_size=0; 2816 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2817 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2818 /* now exchange the data */ 2819 start_of_recv=0; 2820 start_of_send=0; 2821 sum_requests=0; 2822 for(i=0;i<where_values;i++) { 2823 if(where_cc_adapt[i]) { 2824 size_of_send = sizes_of_sends[i]; 2825 j = where_to_nodes_indices[i]; 2826 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2827 for(;k<mat_graph->count[j];k++){ 2828 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2829 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2830 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2831 start_of_recv+=size_of_recv; 2832 sum_requests++; 2833 } 2834 start_of_send+=size_of_send; 2835 } 2836 } 2837 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2838 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2839 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2840 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2841 for(j=0;j<buffer_size;) { 2842 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2843 k=petsc_buffer[j]+1; 2844 j+=k; 2845 } 2846 sum_requests=cum_recv_counts[where_values]; 2847 start_of_recv=0; 2848 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2849 global_where_counter=0; 2850 for(i=0;i<where_values;i++){ 2851 if(where_cc_adapt[i]){ 2852 temp_buffer_size=0; 2853 /* find nodes on the shared interface we need to adapt */ 2854 for(j=0;j<mat_graph->nvtxs;j++){ 2855 if(mat_graph->where[j]==i+1) { 2856 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2857 temp_buffer_size++; 2858 } else { 2859 nodes_to_temp_buffer_indices[j]=-1; 2860 } 2861 } 2862 /* allocate some temporary space */ 2863 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2864 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2865 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2866 for(j=1;j<temp_buffer_size;j++){ 2867 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2868 } 2869 /* analyze contributions from neighbouring subdomains for i-th conn comp 2870 temp buffer structure: 
2871 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2872 3 neighs procs with structured connected components: 2873 neigh 0: [0 1 4], [2 3]; (2 connected components) 2874 neigh 1: [0 1], [2 3 4]; (2 connected components) 2875 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2876 tempbuffer (row-oriented) should be filled as: 2877 [ 0, 0, 0; 2878 0, 0, 1; 2879 1, 1, 2; 2880 1, 1, 2; 2881 0, 1, 0; ]; 2882 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 2883 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2884 */ 2885 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2886 ins_val=0; 2887 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2888 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2889 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2890 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2891 } 2892 buffer_size+=k; 2893 ins_val++; 2894 } 2895 start_of_recv+=size_of_recv; 2896 sum_requests++; 2897 } 2898 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2899 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2900 for(j=0;j<temp_buffer_size;j++){ 2901 if(!add_to_where[j]){ /* found a new cc */ 2902 global_where_counter++; 2903 add_to_where[j]=global_where_counter; 2904 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2905 same_set=PETSC_TRUE; 2906 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2907 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2908 same_set=PETSC_FALSE; 2909 break; 2910 } 2911 } 2912 if(same_set) add_to_where[k]=global_where_counter; 2913 } 2914 } 2915 } 2916 /* insert new data in where array */ 2917 temp_buffer_size=0; 2918 for(j=0;j<mat_graph->nvtxs;j++){ 2919 if(mat_graph->where[j]==i+1) { 2920 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2921 temp_buffer_size++; 2922 } 2923 } 2924 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2925 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2926 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2927 } 2928 } 2929 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2930 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2931 ierr = PetscFree(send_requests);CHKERRQ(ierr); 2932 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2933 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2934 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2935 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2936 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2937 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2938 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2939 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2940 if(global_where_counter) { 2941 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2942 global_where_counter=0; 2943 for(i=0;i<mat_graph->nvtxs;i++){ 2944 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2945 global_where_counter++; 2946 for(j=i+1;j<mat_graph->nvtxs;j++){ 2947 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2948 mat_graph->where[j]=global_where_counter; 2949 mat_graph->touched[j]=PETSC_TRUE; 2950 } 2951 } 2952 
mat_graph->where[i]=global_where_counter; 2953 mat_graph->touched[i]=PETSC_TRUE; 2954 } 2955 } 2956 where_values=global_where_counter; 2957 } 2958 if(global_where_counter) { 2959 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2960 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2961 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2962 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2963 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2964 for(i=0;i<mat_graph->ncmps;i++) { 2965 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2966 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2967 } 2968 } 2969 } /* Finished adapting interface */ 2970 PetscInt nfc=0; 2971 PetscInt nec=0; 2972 PetscInt nvc=0; 2973 PetscBool twodim_flag=PETSC_FALSE; 2974 for (i=0; i<mat_graph->ncmps; i++) { 2975 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2976 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 2977 nfc++; 2978 } else { /* note that nec will be zero in 2d */ 2979 nec++; 2980 } 2981 } else { 2982 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2983 } 2984 } 2985 2986 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 2987 nec = nfc; 2988 nfc = 0; 2989 twodim_flag = PETSC_TRUE; 2990 } 2991 /* allocate IS arrays for faces, edges. Vertices need a single index set. 2992 Reusing space allocated in mat_graph->where for creating IS objects */ 2993 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 2994 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 2995 use_faces=PETSC_TRUE; 2996 } 2997 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 2998 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 2999 use_edges=PETSC_TRUE; 3000 } 3001 nfc=0; 3002 nec=0; 3003 for (i=0; i<mat_graph->ncmps; i++) { 3004 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3005 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3006 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3007 } 3008 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3009 if(twodim_flag) { 3010 if(use_edges) { 3011 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3012 nec++; 3013 } 3014 } else { 3015 if(use_faces) { 3016 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 3017 nfc++; 3018 } 3019 } 3020 } else { 3021 if(use_edges) { 3022 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3023 nec++; 3024 } 3025 } 3026 } 3027 } 3028 pcbddc->n_ISForFaces=nfc; 3029 pcbddc->n_ISForEdges=nec; 3030 nvc=0; 3031 if( !pcbddc->constraints_flag ) { 3032 for (i=0; i<mat_graph->ncmps; i++) { 3033 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3034 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3035 mat_graph->where[nvc]=mat_graph->queue[j]; 3036 nvc++; 3037 } 3038 } 3039 } 3040 } 3041 /* sort vertex set (by local ordering) */ 3042 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3043 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr);

  if(pcbddc->dbg_flag) {
    PetscViewer viewer=pcbddc->dbg_viewer;

    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }

  /* Restore CSR structure into sequential matrix and free memory space no longer needed */
  ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr);
  if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n");
  ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
  /* Free graph structure */
  if(mat_graph->nvtxs){
    ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr);
    ierr = PetscFree(neighbours_set);CHKERRQ(ierr);
    ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr);
    ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr);
    ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
  }
  ierr = PetscFree(mat_graph);CHKERRQ(ierr);
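
  /* At this point the analysis of the local interface is complete: candidate faces,
     edges and vertices are stored in pcbddc->ISForFaces, pcbddc->ISForEdges and
     pcbddc->ISForVertices (subject to the -pc_bddc_*_only options) and drive the
     selection of primal dofs and constraints in the rest of the BDDC setup. */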
3088 3089 PetscFunctionReturn(0); 3090 3091 } 3092 3093 /* -------------------------------------------------------------------------- */ 3094 3095 /* The following code has been adapted from function IsConnectedSubdomain contained 3096 in source file contig.c of METIS library (version 5.0.1) */ 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3100 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3101 { 3102 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3103 PetscInt *xadj, *adjncy, *where, *queue; 3104 PetscInt *cptr; 3105 PetscBool *touched; 3106 3107 PetscFunctionBegin; 3108 3109 nvtxs = graph->nvtxs; 3110 xadj = graph->xadj; 3111 adjncy = graph->adjncy; 3112 where = graph->where; 3113 touched = graph->touched; 3114 queue = graph->queue; 3115 cptr = graph->cptr; 3116 3117 for (i=0; i<nvtxs; i++) 3118 touched[i] = PETSC_FALSE; 3119 3120 cum_queue=0; 3121 ncmps=0; 3122 3123 for(n=0; n<n_dist; n++) { 3124 pid = n+1; 3125 nleft = 0; 3126 for (i=0; i<nvtxs; i++) { 3127 if (where[i] == pid) 3128 nleft++; 3129 } 3130 for (i=0; i<nvtxs; i++) { 3131 if (where[i] == pid) 3132 break; 3133 } 3134 touched[i] = PETSC_TRUE; 3135 queue[cum_queue] = i; 3136 first = 0; last = 1; 3137 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3138 ncmps_pid = 0; 3139 while (first != nleft) { 3140 if (first == last) { /* Find another starting vertex */ 3141 cptr[++ncmps] = first+cum_queue; 3142 ncmps_pid++; 3143 for (i=0; i<nvtxs; i++) { 3144 if (where[i] == pid && !touched[i]) 3145 break; 3146 } 3147 queue[cum_queue+last] = i; 3148 last++; 3149 touched[i] = PETSC_TRUE; 3150 } 3151 i = queue[cum_queue+first]; 3152 first++; 3153 for (j=xadj[i]; j<xadj[i+1]; j++) { 3154 k = adjncy[j]; 3155 if (where[k] == pid && !touched[k]) { 3156 queue[cum_queue+last] = k; 3157 last++; 3158 touched[k] = PETSC_TRUE; 3159 } 3160 } 3161 } 3162 cptr[++ncmps] = first+cum_queue; 3163 ncmps_pid++; 3164 cum_queue=cptr[ncmps]; 3165 graph->where_ncmps[n] = ncmps_pid; 3166 } 3167 graph->ncmps = ncmps; 3168 3169 PetscFunctionReturn(0); 3170 } 3171 3172
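
/* A worked (hypothetical) example of the output layout of PCBDDCFindConnectedComponents,
   assuming n_dist=2, a graph whose adjacency contains the two chains 0-1-4 and 2-3 with
   node 5 isolated, and

     where = [1 1 2 2 1 2]

   After the call:

     queue              = [0 1 4 | 2 3 | 5]   (one BFS run per connected component)
     cptr               = [0 3 5 6]           (component i occupies queue[cptr[i]] ... queue[cptr[i+1]-1])
     graph->where_ncmps = [1 2]               (number of components found for where values 1 and 2)
     graph->ncmps       = 3
*/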