1 /* TODOLIST 2 DofSplitting and DM attached to pc. 3 Exact solvers: Solve local saddle point directly for very hard problems 4 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 5 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 6 - mind the problem with coarsening_factor 7 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 8 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 9 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 10 - Add levels' slot to bddc data structure and associated Set/Get functions 11 code refactoring: 12 - pick up better names for static functions 13 check log_summary for leaking (actually: 1 Vector per level ) 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 remove // commments and adhere to PETSc code requirements 19 man pages 20 */ 21 22 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 23 Implementation of BDDC preconditioner based on: 24 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 25 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 26 27 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 28 #include <petscblaslapack.h> 29 30 /* -------------------------------------------------------------------------- */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCSetFromOptions_BDDC" 33 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 34 { 35 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 40 /* Verbose debugging of main data structures */ 41 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 42 /* Some customization for default primal space */ 43 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 47 /* Coarse solver context */ 48 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 49 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 50 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 51 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsTail();CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 EXTERN_C_END 70 71 /* -------------------------------------------------------------------------- */ 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 74 /*@ 75 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 76 77 Not collective 78 79 Input Parameters: 80 + pc - the preconditioning context 81 - CoarseProblemType - pick a better name and explain what this is 82 83 Level: intermediate 84 85 Notes: 86 Not collective but all procs must call this. 
87 88 .seealso: PCBDDC 89 @*/ 90 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 91 { 92 PetscErrorCode ierr; 93 94 PetscFunctionBegin; 95 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 96 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = ISDuplicate(DirichletBoundaries,&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 112 ierr = ISCopy(DirichletBoundaries,pcbddc->DirichletBoundaries);CHKERRQ(ierr); 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 /* -------------------------------------------------------------------------- */ 118 #undef __FUNCT__ 119 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 120 /*@ 121 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of 122 Dirichlet boundaries for the global problem. 123 124 Not collective 125 126 Input Parameters: 127 + pc - the preconditioning context 128 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 129 130 Level: intermediate 131 132 Notes: 133 The sequential IS is copied; the user must destroy the IS object passed in. 
.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatched to PCBDDCSetDirichletBoundaries_BDDC; a no-op if pc is not of type BDDC */
  ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC"
/* Takes a private copy of the user-provided IS, destroying any previously set one */
static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetNeumannBoundaries"
/*@
 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of
 Neumann boundaries for the global problem.

   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL)

   Level: intermediate

   Notes:
   The sequential IS is copied; the user must destroy the IS object passed in.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatched to PCBDDCSetNeumannBoundaries_BDDC; a no-op if pc is not of type BDDC */
  ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC"
/* Hands out a borrowed reference to the stored IS: the caller must NOT destroy it */
static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  if(pcbddc->NeumannBoundaries) {
    *NeumannBoundaries = pcbddc->NeumannBoundaries;
  } else {
    /* returning PETSC_NULL (instead of erroring) when the user never set the IS */
    *NeumannBoundaries = PETSC_NULL;
    //SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries"
/*@
 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of
 Neumann boundaries for the global problem.

   Not collective

   Input Parameters:
+  pc - the preconditioning context

   Output Parameters:
+  NeumannBoundaries - index set defining the subdomain part of Neumann boundaries

   Level: intermediate

   Notes:
   If the user has not yet provided such information, PETSC_NULL is returned.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (not Try): errors out if pc is not of type BDDC */
  ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC"
/* Replaces the stored array of dof-splitting ISs with private copies of the new ones */
static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[])
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
  PetscInt i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Destroy ISs if they were already set */
  for(i=0;i<pcbddc->n_ISForDofs;i++) {
    ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);

  /* allocate space then copy ISs */
  ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr);
  for(i=0;i<n_is;i++) {
    ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
    ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr);
  }
  pcbddc->n_ISForDofs=n_is;

  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDofsSplitting"
/*@
 PCBDDCSetDofsSplitting - Set index sets defining how dofs are split.

   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  n - number of index sets defining dofs splitting
-  IS[] - array of IS describing dofs splitting

   Level: intermediate

   Notes:
   Sequential ISs are copied, the user must destroy the array of IS passed in.
289 290 .seealso: PCBDDC 291 @*/ 292 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 293 { 294 PetscErrorCode ierr; 295 296 PetscFunctionBegin; 297 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 298 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "PCSetUp_BDDC" 304 /* -------------------------------------------------------------------------- */ 305 /* 306 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 307 by setting data structures and options. 308 309 Input Parameter: 310 + pc - the preconditioner context 311 312 Application Interface Routine: PCSetUp() 313 314 Notes: 315 The interface routine PCSetUp() is not usually called directly by 316 the user, but instead is called by PCApply() if necessary. 317 */ 318 PetscErrorCode PCSetUp_BDDC(PC pc) 319 { 320 PetscErrorCode ierr; 321 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 322 PC_IS *pcis = (PC_IS*)(pc->data); 323 324 PetscFunctionBegin; 325 if (!pc->setupcalled) { 326 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 327 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 328 Also, we decide to directly build the (same) Dirichlet problem */ 329 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 330 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 331 /* Set up all the "iterative substructuring" common block */ 332 ierr = PCISSetUp(pc);CHKERRQ(ierr); 333 /* Get stdout for dbg */ 334 if(pcbddc->dbg_flag) { 335 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 336 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 337 } 338 /* TODO MOVE CODE FRAGMENT */ 339 PetscInt im_active=0; 340 if(pcis->n) im_active = 1; 341 ierr = 
MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 342 /* Analyze local interface */ 343 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 344 /* Set up local constraint matrix */ 345 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 346 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 347 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 348 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 349 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 350 } 351 PetscFunctionReturn(0); 352 } 353 354 /* -------------------------------------------------------------------------- */ 355 /* 356 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 357 358 Input Parameters: 359 . pc - the preconditioner context 360 . r - input vector (global) 361 362 Output Parameter: 363 . z - output vector (global) 364 365 Application Interface Routine: PCApply() 366 */ 367 #undef __FUNCT__ 368 #define __FUNCT__ "PCApply_BDDC" 369 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 370 { 371 PC_IS *pcis = (PC_IS*)(pc->data); 372 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 373 PetscErrorCode ierr; 374 const PetscScalar one = 1.0; 375 const PetscScalar m_one = -1.0; 376 377 /* This code is similar to that provided in nn.c for PCNN 378 NN interface preconditioner changed to BDDC 379 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 380 381 PetscFunctionBegin; 382 /* First Dirichlet solve */ 383 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 384 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 385 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 386 /* 387 Assembling right hand side for BDDC operator 388 - vec1_D for the Dirichlet part (if 
needed, i.e. prec_flag=PETSC_TRUE) 389 - the interface part of the global vector z 390 */ 391 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 392 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 393 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 394 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 395 ierr = VecCopy(r,z);CHKERRQ(ierr); 396 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 397 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 398 399 /* 400 Apply interface preconditioner 401 Results are stored in: 402 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 403 - the interface part of the global vector z 404 */ 405 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 406 407 /* Second Dirichlet solve and assembling of output */ 408 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 409 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 410 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 411 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 412 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 413 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 414 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 415 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 416 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 417 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 418 419 PetscFunctionReturn(0); 420 421 } 422 423 /* -------------------------------------------------------------------------- */ 424 /* 425 
PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 426 427 */ 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 430 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 431 { 432 PetscErrorCode ierr; 433 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 434 PC_IS* pcis = (PC_IS*) (pc->data); 435 const PetscScalar zero = 0.0; 436 437 PetscFunctionBegin; 438 /* Get Local boundary and apply partition of unity */ 439 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 441 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 442 443 /* Application of PHI^T */ 444 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 445 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 446 447 /* Scatter data of coarse_rhs */ 448 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 449 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 450 451 /* Local solution on R nodes */ 452 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 453 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 if(pcbddc->prec_type) { 456 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 457 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 458 } 459 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 460 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 461 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463 if(pcbddc->prec_type) { 464 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 } 467 468 /* Coarse solution */ 469 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 471 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 472 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 473 474 /* Sum contributions from two levels */ 475 /* Apply partition of unity and sum boundary values */ 476 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 477 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 478 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 479 ierr = VecSet(z,zero);CHKERRQ(ierr); 480 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 481 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 482 483 PetscFunctionReturn(0); 484 } 485 486 487 /* -------------------------------------------------------------------------- */ 488 /* 489 PCBDDCSolveSaddlePoint 490 491 */ 492 #undef __FUNCT__ 493 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 494 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 495 { 496 PetscErrorCode ierr; 497 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 498 499 PetscFunctionBegin; 500 501 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 502 if(pcbddc->n_constraints) { 503 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 505 } 506 507 PetscFunctionReturn(0); 508 } 509 /* -------------------------------------------------------------------------- */ 510 /* 511 PCBDDCScatterCoarseDataBegin 512 513 */ 514 #undef __FUNCT__ 515 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 516 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 517 { 518 PetscErrorCode ierr; 519 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 520 521 PetscFunctionBegin; 522 523 switch(pcbddc->coarse_communications_type){ 524 case SCATTERS_BDDC: 525 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 526 break; 527 case GATHERS_BDDC: 528 break; 529 } 530 PetscFunctionReturn(0); 531 532 } 533 /* -------------------------------------------------------------------------- */ 534 /* 535 PCBDDCScatterCoarseDataEnd 536 537 */ 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 540 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 541 { 542 PetscErrorCode ierr; 543 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 544 PetscScalar* array_to; 545 PetscScalar* array_from; 546 MPI_Comm comm=((PetscObject)pc)->comm; 547 PetscInt i; 548 549 PetscFunctionBegin; 550 551 switch(pcbddc->coarse_communications_type){ 552 case SCATTERS_BDDC: 553 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 554 break; 555 case GATHERS_BDDC: 556 if(vec_from) VecGetArray(vec_from,&array_from); 557 if(vec_to) VecGetArray(vec_to,&array_to); 558 
switch(pcbddc->coarse_problem_type){ 559 case SEQUENTIAL_BDDC: 560 if(smode == SCATTER_FORWARD) { 561 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 562 if(vec_to) { 563 for(i=0;i<pcbddc->replicated_primal_size;i++) 564 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 565 } 566 } else { 567 if(vec_from) 568 for(i=0;i<pcbddc->replicated_primal_size;i++) 569 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 570 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 571 } 572 break; 573 case REPLICATED_BDDC: 574 if(smode == SCATTER_FORWARD) { 575 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 576 for(i=0;i<pcbddc->replicated_primal_size;i++) 577 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 578 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 579 for(i=0;i<pcbddc->local_primal_size;i++) 580 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 581 } 582 break; 583 case MULTILEVEL_BDDC: 584 break; 585 case PARALLEL_BDDC: 586 break; 587 } 588 if(vec_from) VecRestoreArray(vec_from,&array_from); 589 if(vec_to) VecRestoreArray(vec_to,&array_to); 590 break; 591 } 592 PetscFunctionReturn(0); 593 594 } 595 596 /* -------------------------------------------------------------------------- */ 597 /* 598 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 599 that was created with 
PCCreate_BDDC(). 600 601 Input Parameter: 602 . pc - the preconditioner context 603 604 Application Interface Routine: PCDestroy() 605 */ 606 #undef __FUNCT__ 607 #define __FUNCT__ "PCDestroy_BDDC" 608 PetscErrorCode PCDestroy_BDDC(PC pc) 609 { 610 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 611 PetscErrorCode ierr; 612 613 PetscFunctionBegin; 614 /* free data created by PCIS */ 615 ierr = PCISDestroy(pc);CHKERRQ(ierr); 616 /* free BDDC data */ 617 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 618 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 619 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 620 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 621 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 622 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 623 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 624 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 625 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 626 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 627 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 628 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 630 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 631 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 632 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 633 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 634 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 635 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 636 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 637 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 638 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 639 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 640 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 641 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 642 
ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 643 PetscInt i; 644 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 645 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 646 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 648 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 649 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 650 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 651 /* Free the private data structure that was hanging off the PC */ 652 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 /* -------------------------------------------------------------------------- */ 657 /*MC 658 PCBDDC - Balancing Domain Decomposition by Constraints. 659 660 Options Database Keys: 661 . -pcbddc ??? - 662 663 Level: intermediate 664 665 Notes: The matrix used with this preconditioner must be of type MATIS 666 667 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 668 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 669 on the subdomains). 670 671 Options for the coarse grid preconditioner can be set with - 672 Options for the Dirichlet subproblem can be set with - 673 Options for the Neumann subproblem can be set with - 674 675 Contributed by Stefano Zampini 676 677 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 678 M*/ 679 EXTERN_C_BEGIN 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCCreate_BDDC" 682 PetscErrorCode PCCreate_BDDC(PC pc) 683 { 684 PetscErrorCode ierr; 685 PC_BDDC *pcbddc; 686 687 PetscFunctionBegin; 688 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 689 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 690 pc->data = (void*)pcbddc; 691 /* create PCIS data structure */ 692 ierr = PCISCreate(pc);CHKERRQ(ierr); 693 /* BDDC specific */ 694 pcbddc->coarse_vec = 0; 695 pcbddc->coarse_rhs = 0; 696 pcbddc->coarse_ksp = 0; 697 pcbddc->coarse_phi_B = 0; 698 pcbddc->coarse_phi_D = 0; 699 pcbddc->vec1_P = 0; 700 pcbddc->vec1_R = 0; 701 pcbddc->vec2_R = 0; 702 pcbddc->local_auxmat1 = 0; 703 pcbddc->local_auxmat2 = 0; 704 pcbddc->R_to_B = 0; 705 pcbddc->R_to_D = 0; 706 pcbddc->ksp_D = 0; 707 pcbddc->ksp_R = 0; 708 pcbddc->local_primal_indices = 0; 709 pcbddc->prec_type = PETSC_FALSE; 710 pcbddc->NeumannBoundaries = 0; 711 pcbddc->ISForDofs = 0; 712 pcbddc->ISForVertices = 0; 713 pcbddc->n_ISForFaces = 0; 714 pcbddc->n_ISForEdges = 0; 715 pcbddc->ConstraintMatrix = 0; 716 pcbddc->use_nnsp_true = PETSC_FALSE; 717 pcbddc->local_primal_sizes = 0; 718 pcbddc->local_primal_displacements = 0; 719 pcbddc->replicated_local_primal_indices = 0; 720 pcbddc->replicated_local_primal_values = 0; 721 pcbddc->coarse_loc_to_glob = 0; 722 pcbddc->dbg_flag = PETSC_FALSE; 723 pcbddc->coarsening_ratio = 8; 724 /* function pointers */ 725 pc->ops->apply = PCApply_BDDC; 726 pc->ops->applytranspose = 0; 727 pc->ops->setup = PCSetUp_BDDC; 728 pc->ops->destroy = PCDestroy_BDDC; 729 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 730 pc->ops->view = 0; 731 pc->ops->applyrichardson = 0; 732 pc->ops->applysymmetricleft = 0; 733 pc->ops->applysymmetricright = 0; 734 /* composing function */ 735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 736 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 738 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 739 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   Create C matrix [I 0; 0 const]

   Assembles pcbddc->ConstraintMatrix, the local primal constraint matrix C:
   one row per vertex (an identity entry at the vertex dof) followed by one
   row per edge/face constraint (quadrature weights over the dofs of the
   connected component).  Constraint values come from the near null space
   attached to pc->pmat (constants are used when none is attached).  Unless
   use_nnsp_true is set, the candidate constraints on each connected
   component are orthonormalized and filtered by an SVD (or, when GESVD is
   missing or BDDC_USE_POD is defined, by a POD-style eigen-decomposition of
   the small correlation matrix), discarding directions with singular value
   below tol.
*/
/* If POD was explicitly requested, force the no-GESVD (POD) code path by
   temporarily defining PETSC_MISSING_LAPACK_GESVD; UNDEF_... remembers that
   we must undo this right after the function definition. */
#ifdef BDDC_USE_POD
#if !defined(PETSC_MISSING_LAPACK_GESVD)
#define PETSC_MISSING_LAPACK_GESVD 1
#define UNDEF_PETSC_MISSING_LAPACK_GESVD 1
#endif
#endif

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS*         pcis = (PC_IS*)(pc->data);
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       *nnz,*vertices,*is_indices;
  PetscScalar    *temp_quadrature_constraint;
  PetscInt       *temp_indices,*temp_indices_to_constraint;
  PetscInt       local_primal_size,i,j,k,total_counts,max_size_of_constraint;
  PetscInt       n_constraints,n_vertices,size_of_constraint;
  PetscReal      quad_value;
  PetscBool      nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
  PetscInt       nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
  IS             *used_IS;
  const MatType  impMatType=MATSEQAIJ;
  PetscBLASInt   Bs,Bt,lwork,lierr;
  PetscReal      tol=1.0e-8;                 /* drop singular values/eigenvalues below this */
  MatNullSpace   nearnullsp;
  const Vec      *nearnullvecs;
  Vec            *localnearnullsp;
  PetscScalar    *work,*temp_basis,*array_vector,*correlation_mat;
  PetscReal      *rwork,*singular_vals;
  PetscBLASInt   Bone=1;
/* conditional declarations: POD path needs BLAS helpers, GESVD path needs
   dummies for the unreferenced U/VT arguments of LAPACKgesvd_ */
#if defined(PETSC_MISSING_LAPACK_GESVD)
  PetscScalar    dot_result;
  PetscScalar    one=1.0,zero=0.0;
  PetscInt       ii;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    val1,val2;
#endif
#else
  PetscBLASInt   dummy_int;
  PetscScalar    dummy_scalar;
#endif

  PetscFunctionBegin;
  /* check if near null space is attached to global mat */
  ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
  if (nearnullsp) {
    ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
  } else { /* if near null space is not provided it uses constants */
    nnsp_has_cnst = PETSC_TRUE;
    use_nnsp_true = PETSC_TRUE;
  }
  if(nnsp_has_cnst) {
    nnsp_addone = 1;  /* one extra candidate row (the constant) per connected component */
  }
  /*
     Evaluate maximum storage size needed by the procedure
     - temp_indices will contain start index of each constraint stored as follows
     - temp_indices_to_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
     - temp_quadrature_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
  */
  total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
  total_counts = 0;
  max_size_of_constraint = 0;
  /* sum of component sizes gives total packed storage; also track the largest component */
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
    total_counts += j;
    if(j>max_size_of_constraint) max_size_of_constraint=j;
  }
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
  /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
  rwork = 0;
  work = 0;
  singular_vals = 0;
  temp_basis = 0;
  correlation_mat = 0;
  if(!pcbddc->use_nnsp_true) {
    PetscScalar temp_work;
#if defined(PETSC_MISSING_LAPACK_GESVD)
    /* POD */
    PetscInt max_n;
    max_n = nnsp_addone+nnsp_size;
    /* using some techniques borrowed from Proper Orthogonal Decomposition */
    ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
    ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
    ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    Bt = PetscBLASIntCast(max_n);
    lwork=-1;
#if !defined(PETSC_USE_COMPLEX)
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
#else
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
#endif
#else /* on missing GESVD */
    /* SVD */
    PetscInt max_n,min_n;
    max_n = max_size_of_constraint;
    min_n = nnsp_addone+nnsp_size;
    if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
      min_n = max_size_of_constraint;
      max_n = nnsp_addone+nnsp_size;
    }
    ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    lwork=-1;
    Bs = PetscBLASIntCast(max_n);
    Bt = PetscBLASIntCast(min_n);
    dummy_int = Bs;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
#else
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
#endif
    if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
#endif
    /* Allocate optimal workspace */
    lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
    total_counts = (PetscInt)lwork;
    ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* get local part of global near null space vectors */
  ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  /* Now we can loop on constraining sets */
  total_counts=0;
  temp_indices[0]=0;
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
    temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
    ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
    ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    if(nnsp_has_cnst) {
      /* constant function constraint, normalized so that the row has unit 2-norm */
      temp_constraints++;
      quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
      }
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    for(k=0;k<nnsp_size;k++) {
      /* restriction of the k-th near null space vector to this connected component */
      ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
      }
      ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      quad_value = 1.0;
      if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
        Bs = PetscBLASIntCast(size_of_constraint);
        quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone);
      }
      if ( quad_value > 0.0 ) { /* keep indices and values */
        temp_constraints++;
        temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
        total_counts++;
      }
    }
    ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */
    if(!use_nnsp_true) {

      Bs = PetscBLASIntCast(size_of_constraint);
      Bt = PetscBLASIntCast(temp_constraints);

#if defined(PETSC_MISSING_LAPACK_GESVD)
      /* POD path: eigen-decompose the small Bt x Bt correlation matrix instead of an SVD */
      ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr);
      /* Store upper triangular part of correlation matrix */
      for(j=0;j<temp_constraints;j++) {
        for(k=0;k<j+1;k++) {
#if defined(PETSC_USE_COMPLEX)
          /* hand made complex dot product */
          dot_result = 0.0;
          for (ii=0; ii<size_of_constraint; ii++) {
            val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
            val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
            dot_result += val1*PetscConj(val2);
          }
#else
          dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
                                &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
#endif
          correlation_mat[j*temp_constraints+k]=dot_result;
        }
      }
#if !defined(PETSC_USE_COMPLEX)
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
#else
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
      /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
      j=0;
      while( j < Bt && singular_vals[j] < tol) j++;
      total_counts=total_counts-j;  /* discard the j smallest (near-zero) directions */
      if(j<temp_constraints) {
        /* scale surviving eigenvectors by 1/sqrt(lambda) to orthonormalize the basis */
        for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
        BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
        /* copy POD basis into used quadrature memory (largest eigenvalues first) */
        for(k=0;k<Bt-j;k++) {
          for(ii=0;ii<size_of_constraint;ii++) {
            temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
          }
        }
      }

#else /* on missing GESVD */

      /* GESVD path: jobu="O" overwrites the packed constraint block with the left
         singular vectors in place, so no copy-back is needed */
      PetscInt min_n = temp_constraints;
      if(min_n > size_of_constraint) min_n = size_of_constraint;
      dummy_int = Bs;
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
#else
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
      /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */
      j=0;
      while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
      total_counts = total_counts-(PetscInt)Bt+(min_n-j);

#endif
    }
  }
  /* assemble the constraint matrix: identity rows for vertices, then one AIJ row per retained constraint */
  n_constraints=total_counts;
  ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr);
  local_primal_size = n_vertices+n_constraints;
  ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
  ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
  ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { nnz[i]= 1; }
  for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; }
  ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
  ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); }
  ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_constraints;i++) {
    j=i+n_vertices;
    size_of_constraint=temp_indices[i+1]-temp_indices[i];
    ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* set quantities in pcbddc data structure */
  pcbddc->n_vertices = n_vertices;
  pcbddc->n_constraints = n_constraints;
  pcbddc->local_primal_size = n_vertices+n_constraints;
  /* free workspace no longer needed (PetscFree is a no-op on 0 pointers) */
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscFree(temp_basis);CHKERRQ(ierr);
  ierr = PetscFree(singular_vals);CHKERRQ(ierr);
  ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
  ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); }
  ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* undo the forced POD selection performed before the function (see above) */
#ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD
#undef PETSC_MISSING_LAPACK_GESVD
#endif

/* -------------------------------------------------------------------------- */
/*
   PCBDDCCoarseSetUp -
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS*   pcis = (PC_IS*)(pc->data);
  PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
  Mat_IS   *matis = (Mat_IS*)pc->pmat->data;
  IS       is_R_local;
  IS       is_V_local;
  IS       is_C_local;
  IS       is_aux1;
  IS       is_aux2;
  const VecType impVecType;
  const MatType impMatType;
  PetscInt n_R=0;
  PetscInt n_D=0;
  PetscInt n_B=0;
  PetscScalar zero=0.0;
  PetscScalar one=1.0;
  PetscScalar m_one=-1.0;
  PetscScalar* array;
  PetscScalar *coarse_submat_vals;
  PetscInt *idx_R_local;
  PetscInt *idx_V_B;
  PetscScalar
*coarsefunctions_errors; 1088 PetscScalar *constraints_errors; 1089 /* auxiliary indices */ 1090 PetscInt s,i,j,k; 1091 /* for verbose output of bddc */ 1092 PetscViewer viewer=pcbddc->dbg_viewer; 1093 PetscBool dbg_flag=pcbddc->dbg_flag; 1094 /* for counting coarse dofs */ 1095 PetscScalar coarsesum; 1096 PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints; 1097 PetscInt size_of_constraint; 1098 PetscInt *row_cmat_indices; 1099 PetscScalar *row_cmat_values; 1100 const PetscInt *vertices; 1101 1102 PetscFunctionBegin; 1103 /* Set Non-overlapping dimensions */ 1104 n_B = pcis->n_B; n_D = pcis->n - n_B; 1105 ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr); 1106 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1107 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1108 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1109 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1110 1111 for(i=0;i<n_constraints;i++) { 1112 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1113 for (j=0; j<size_of_constraint; j++) { 1114 k = row_cmat_indices[j]; 1115 if( array[k] == zero ) { 1116 array[k] = one; 1117 break; 1118 } 1119 } 1120 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1121 } 1122 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1123 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1124 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1125 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1126 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 
1127 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1128 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1129 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1130 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1131 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1132 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1133 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1134 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1135 pcbddc->coarse_size = (PetscInt) coarsesum; 1136 1137 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1138 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1139 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1140 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1141 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1142 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1143 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1144 if(dbg_flag) { 1145 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1146 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1147 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1148 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1149 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1150 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1151 ierr = 
PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1152 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1153 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1154 } 1155 /* Allocate needed vectors */ 1156 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1157 impMatType = MATSEQDENSE; 1158 impVecType = VECSEQ; 1159 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1160 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1161 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1162 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1163 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1164 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1165 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1166 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1167 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1168 1169 /* Creating some index sets needed */ 1170 /* For submatrices */ 1171 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1172 if(n_vertices) { 1173 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1174 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1175 } 1176 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1177 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1178 { 1179 PetscInt *aux_array1; 1180 PetscInt *aux_array2; 1181 PetscScalar value; 1182 1183 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1184 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1185 1186 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1187 ierr = 
VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1188 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1189 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1190 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1191 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1192 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1193 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1194 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 1195 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1196 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1197 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1198 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1199 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1200 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1201 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1202 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1203 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1204 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1205 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1206 1207 if(pcbddc->prec_type || dbg_flag ) { 1208 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1209 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1210 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1211 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1212 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1213 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1214 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1215 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1216 } 1217 1218 /* Check scatters */ 1219 if(dbg_flag) { 1220 1221 Vec vec_aux; 1222 1223 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1224 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1225 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1226 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1227 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1228 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1229 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1230 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1232 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1235 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1236 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1237 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1238 1239 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1240 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1241 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1242 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1243 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1244 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1245 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1248 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1249 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1250 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1251 1252 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1253 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 1254 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1255 1256 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1257 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1258 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1259 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1260 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1263 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1264 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1265 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1267 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 
1268 1269 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1270 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1271 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1272 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1273 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1274 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1275 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1278 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1279 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1280 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1281 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1282 1283 } 1284 } 1285 1286 /* vertices in boundary numbering */ 1287 if(n_vertices) { 1288 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1289 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1290 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1291 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1292 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1294 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1295 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1296 for (i=0; i<n_vertices; i++) { 1297 s=0; 1298 while (array[s] != i ) {s++;} 1299 idx_V_B[i]=s; 1300 } 1301 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1302 } 1303 1304 1305 /* Creating PC contexts for 
local Dirichlet and Neumann problems */ 1306 { 1307 Mat A_RR; 1308 PC pc_temp; 1309 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1310 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1311 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1312 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1313 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1314 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1315 /* default */ 1316 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1317 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1318 /* Allow user's customization */ 1319 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1320 /* Set Up KSP for Dirichlet problem of BDDC */ 1321 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1322 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1323 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1324 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1325 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1326 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1327 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1328 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1329 /* default */ 1330 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1331 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1332 /* Allow user's customization */ 1333 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1334 /* Set Up KSP for Neumann problem of BDDC */ 1335 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1336 /* check Dirichlet and Neumann solvers */ 1337 if(pcbddc->dbg_flag) { 1338 Vec temp_vec; 1339 PetscScalar value; 1340 1341 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1342 ierr = 
VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1343 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1344 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1345 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1346 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1347 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1349 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1351 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1352 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1353 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1354 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1355 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1356 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1357 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1358 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1360 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1361 } 1362 /* free Neumann problem's matrix */ 1363 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1364 } 1365 1366 /* Assemble all remaining stuff needed to apply BDDC */ 1367 { 1368 Mat A_RV,A_VR,A_VV; 1369 Mat M1,M2; 1370 Mat C_CR; 1371 Mat AUXMAT; 1372 Vec vec1_C; 1373 Vec vec2_C; 1374 Vec vec1_V; 1375 Vec vec2_V; 1376 PetscInt *nnz; 1377 PetscInt *auxindices; 1378 PetscInt index; 1379 PetscScalar* array2; 1380 MatFactorInfo matinfo; 1381 1382 /* Allocating some extra storage just to be safe */ 1383 ierr = 
PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1384 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1385 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1386 1387 /* some work vectors on vertices and/or constraints */ 1388 if(n_vertices) { 1389 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1390 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1391 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1392 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1393 } 1394 if(pcbddc->n_constraints) { 1395 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1396 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1397 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1398 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1399 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1400 } 1401 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1402 if(n_constraints) { 1403 /* some work vectors */ 1404 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1405 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1406 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1407 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1408 1409 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1410 for(i=0;i<n_constraints;i++) { 1411 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1412 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1413 /* Get row of constraint matrix in R numbering */ 1414 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1415 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1416 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1417 ierr = 
MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1418 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1419 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1420 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1421 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1422 /* Solve for row of constraint matrix in R numbering */ 1423 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1424 /* Set values */ 1425 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1426 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1427 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1428 } 1429 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1430 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1431 1432 /* Create Constraint matrix on R nodes: C_{CR} */ 1433 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1434 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1435 1436 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1437 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1438 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1439 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1440 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1441 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1442 1443 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1444 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1445 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1446 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1447 ierr = 
MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1448 for(i=0;i<n_constraints;i++) { 1449 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1450 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1451 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1452 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1453 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1454 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1455 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1456 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1457 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1458 } 1459 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1460 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1461 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1462 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1463 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1464 1465 } 1466 1467 /* Get submatrices from subdomain matrix */ 1468 if(n_vertices){ 1469 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1470 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1471 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1472 /* Assemble M2 = A_RR^{-1}A_RV */ 1473 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1474 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1475 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1476 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1477 for(i=0;i<n_vertices;i++) { 1478 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1479 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1480 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1481 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1482 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 
1483 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1484 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1485 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1486 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1487 } 1488 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1489 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1490 } 1491 1492 /* Matrix of coarse basis functions (local) */ 1493 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1494 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1495 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1496 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1497 if(pcbddc->prec_type || dbg_flag ) { 1498 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1499 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1500 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1501 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1502 } 1503 1504 if(dbg_flag) { 1505 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1506 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1507 } 1508 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1509 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1510 1511 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1512 for(i=0;i<n_vertices;i++){ 1513 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1514 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1515 ierr = 
VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1516 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1517 /* solution of saddle point problem */ 1518 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1519 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1520 if(n_constraints) { 1521 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1522 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1523 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1524 } 1525 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1526 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1527 1528 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1529 /* coarse basis functions */ 1530 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1531 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1533 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1534 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1535 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1536 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1537 if( pcbddc->prec_type || dbg_flag ) { 1538 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1541 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1542 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1543 } 1544 /* subdomain contribution to coarse matrix */ 1545 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1546 
for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1547 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1548 if(n_constraints) { 1549 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1550 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1551 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1552 } 1553 1554 if( dbg_flag ) { 1555 /* assemble subdomain vector on nodes */ 1556 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1557 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1558 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1559 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1560 array[ vertices[i] ] = one; 1561 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1562 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1563 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1564 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1565 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1566 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1567 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1568 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1569 if(n_constraints) { 1570 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1571 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1572 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1573 } 1574 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1575 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1576 /* check saddle point solution */ 1577 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1578 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1579 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1580 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1581 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1582 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1583 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1584 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1585 } 1586 } 1587 1588 for(i=0;i<n_constraints;i++){ 1589 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1590 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1591 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1592 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1593 /* solution of saddle point problem */ 1594 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1595 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1596 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1597 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1598 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1599 /* coarse 
basis functions */ 1600 index=i+n_vertices; 1601 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1602 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1605 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1606 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1607 if( pcbddc->prec_type || dbg_flag ) { 1608 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1611 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1612 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1613 } 1614 /* subdomain contribution to coarse matrix */ 1615 if(n_vertices) { 1616 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1617 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1618 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1619 } 1620 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1621 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1622 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1623 1624 if( dbg_flag ) { 1625 /* assemble subdomain vector on nodes */ 1626 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1627 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1628 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1629 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1630 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1631 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1632 /* assemble subdomain vector of lagrange multipliers */ 1633 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1634 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1635 if( n_vertices) { 1636 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1637 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1638 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1639 } 1640 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1641 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1642 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1643 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1644 /* check saddle point solution */ 1645 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1646 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1647 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1648 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1649 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1650 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1651 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1652 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1653 } 1654 } 1655 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1656 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1657 if( pcbddc->prec_type || dbg_flag ) { 1658 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1660 } 1661 /* Checking coarse_sub_mat and coarse basis functios */ 1662 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1663 if(dbg_flag) { 1664 
1665 Mat coarse_sub_mat; 1666 Mat TM1,TM2,TM3,TM4; 1667 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1668 const MatType checkmattype=MATSEQAIJ; 1669 PetscScalar value; 1670 1671 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1672 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1673 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1674 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1675 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1676 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1677 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1678 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1679 1680 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1681 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1682 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1683 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1684 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1685 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1686 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1687 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1688 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1689 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1690 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1691 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1692 ierr = 
MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1693 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1694 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1695 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1696 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1697 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1698 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1699 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1700 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1701 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1702 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1703 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1704 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1705 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1706 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1707 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1708 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1709 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1710 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1711 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1712 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1713 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1714 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1715 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1716 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1717 } 1718 1719 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1720 ierr = 
PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1721 /* free memory */ 1722 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1723 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1724 ierr = PetscFree(nnz);CHKERRQ(ierr); 1725 if(n_vertices) { 1726 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1727 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1728 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1729 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1730 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1731 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1732 } 1733 if(pcbddc->n_constraints) { 1734 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1735 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1736 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1737 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1738 } 1739 } 1740 /* free memory */ 1741 if(n_vertices) { 1742 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1743 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1744 } 1745 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1746 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1747 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1748 1749 PetscFunctionReturn(0); 1750 } 1751 1752 /* -------------------------------------------------------------------------- */ 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1756 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1757 { 1758 1759 1760 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1761 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1762 PC_IS *pcis = (PC_IS*)pc->data; 1763 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1764 MPI_Comm coarse_comm; 1765 1766 /* common to all choiches */ 1767 PetscScalar *temp_coarse_mat_vals; 1768 PetscScalar *ins_coarse_mat_vals; 1769 PetscInt *ins_local_primal_indices; 1770 PetscMPIInt *localsizes2,*localdispl2; 1771 PetscMPIInt size_prec_comm; 1772 PetscMPIInt rank_prec_comm; 1773 PetscMPIInt active_rank=MPI_PROC_NULL; 1774 PetscMPIInt master_proc=0; 1775 PetscInt 
ins_local_primal_size; 1776 /* specific to MULTILEVEL_BDDC */ 1777 PetscMPIInt *ranks_recv; 1778 PetscMPIInt count_recv=0; 1779 PetscMPIInt rank_coarse_proc_send_to; 1780 PetscMPIInt coarse_color = MPI_UNDEFINED; 1781 ISLocalToGlobalMapping coarse_ISLG; 1782 /* some other variables */ 1783 PetscErrorCode ierr; 1784 const MatType coarse_mat_type; 1785 const PCType coarse_pc_type; 1786 const KSPType coarse_ksp_type; 1787 PC pc_temp; 1788 PetscInt i,j,k,bs; 1789 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1790 /* verbose output viewer */ 1791 PetscViewer viewer=pcbddc->dbg_viewer; 1792 PetscBool dbg_flag=pcbddc->dbg_flag; 1793 1794 PetscFunctionBegin; 1795 1796 ins_local_primal_indices = 0; 1797 ins_coarse_mat_vals = 0; 1798 localsizes2 = 0; 1799 localdispl2 = 0; 1800 temp_coarse_mat_vals = 0; 1801 coarse_ISLG = 0; 1802 1803 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1804 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1805 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1806 1807 /* Assign global numbering to coarse dofs */ 1808 { 1809 PetscScalar one=1.,zero=0.; 1810 PetscScalar *array; 1811 PetscMPIInt *auxlocal_primal; 1812 PetscMPIInt *auxglobal_primal; 1813 PetscMPIInt *all_auxglobal_primal; 1814 PetscMPIInt *all_auxglobal_primal_dummy; 1815 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1816 PetscInt *vertices,*row_cmat_indices; 1817 PetscInt size_of_constraint; 1818 1819 /* Construct needed data structures for message passing */ 1820 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1821 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1822 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1823 /* Gather local_primal_size information for all processes */ 1824 ierr = 
MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1825 pcbddc->replicated_primal_size = 0; 1826 for (i=0; i<size_prec_comm; i++) { 1827 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1828 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1829 } 1830 if(rank_prec_comm == 0) { 1831 /* allocate some auxiliary space */ 1832 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1833 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1834 } 1835 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1836 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1837 1838 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1839 This code fragment assumes that the number of local constraints per connected component 1840 is not greater than the number of nodes defined for the connected component 1841 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1842 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1843 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1844 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1845 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1846 for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1847 array[ vertices[i] ] = one; 1848 auxlocal_primal[i] = vertices[i]; 1849 } 1850 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1851 
for(i=0;i<pcbddc->n_constraints;i++) { 1852 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1853 for (j=0; j<size_of_constraint; j++) { 1854 k = row_cmat_indices[j]; 1855 if( array[k] == zero ) { 1856 array[k] = one; 1857 auxlocal_primal[i+pcbddc->n_vertices] = k; 1858 break; 1859 } 1860 } 1861 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1862 } 1863 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1864 1865 /* Now assign them a global numbering */ 1866 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1867 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1868 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1869 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1870 1871 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1872 /* It implements a function similar to PetscSortRemoveDupsInt */ 1873 if(rank_prec_comm==0) { 1874 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1875 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1876 k=1; 1877 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1878 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1879 if(j != all_auxglobal_primal[i] ) { 1880 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1881 k++; 1882 j=all_auxglobal_primal[i]; 1883 } 1884 } 1885 } else { 1886 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1887 } 1888 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1889 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1890 1891 /* Now get global coarse numbering of local primal nodes */ 1892 for(i=0;i<pcbddc->local_primal_size;i++) { 1893 k=0; 1894 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1895 pcbddc->local_primal_indices[i]=k; 1896 } 1897 if(dbg_flag) { 1898 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1902 for(i=0;i<pcbddc->local_primal_size;i++) { 1903 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1904 } 1905 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1906 } 1907 /* free allocated memory */ 1908 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1909 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1910 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1911 if(rank_prec_comm == 0) { 1912 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1913 } 1914 } 1915 1916 /* adapt coarse problem 
type */ 1917 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1918 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1919 1920 switch(pcbddc->coarse_problem_type){ 1921 1922 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1923 { 1924 /* we need additional variables */ 1925 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1926 MetisInt *metis_coarse_subdivision; 1927 MetisInt options[METIS_NOPTIONS]; 1928 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1929 PetscMPIInt procs_jumps_coarse_comm; 1930 PetscMPIInt *coarse_subdivision; 1931 PetscMPIInt *total_count_recv; 1932 PetscMPIInt *total_ranks_recv; 1933 PetscMPIInt *displacements_recv; 1934 PetscMPIInt *my_faces_connectivity; 1935 PetscMPIInt *petsc_faces_adjncy; 1936 MetisInt *faces_adjncy; 1937 MetisInt *faces_xadj; 1938 PetscMPIInt *number_of_faces; 1939 PetscMPIInt *faces_displacements; 1940 PetscInt *array_int; 1941 PetscMPIInt my_faces=0; 1942 PetscMPIInt total_faces=0; 1943 PetscInt ranks_stretching_ratio; 1944 1945 /* define some quantities */ 1946 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1947 coarse_mat_type = MATIS; 1948 coarse_pc_type = PCBDDC; 1949 coarse_ksp_type = KSPCHEBYCHEV; 1950 1951 /* details of coarse decomposition */ 1952 n_subdomains = pcbddc->active_procs; 1953 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1954 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1955 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1956 1957 printf("Coarse algorithm details: \n"); 1958 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1959 1960 /* build CSR graph of subdomains' connectivity through faces */ 1961 ierr = PetscMalloc 
(pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1962 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1963 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1964 for(j=0;j<pcis->n_shared[i];j++){ 1965 array_int[ pcis->shared[i][j] ]+=1; 1966 } 1967 } 1968 for(i=1;i<pcis->n_neigh;i++){ 1969 for(j=0;j<pcis->n_shared[i];j++){ 1970 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1971 my_faces++; 1972 break; 1973 } 1974 } 1975 } 1976 //printf("I found %d faces.\n",my_faces); 1977 1978 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1979 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1980 my_faces=0; 1981 for(i=1;i<pcis->n_neigh;i++){ 1982 for(j=0;j<pcis->n_shared[i];j++){ 1983 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1984 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1985 my_faces++; 1986 break; 1987 } 1988 } 1989 } 1990 if(rank_prec_comm == master_proc) { 1991 //printf("I found %d total faces.\n",total_faces); 1992 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1993 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1994 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1995 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1996 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1997 } 1998 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1999 if(rank_prec_comm == master_proc) { 2000 faces_xadj[0]=0; 2001 faces_displacements[0]=0; 2002 j=0; 2003 for(i=1;i<size_prec_comm+1;i++) { 2004 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2005 if(number_of_faces[i-1]) { 2006 j++; 2007 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2008 } 2009 } 2010 printf("The 
J I count is %d and should be %d\n",j,n_subdomains); 2011 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 2012 } 2013 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2014 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2015 ierr = PetscFree(array_int);CHKERRQ(ierr); 2016 if(rank_prec_comm == master_proc) { 2017 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2018 printf("This is the face connectivity (actual ranks)\n"); 2019 for(i=0;i<n_subdomains;i++){ 2020 printf("proc %d is connected with \n",i); 2021 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2022 printf("%d ",faces_adjncy[j]); 2023 printf("\n"); 2024 } 2025 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2026 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2027 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2028 } 2029 2030 if( rank_prec_comm == master_proc ) { 2031 2032 PetscInt heuristic_for_metis=3; 2033 2034 ncon=1; 2035 faces_nvtxs=n_subdomains; 2036 /* partition graoh induced by face connectivity */ 2037 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2038 ierr = METIS_SetDefaultOptions(options); 2039 /* we need a contiguous partition of the coarse mesh */ 2040 options[METIS_OPTION_CONTIG]=1; 2041 options[METIS_OPTION_DBGLVL]=1; 2042 options[METIS_OPTION_NITER]=30; 2043 //options[METIS_OPTION_NCUTS]=1; 2044 printf("METIS PART GRAPH\n"); 2045 if(n_subdomains>n_parts*heuristic_for_metis) { 2046 printf("Using Kway\n"); 2047 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2048 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2049 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2050 } else { 2051 printf("Using Recursive\n"); 2052 
ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2053 } 2054 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2055 printf("Partition done!\n"); 2056 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2057 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2058 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2059 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2060 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2061 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2062 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2063 } 2064 2065 /* Create new communicator for coarse problem splitting the old one */ 2066 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2067 coarse_color=0; //for communicator splitting 2068 active_rank=rank_prec_comm; //for insertion of matrix values 2069 } 2070 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2071 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2072 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2073 2074 if( coarse_color == 0 ) { 2075 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2076 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2077 printf("Details of coarse comm\n"); 2078 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2079 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2080 } else { 
2081 rank_coarse_comm = MPI_PROC_NULL; 2082 } 2083 2084 /* master proc take care of arranging and distributing coarse informations */ 2085 if(rank_coarse_comm == master_proc) { 2086 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2087 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2088 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2089 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2090 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2091 /* some initializations */ 2092 displacements_recv[0]=0; 2093 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2094 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2095 for(j=0;j<size_coarse_comm;j++) 2096 for(i=0;i<size_prec_comm;i++) 2097 if(coarse_subdivision[i]==j) 2098 total_count_recv[j]++; 2099 /* displacements needed for scatterv of total_ranks_recv */ 2100 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2101 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2102 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2103 for(j=0;j<size_coarse_comm;j++) { 2104 for(i=0;i<size_prec_comm;i++) { 2105 if(coarse_subdivision[i]==j) { 2106 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2107 total_count_recv[j]+=1; 2108 } 2109 } 2110 } 2111 for(j=0;j<size_coarse_comm;j++) { 2112 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2113 for(i=0;i<total_count_recv[j];i++) { 2114 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2115 } 2116 printf("\n"); 2117 } 2118 
2119 /* identify new decomposition in terms of ranks in the old communicator */ 2120 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2121 printf("coarse_subdivision in old end new ranks\n"); 2122 for(i=0;i<size_prec_comm;i++) 2123 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2124 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2125 } else { 2126 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2127 } 2128 printf("\n"); 2129 } 2130 2131 /* Scatter new decomposition for send details */ 2132 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2133 /* Scatter receiving details to members of coarse decomposition */ 2134 if( coarse_color == 0) { 2135 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2136 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2137 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2138 } 2139 2140 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2141 //if(coarse_color == 0) { 2142 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2143 // for(i=0;i<count_recv;i++) 2144 // printf("%d ",ranks_recv[i]); 2145 // printf("\n"); 2146 //} 2147 2148 if(rank_prec_comm == master_proc) { 2149 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2150 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2151 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2152 free(coarse_subdivision); 2153 free(total_count_recv); 2154 free(total_ranks_recv); 2155 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2156 } 2157 break; 2158 } 2159 2160 
case(REPLICATED_BDDC): 2161 2162 pcbddc->coarse_communications_type = GATHERS_BDDC; 2163 coarse_mat_type = MATSEQAIJ; 2164 coarse_pc_type = PCLU; 2165 coarse_ksp_type = KSPPREONLY; 2166 coarse_comm = PETSC_COMM_SELF; 2167 active_rank = rank_prec_comm; 2168 break; 2169 2170 case(PARALLEL_BDDC): 2171 2172 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2173 coarse_mat_type = MATMPIAIJ; 2174 coarse_pc_type = PCREDUNDANT; 2175 coarse_ksp_type = KSPPREONLY; 2176 coarse_comm = prec_comm; 2177 active_rank = rank_prec_comm; 2178 break; 2179 2180 case(SEQUENTIAL_BDDC): 2181 pcbddc->coarse_communications_type = GATHERS_BDDC; 2182 coarse_mat_type = MATSEQAIJ; 2183 coarse_pc_type = PCLU; 2184 coarse_ksp_type = KSPPREONLY; 2185 coarse_comm = PETSC_COMM_SELF; 2186 active_rank = master_proc; 2187 break; 2188 } 2189 2190 switch(pcbddc->coarse_communications_type){ 2191 2192 case(SCATTERS_BDDC): 2193 { 2194 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2195 2196 PetscMPIInt send_size; 2197 PetscInt *aux_ins_indices; 2198 PetscInt ii,jj; 2199 MPI_Request *requests; 2200 2201 /* allocate auxiliary space */ 2202 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2203 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2204 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2205 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2206 /* allocate stuffs for message massing */ 2207 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2208 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2209 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2210 ierr = PetscMalloc ( 
count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2211 /* fill up quantities */ 2212 j=0; 2213 for(i=0;i<count_recv;i++){ 2214 ii = ranks_recv[i]; 2215 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2216 localdispl2[i]=j; 2217 j+=localsizes2[i]; 2218 jj = pcbddc->local_primal_displacements[ii]; 2219 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 2220 } 2221 //printf("aux_ins_indices 1\n"); 2222 //for(i=0;i<pcbddc->coarse_size;i++) 2223 // printf("%d ",aux_ins_indices[i]); 2224 //printf("\n"); 2225 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2226 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2227 /* evaluate how many values I will insert in coarse mat */ 2228 ins_local_primal_size=0; 2229 for(i=0;i<pcbddc->coarse_size;i++) 2230 if(aux_ins_indices[i]) 2231 ins_local_primal_size++; 2232 /* evaluate indices I will insert in coarse mat */ 2233 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2234 j=0; 2235 for(i=0;i<pcbddc->coarse_size;i++) 2236 if(aux_ins_indices[i]) 2237 ins_local_primal_indices[j++]=i; 2238 /* use aux_ins_indices to realize a global to local mapping */ 2239 j=0; 2240 for(i=0;i<pcbddc->coarse_size;i++){ 2241 if(aux_ins_indices[i]==0){ 2242 aux_ins_indices[i]=-1; 2243 } else { 2244 aux_ins_indices[i]=j; 2245 j++; 2246 } 2247 } 2248 2249 //printf("New details localsizes2 localdispl2\n"); 2250 //for(i=0;i<count_recv;i++) 2251 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2252 //printf("\n"); 2253 //printf("aux_ins_indices 2\n"); 2254 //for(i=0;i<pcbddc->coarse_size;i++) 2255 // printf("%d ",aux_ins_indices[i]); 2256 //printf("\n"); 2257 //printf("ins_local_primal_indices\n"); 2258 //for(i=0;i<ins_local_primal_size;i++) 2259 // printf("%d 
",ins_local_primal_indices[i]); 2260 //printf("\n"); 2261 //printf("coarse_submat_vals\n"); 2262 //for(i=0;i<pcbddc->local_primal_size;i++) 2263 // for(j=0;j<pcbddc->local_primal_size;j++) 2264 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2265 //printf("\n"); 2266 2267 /* processes partecipating in coarse problem receive matrix data from their friends */ 2268 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2269 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2270 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2271 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2272 } 2273 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2274 2275 //if(coarse_color == 0) { 2276 // printf("temp_coarse_mat_vals\n"); 2277 // for(k=0;k<count_recv;k++){ 2278 // printf("---- %d ----\n",ranks_recv[k]); 2279 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2280 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2281 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2282 // printf("\n"); 2283 // } 2284 //} 2285 /* calculate data to insert in coarse mat */ 2286 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2287 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2288 2289 PetscMPIInt rr,kk,lps,lpd; 2290 PetscInt row_ind,col_ind; 2291 for(k=0;k<count_recv;k++){ 2292 
rr = ranks_recv[k]; 2293 kk = localdispl2[k]; 2294 lps = pcbddc->local_primal_sizes[rr]; 2295 lpd = pcbddc->local_primal_displacements[rr]; 2296 //printf("Inserting the following indices (received from %d)\n",rr); 2297 for(j=0;j<lps;j++){ 2298 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2299 for(i=0;i<lps;i++){ 2300 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2301 //printf("%d %d\n",row_ind,col_ind); 2302 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2303 } 2304 } 2305 } 2306 ierr = PetscFree(requests);CHKERRQ(ierr); 2307 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2308 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2309 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2310 2311 /* create local to global mapping needed by coarse MATIS */ 2312 { 2313 IS coarse_IS; 2314 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2315 coarse_comm = prec_comm; 2316 active_rank=rank_prec_comm; 2317 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2318 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2319 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2320 } 2321 } 2322 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2323 /* arrays for values insertion */ 2324 ins_local_primal_size = pcbddc->local_primal_size; 2325 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2326 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2327 for(j=0;j<ins_local_primal_size;j++){ 2328 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2329 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2330 } 2331 } 2332 break; 
2333 2334 } 2335 2336 case(GATHERS_BDDC): 2337 { 2338 2339 PetscMPIInt mysize,mysize2; 2340 2341 if(rank_prec_comm==active_rank) { 2342 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2343 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2344 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2345 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2346 /* arrays for values insertion */ 2347 ins_local_primal_size = pcbddc->coarse_size; 2348 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2349 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2350 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2351 localdispl2[0]=0; 2352 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2353 j=0; 2354 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2355 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2356 } 2357 2358 mysize=pcbddc->local_primal_size; 2359 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2360 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2361 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2362 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2363 } else { 2364 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2365 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2366 } 2367 2368 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2369 if(rank_prec_comm==active_rank) { 2370 PetscInt offset,offset2,row_ind,col_ind; 2371 for(j=0;j<ins_local_primal_size;j++){ 2372 ins_local_primal_indices[j]=j; 2373 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2374 } 2375 for(k=0;k<size_prec_comm;k++){ 2376 offset=pcbddc->local_primal_displacements[k]; 2377 offset2=localdispl2[k]; 2378 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2379 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2380 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2381 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2382 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2383 } 2384 } 2385 } 2386 } 2387 break; 2388 }//switch on coarse problem and communications associated with finished 2389 } 2390 2391 /* Now create and fill up coarse matrix */ 2392 if( rank_prec_comm == active_rank ) { 2393 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2394 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2395 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2396 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2397 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2398 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2399 ierr = 
MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2400 } else { 2401 Mat matis_coarse_local_mat; 2402 ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2403 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2404 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2405 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2406 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2407 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2408 } 2409 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2410 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2411 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2412 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2413 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2414 Mat matis_coarse_local_mat; 2415 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2416 ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr); 2417 } 2418 2419 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2420 /* Preconditioner for coarse problem */ 2421 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2422 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2423 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2424 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 
2425 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2426 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2427 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2428 /* Allow user's customization */ 2429 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2430 /* Set Up PC for coarse problem BDDC */ 2431 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2432 if(dbg_flag) { 2433 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2434 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2435 } 2436 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2437 } 2438 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2439 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2440 if(dbg_flag) { 2441 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2442 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2443 } 2444 } 2445 } 2446 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2447 IS local_IS,global_IS; 2448 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2449 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2450 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2451 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2452 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2453 } 2454 2455 2456 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2457 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2458 PetscScalar m_one=-1.0; 2459 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2460 const KSPType check_ksp_type=KSPGMRES; 2461 2462 /* change coarse ksp object to an iterative method suitable for extreme 
eigenvalues' estimation */ 2463 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2464 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2465 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2466 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2467 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2468 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2469 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2470 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2471 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2472 if(dbg_flag) { 2473 kappa_2=lambda_max/lambda_min; 2474 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2475 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2476 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2477 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2478 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2479 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2480 } 2481 /* restore coarse ksp to default values */ 2482 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2483 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2484 ierr = KSPChebychevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2485 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2486 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 
2487 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2488 } 2489 2490 /* free data structures no longer needed */ 2491 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2492 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2493 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2494 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2495 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2496 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2497 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2503 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2504 { 2505 2506 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2507 PC_IS *pcis = (PC_IS*)pc->data; 2508 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2509 PCBDDCGraph mat_graph; 2510 Mat mat_adj; 2511 PetscInt **neighbours_set; 2512 PetscInt *queue_in_global_numbering; 2513 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2514 PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size; 2515 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2516 PetscBool same_set,flg_row; 2517 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2518 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2519 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2520 const PetscInt *neumann_nodes; 2521 const PetscInt *dirichlet_nodes; 2522 2523 PetscFunctionBegin; 2524 /* allocate and initialize needed graph structure */ 2525 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2526 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2527 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2528 ierr = 
MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2529 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2530 i = mat_graph->nvtxs; 2531 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2532 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2533 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2534 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2535 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2536 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2537 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2538 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2539 2540 /* Setting dofs splitting in mat_graph->which_dof */ 2541 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2542 PetscInt *is_indices; 2543 PetscInt is_size; 2544 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2545 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2546 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2547 for(j=0;j<is_size;j++) { 2548 mat_graph->which_dof[is_indices[j]]=i; 2549 } 2550 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2551 } 2552 /* use mat block size as vertex size */ 2553 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2554 } else { /* otherwise it assumes a constant block size */ 2555 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2556 for(i=0;i<mat_graph->nvtxs/bs;i++) { 
2557 for(s=0;s<bs;s++) { 2558 mat_graph->which_dof[i*bs+s]=s; 2559 } 2560 } 2561 vertex_size=1; 2562 } 2563 /* count number of neigh per node */ 2564 total_counts=0; 2565 for(i=1;i<pcis->n_neigh;i++){ 2566 s=pcis->n_shared[i]; 2567 total_counts+=s; 2568 for(j=0;j<s;j++){ 2569 mat_graph->count[pcis->shared[i][j]] += 1; 2570 } 2571 } 2572 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2573 if(pcbddc->NeumannBoundaries) { 2574 ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2575 ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2576 for(i=0;i<neumann_bsize;i++){ 2577 iindex = neumann_nodes[i]; 2578 if(mat_graph->count[iindex] > 1){ 2579 mat_graph->count[iindex]+=1; 2580 total_counts++; 2581 } 2582 } 2583 } 2584 /* allocate space for storing the set of neighbours of each node */ 2585 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2586 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); } 2587 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2588 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2589 for(i=1;i<pcis->n_neigh;i++){ 2590 s=pcis->n_shared[i]; 2591 for(j=0;j<s;j++) { 2592 k=pcis->shared[i][j]; 2593 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2594 mat_graph->count[k]+=1; 2595 } 2596 } 2597 /* set -1 fake neighbour to mimic Neumann boundary */ 2598 if(pcbddc->NeumannBoundaries) { 2599 for(i=0;i<neumann_bsize;i++){ 2600 iindex = neumann_nodes[i]; 2601 if(mat_graph->count[iindex] > 1){ 2602 neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2603 mat_graph->count[iindex]+=1; 2604 } 2605 } 2606 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2607 } 2608 /* sort set of sharing subdomains (needed for comparison 
below) */ 2609 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2610 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2611 if(pcbddc->DirichletBoundaries) { 2612 ierr = ISGetSize(pcbddc->DirichletBoundaries,&dirichlet_bsize);CHKERRQ(ierr); 2613 ierr = ISGetIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2614 for(i=0;i<dirichlet_bsize;i++){ 2615 mat_graph->count[dirichlet_nodes[i]]=0; 2616 } 2617 ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2618 } 2619 for(i=0;i<mat_graph->nvtxs;i++){ 2620 if(!mat_graph->count[i]){ /* interior nodes */ 2621 mat_graph->touched[i]=PETSC_TRUE; 2622 mat_graph->where[i]=0; 2623 nodes_touched++; 2624 } 2625 } 2626 mat_graph->ncmps = 0; 2627 while(nodes_touched<mat_graph->nvtxs) { 2628 /* find first untouched node in local ordering */ 2629 i=0; 2630 while(mat_graph->touched[i]) i++; 2631 mat_graph->touched[i]=PETSC_TRUE; 2632 mat_graph->where[i]=where_values; 2633 nodes_touched++; 2634 /* now find all other nodes having the same set of sharing subdomains */ 2635 for(j=i+1;j<mat_graph->nvtxs;j++){ 2636 /* check for same number of sharing subdomains and dof number */ 2637 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2638 /* check for same set of sharing subdomains */ 2639 same_set=PETSC_TRUE; 2640 for(k=0;k<mat_graph->count[j];k++){ 2641 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2642 same_set=PETSC_FALSE; 2643 } 2644 } 2645 /* I found a friend of mine */ 2646 if(same_set) { 2647 mat_graph->where[j]=where_values; 2648 mat_graph->touched[j]=PETSC_TRUE; 2649 nodes_touched++; 2650 } 2651 } 2652 } 2653 where_values++; 2654 } 2655 where_values--; if(where_values<0) where_values=0; 2656 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2657 /* Find connected components defined on 
the shared interface */ 2658 if(where_values) { 2659 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2660 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 2661 for(i=0;i<mat_graph->ncmps;i++) { 2662 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2663 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2664 } 2665 } 2666 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2667 for(i=0;i<where_values;i++) { 2668 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2669 if(mat_graph->where_ncmps[i]>1) { 2670 adapt_interface=1; 2671 break; 2672 } 2673 } 2674 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2675 if(where_values && adapt_interface_reduced) { 2676 2677 printf("Adapting Interface\n"); 2678 2679 PetscInt sum_requests=0,my_rank; 2680 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2681 PetscInt temp_buffer_size,ins_val,global_where_counter; 2682 PetscInt *cum_recv_counts; 2683 PetscInt *where_to_nodes_indices; 2684 PetscInt *petsc_buffer; 2685 PetscMPIInt *recv_buffer; 2686 PetscMPIInt *recv_buffer_where; 2687 PetscMPIInt *send_buffer; 2688 PetscMPIInt size_of_send; 2689 PetscInt *sizes_of_sends; 2690 MPI_Request *send_requests; 2691 MPI_Request *recv_requests; 2692 PetscInt *where_cc_adapt; 2693 PetscInt **temp_buffer; 2694 PetscInt *nodes_to_temp_buffer_indices; 2695 PetscInt *add_to_where; 2696 2697 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2698 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2699 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2700 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2701 /* first count how many neighbours per connected component I will receive from */ 2702 cum_recv_counts[0]=0; 2703 for(i=1;i<where_values+1;i++){ 2704 j=0; 2705 while(mat_graph->where[j] != i) j++; 2706 where_to_nodes_indices[i-1]=j; 2707 if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2708 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 2709 } 2710 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2711 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2712 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2713 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2714 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2715 for(i=0;i<cum_recv_counts[where_values];i++) { 2716 send_requests[i]=MPI_REQUEST_NULL; 2717 recv_requests[i]=MPI_REQUEST_NULL; 2718 } 2719 /* exchange with my neighbours the number of my connected components on the shared interface */ 2720 for(i=0;i<where_values;i++){ 2721 j=where_to_nodes_indices[i]; 2722 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2723 for(;k<mat_graph->count[j];k++){ 2724 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2725 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2726 sum_requests++; 2727 } 2728 } 2729 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2730 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2731 /* determine the connected component I need to adapt */ 2732 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2733 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2734 for(i=0;i<where_values;i++){ 2735 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2736 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2737 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2738 where_cc_adapt[i]=PETSC_TRUE; 2739 break; 2740 } 2741 } 2742 } 2743 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2744 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2745 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2746 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2747 sum_requests=0; 2748 start_of_send=0; 2749 start_of_recv=cum_recv_counts[where_values]; 2750 for(i=0;i<where_values;i++) { 2751 if(where_cc_adapt[i]) { 2752 size_of_send=0; 2753 for(j=i;j<mat_graph->ncmps;j++) { 2754 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 
2755 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2756 size_of_send+=1; 2757 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2758 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2759 } 2760 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2761 } 2762 } 2763 j = where_to_nodes_indices[i]; 2764 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2765 for(;k<mat_graph->count[j];k++){ 2766 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2767 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2768 sum_requests++; 2769 } 2770 sizes_of_sends[i]=size_of_send; 2771 start_of_send+=size_of_send; 2772 } 2773 } 2774 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2775 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2776 buffer_size=0; 2777 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2778 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2779 /* now exchange the data */ 2780 start_of_recv=0; 2781 start_of_send=0; 2782 sum_requests=0; 2783 for(i=0;i<where_values;i++) { 2784 if(where_cc_adapt[i]) { 2785 size_of_send = sizes_of_sends[i]; 2786 j = where_to_nodes_indices[i]; 2787 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2788 for(;k<mat_graph->count[j];k++){ 2789 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2790 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2791 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2792 start_of_recv+=size_of_recv; 2793 sum_requests++; 2794 } 2795 start_of_send+=size_of_send; 2796 } 2797 } 2798 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2799 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2800 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2801 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2802 for(j=0;j<buffer_size;) { 2803 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2804 k=petsc_buffer[j]+1; 2805 j+=k; 2806 } 2807 sum_requests=cum_recv_counts[where_values]; 2808 start_of_recv=0; 2809 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2810 global_where_counter=0; 2811 for(i=0;i<where_values;i++){ 2812 if(where_cc_adapt[i]){ 2813 temp_buffer_size=0; 2814 /* find nodes on the shared interface we need to adapt */ 2815 for(j=0;j<mat_graph->nvtxs;j++){ 2816 if(mat_graph->where[j]==i+1) { 2817 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2818 temp_buffer_size++; 2819 } else { 2820 nodes_to_temp_buffer_indices[j]=-1; 2821 } 2822 } 2823 /* allocate some temporary space */ 2824 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2825 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2826 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2827 for(j=1;j<temp_buffer_size;j++){ 2828 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2829 } 2830 /* analyze contributions from neighbouring subdomains for i-th conn comp 2831 temp buffer structure: 2832 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2833 3 neighs procs with structured connected components: 2834 neigh 0: [0 1 4], [2 3]; (2 connected components) 2835 neigh 1: [0 1], [2 3 4]; (2 connected components) 2836 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2837 tempbuffer (row-oriented) should be filled as: 2838 [ 0, 0, 0; 2839 0, 0, 1; 2840 1, 1, 2; 2841 1, 1, 2; 2842 0, 1, 0; ]; 2843 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
2844 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2845 */ 2846 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2847 ins_val=0; 2848 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2849 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2850 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2851 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2852 } 2853 buffer_size+=k; 2854 ins_val++; 2855 } 2856 start_of_recv+=size_of_recv; 2857 sum_requests++; 2858 } 2859 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2860 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2861 for(j=0;j<temp_buffer_size;j++){ 2862 if(!add_to_where[j]){ /* found a new cc */ 2863 global_where_counter++; 2864 add_to_where[j]=global_where_counter; 2865 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2866 same_set=PETSC_TRUE; 2867 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2868 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2869 same_set=PETSC_FALSE; 2870 break; 2871 } 2872 } 2873 if(same_set) add_to_where[k]=global_where_counter; 2874 } 2875 } 2876 } 2877 /* insert new data in where array */ 2878 temp_buffer_size=0; 2879 for(j=0;j<mat_graph->nvtxs;j++){ 2880 if(mat_graph->where[j]==i+1) { 2881 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2882 temp_buffer_size++; 2883 } 2884 } 2885 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2886 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2887 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2888 } 2889 } 2890 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2891 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2892 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 2893 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2894 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2895 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2896 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2897 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2898 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2899 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2900 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2901 if(global_where_counter) { 2902 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2903 global_where_counter=0; 2904 for(i=0;i<mat_graph->nvtxs;i++){ 2905 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2906 global_where_counter++; 2907 for(j=i+1;j<mat_graph->nvtxs;j++){ 2908 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2909 mat_graph->where[j]=global_where_counter; 2910 mat_graph->touched[j]=PETSC_TRUE; 2911 } 2912 } 2913 mat_graph->where[i]=global_where_counter; 2914 mat_graph->touched[i]=PETSC_TRUE; 2915 } 2916 } 2917 where_values=global_where_counter; 2918 } 2919 if(global_where_counter) { 2920 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2921 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2922 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2923 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2924 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2925 for(i=0;i<mat_graph->ncmps;i++) { 2926 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2927 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
2928 } 2929 } 2930 } /* Finished adapting interface */ 2931 PetscInt nfc=0; 2932 PetscInt nec=0; 2933 PetscInt nvc=0; 2934 PetscBool twodim_flag=PETSC_FALSE; 2935 for (i=0; i<mat_graph->ncmps; i++) { 2936 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2937 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 2938 nfc++; 2939 } else { /* note that nec will be zero in 2d */ 2940 nec++; 2941 } 2942 } else { 2943 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2944 } 2945 } 2946 2947 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 2948 nec = nfc; 2949 nfc = 0; 2950 twodim_flag = PETSC_TRUE; 2951 } 2952 /* allocate IS arrays for faces, edges. Vertices need a single index set. 2953 Reusing space allocated in mat_graph->where for creating IS objects */ 2954 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 2955 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 2956 use_faces=PETSC_TRUE; 2957 } 2958 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 2959 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 2960 use_edges=PETSC_TRUE; 2961 } 2962 nfc=0; 2963 nec=0; 2964 for (i=0; i<mat_graph->ncmps; i++) { 2965 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2966 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2967 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 2968 } 2969 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 2970 if(twodim_flag) { 2971 if(use_edges) { 2972 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 2973 nec++; 2974 } 2975 } else { 2976 if(use_faces) { 2977 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 2978 nfc++; 2979 } 2980 } 2981 } else { 2982 if(use_edges) { 2983 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 
2984 nec++; 2985 } 2986 } 2987 } 2988 } 2989 pcbddc->n_ISForFaces=nfc; 2990 pcbddc->n_ISForEdges=nec; 2991 nvc=0; 2992 if( !pcbddc->constraints_flag ) { 2993 for (i=0; i<mat_graph->ncmps; i++) { 2994 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 2995 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 2996 mat_graph->where[nvc]=mat_graph->queue[j]; 2997 nvc++; 2998 } 2999 } 3000 } 3001 } 3002 /* sort vertex set (by local ordering) */ 3003 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3004 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3005 3006 if(pcbddc->dbg_flag) { 3007 PetscViewer viewer=pcbddc->dbg_viewer; 3008 3009 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3010 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3011 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3012 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3013 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3014 for(i=0;i<mat_graph->nvtxs;i++) { 3015 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3016 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3017 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3018 } 3019 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3020 } 3021 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d 
connected components", mat_graph->ncmps);CHKERRQ(ierr); 3022 for(i=0;i<mat_graph->ncmps;i++) { 3023 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3024 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3025 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3026 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); 3027 } 3028 } 3029 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);*/ 3030 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3031 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3032 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3033 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3034 } 3035 3036 /* Restore CSR structure into sequantial matrix and free memory space no longer needed */ 3037 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 3038 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 3039 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 3040 /* Free graph structure */ 3041 if(mat_graph->nvtxs){ 3042 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 3043 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 3044 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3045 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3046 ierr = 
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3047 } 3048 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 3049 3050 PetscFunctionReturn(0); 3051 3052 } 3053 3054 /* -------------------------------------------------------------------------- */ 3055 3056 /* The following code has been adapted from function IsConnectedSubdomain contained 3057 in source file contig.c of METIS library (version 5.0.1) */ 3058 3059 #undef __FUNCT__ 3060 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3061 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3062 { 3063 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3064 PetscInt *xadj, *adjncy, *where, *queue; 3065 PetscInt *cptr; 3066 PetscBool *touched; 3067 3068 PetscFunctionBegin; 3069 3070 nvtxs = graph->nvtxs; 3071 xadj = graph->xadj; 3072 adjncy = graph->adjncy; 3073 where = graph->where; 3074 touched = graph->touched; 3075 queue = graph->queue; 3076 cptr = graph->cptr; 3077 3078 for (i=0; i<nvtxs; i++) 3079 touched[i] = PETSC_FALSE; 3080 3081 cum_queue=0; 3082 ncmps=0; 3083 3084 for(n=0; n<n_dist; n++) { 3085 pid = n+1; 3086 nleft = 0; 3087 for (i=0; i<nvtxs; i++) { 3088 if (where[i] == pid) 3089 nleft++; 3090 } 3091 for (i=0; i<nvtxs; i++) { 3092 if (where[i] == pid) 3093 break; 3094 } 3095 touched[i] = PETSC_TRUE; 3096 queue[cum_queue] = i; 3097 first = 0; last = 1; 3098 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3099 ncmps_pid = 0; 3100 while (first != nleft) { 3101 if (first == last) { /* Find another starting vertex */ 3102 cptr[++ncmps] = first+cum_queue; 3103 ncmps_pid++; 3104 for (i=0; i<nvtxs; i++) { 3105 if (where[i] == pid && !touched[i]) 3106 break; 3107 } 3108 queue[cum_queue+last] = i; 3109 last++; 3110 touched[i] = PETSC_TRUE; 3111 } 3112 i = queue[cum_queue+first]; 3113 first++; 3114 for (j=xadj[i]; j<xadj[i+1]; j++) { 3115 k = adjncy[j]; 3116 if (where[k] == pid && !touched[k]) { 3117 queue[cum_queue+last] = k; 3118 last++; 3119 
touched[k] = PETSC_TRUE; 3120 } 3121 } 3122 } 3123 cptr[++ncmps] = first+cum_queue; 3124 ncmps_pid++; 3125 cum_queue=cptr[ncmps]; 3126 graph->where_ncmps[n] = ncmps_pid; 3127 } 3128 graph->ncmps = ncmps; 3129 3130 PetscFunctionReturn(0); 3131 } 3132 3133