1 /* TODOLIST 2 Exact solvers: Solve local saddle point directly for very hard problems 3 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 4 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 5 - mind the problem with coarsening_factor 6 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 7 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 8 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 9 - Add levels' slot to bddc data structure and associated Set/Get functions 10 code refactoring: 11 - pick up better names for static functions 12 check log_summary for leaking (actually: 1 Vector per level ) 13 change options structure: 14 - insert BDDC into MG framework? 15 provide other ops? Ask to developers 16 remove all unused printf 17 remove // commments and adhere to PETSc code requirements 18 man pages 19 */ 20 21 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 22 Implementation of BDDC preconditioner based on: 23 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 29 /* -------------------------------------------------------------------------- */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "PCSetFromOptions_BDDC" 32 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 33 { 34 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 35 PetscErrorCode ierr; 36 37 PetscFunctionBegin; 38 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 39 /* Verbose debugging of main data structures */ 40 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 41 /* Some customization for default primal space */ 42 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 46 /* Coarse solver context */ 47 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 48 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 49 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 50 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsTail();CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 /* -------------------------------------------------------------------------- */ 57 EXTERN_C_BEGIN 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 60 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 64 PetscFunctionBegin; 65 pcbddc->coarse_problem_type = CPT; 66 PetscFunctionReturn(0); 67 } 68 EXTERN_C_END 69 70 /* -------------------------------------------------------------------------- */ 71 #undef __FUNCT__ 72 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 73 /*@ 74 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 75 76 Not collective 77 78 Input Parameters: 79 + pc - the preconditioning context 80 - CoarseProblemType - pick a better name and explain what this is 81 82 Level: intermediate 83 84 Notes: 85 Not collective but all procs must call this. 
86 87 .seealso: PCBDDC 88 @*/ 89 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 90 { 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 95 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* -------------------------------------------------------------------------- */ 100 EXTERN_C_BEGIN 101 #undef __FUNCT__ 102 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 103 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 104 { 105 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 110 ierr = ISDuplicate(DirichletBoundaries,&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = ISCopy(DirichletBoundaries,pcbddc->DirichletBoundaries);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 /* -------------------------------------------------------------------------- */ 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 119 /*@ 120 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of 121 Dirichlet boundaries for the global problem. 122 123 Not collective 124 125 Input Parameters: 126 + pc - the preconditioning context 127 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 128 129 Level: intermediate 130 131 Notes: 132 The sequential IS is copied; the user must destroy the IS object passed in. 
133 134 .seealso: PCBDDC 135 @*/ 136 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 137 { 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 142 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 /* -------------------------------------------------------------------------- */ 146 EXTERN_C_BEGIN 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 149 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 150 { 151 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 152 PetscErrorCode ierr; 153 154 PetscFunctionBegin; 155 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 156 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 157 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 158 PetscFunctionReturn(0); 159 } 160 EXTERN_C_END 161 162 /* -------------------------------------------------------------------------- */ 163 #undef __FUNCT__ 164 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 165 /*@ 166 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 167 Neumann boundaries for the global problem. 168 169 Not collective 170 171 Input Parameters: 172 + pc - the preconditioning context 173 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 174 175 Level: intermediate 176 177 Notes: 178 The sequential IS is copied; the user must destroy the IS object passed in. 
179 180 .seealso: PCBDDC 181 @*/ 182 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 183 { 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 188 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 /* -------------------------------------------------------------------------- */ 192 EXTERN_C_BEGIN 193 #undef __FUNCT__ 194 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 195 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 196 { 197 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 198 199 PetscFunctionBegin; 200 if(pcbddc->NeumannBoundaries) { 201 *NeumannBoundaries = pcbddc->NeumannBoundaries; 202 } else { 203 *NeumannBoundaries = PETSC_NULL; 204 //SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 205 } 206 PetscFunctionReturn(0); 207 } 208 EXTERN_C_END 209 210 /* -------------------------------------------------------------------------- */ 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 213 /*@ 214 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 215 Neumann boundaries for the global problem. 216 217 Not collective 218 219 Input Parameters: 220 + pc - the preconditioning context 221 222 Output Parameters: 223 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 224 225 Level: intermediate 226 227 Notes: 228 If the user has not yet provided such information, PETSC_NULL is returned. 
229 230 .seealso: PCBDDC 231 @*/ 232 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 233 { 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 238 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 /* -------------------------------------------------------------------------- */ 243 EXTERN_C_BEGIN 244 #undef __FUNCT__ 245 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 246 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 247 { 248 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 249 PetscInt i; 250 PetscErrorCode ierr; 251 252 PetscFunctionBegin; 253 /* Destroy IS if already set */ 254 for(i=0;i<pcbddc->n_ISForDofs;i++) { 255 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 256 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 257 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 258 } 259 /* allocate space then copy ISs */ 260 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 261 for(i=0;i<n_is;i++) { 262 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 263 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 264 } 265 pcbddc->n_ISForDofs=n_is; 266 PetscFunctionReturn(0); 267 } 268 EXTERN_C_END 269 270 /* -------------------------------------------------------------------------- */ 271 #undef __FUNCT__ 272 #define __FUNCT__ "PCBDDCSetDofsSplitting" 273 /*@ 274 PCBDDCSetDofsSplitting - Set index set defining how dofs are splitted. 275 276 Not collective 277 278 Input Parameters: 279 + pc - the preconditioning context 280 - n - number of index sets defining dofs spltting 281 - IS[] - array of IS describing dofs splitting 282 283 Level: intermediate 284 285 Notes: 286 Sequential ISs are copied, the user must destroy the array of IS passed in. 
287 288 .seealso: PCBDDC 289 @*/ 290 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 296 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "PCSetUp_BDDC" 302 /* -------------------------------------------------------------------------- */ 303 /* 304 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 305 by setting data structures and options. 306 307 Input Parameter: 308 + pc - the preconditioner context 309 310 Application Interface Routine: PCSetUp() 311 312 Notes: 313 The interface routine PCSetUp() is not usually called directly by 314 the user, but instead is called by PCApply() if necessary. 315 */ 316 PetscErrorCode PCSetUp_BDDC(PC pc) 317 { 318 PetscErrorCode ierr; 319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 320 PC_IS *pcis = (PC_IS*)(pc->data); 321 322 PetscFunctionBegin; 323 if (!pc->setupcalled) { 324 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 325 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 326 Also, we decide to directly build the (same) Dirichlet problem */ 327 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 328 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 329 /* Set up all the "iterative substructuring" common block */ 330 ierr = PCISSetUp(pc);CHKERRQ(ierr); 331 /* Get stdout for dbg */ 332 if(pcbddc->dbg_flag) { 333 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 334 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 335 } 336 /* TODO MOVE CODE FRAGMENT */ 337 PetscInt im_active=0; 338 if(pcis->n) im_active = 1; 339 ierr = 
MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 340 /* Analyze local interface */ 341 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 342 /* Set up local constraint matrix */ 343 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 344 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 345 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 346 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 347 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 348 } 349 PetscFunctionReturn(0); 350 } 351 352 /* -------------------------------------------------------------------------- */ 353 /* 354 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 355 356 Input Parameters: 357 . pc - the preconditioner context 358 . r - input vector (global) 359 360 Output Parameter: 361 . z - output vector (global) 362 363 Application Interface Routine: PCApply() 364 */ 365 #undef __FUNCT__ 366 #define __FUNCT__ "PCApply_BDDC" 367 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 368 { 369 PC_IS *pcis = (PC_IS*)(pc->data); 370 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 371 PetscErrorCode ierr; 372 const PetscScalar one = 1.0; 373 const PetscScalar m_one = -1.0; 374 375 /* This code is similar to that provided in nn.c for PCNN 376 NN interface preconditioner changed to BDDC 377 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 378 379 PetscFunctionBegin; 380 /* First Dirichlet solve */ 381 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 382 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 383 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 384 /* 385 Assembling right hand side for BDDC operator 386 - vec1_D for the Dirichlet part (if 
needed, i.e. prec_flag=PETSC_TRUE) 387 - the interface part of the global vector z 388 */ 389 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 390 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 391 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 392 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 393 ierr = VecCopy(r,z);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 395 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 396 397 /* 398 Apply interface preconditioner 399 Results are stored in: 400 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 401 - the interface part of the global vector z 402 */ 403 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 404 405 /* Second Dirichlet solve and assembling of output */ 406 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 409 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 410 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 411 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 412 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 413 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 414 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 415 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 416 417 PetscFunctionReturn(0); 418 419 } 420 421 /* -------------------------------------------------------------------------- */ 422 /* 423 
PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 424 425 */ 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 428 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 429 { 430 PetscErrorCode ierr; 431 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 432 PC_IS* pcis = (PC_IS*) (pc->data); 433 const PetscScalar zero = 0.0; 434 435 PetscFunctionBegin; 436 /* Get Local boundary and apply partition of unity */ 437 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 440 441 /* Application of PHI^T */ 442 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 443 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 444 445 /* Scatter data of coarse_rhs */ 446 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 447 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 448 449 /* Local solution on R nodes */ 450 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 451 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 452 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 453 if(pcbddc->prec_type) { 454 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 456 } 457 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 458 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 459 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 460 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 461 if(pcbddc->prec_type) { 462 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 464 } 465 466 /* Coarse solution */ 467 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 469 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 472 /* Sum contributions from two levels */ 473 /* Apply partition of unity and sum boundary values */ 474 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 475 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 476 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 477 ierr = VecSet(z,zero);CHKERRQ(ierr); 478 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 479 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 480 481 PetscFunctionReturn(0); 482 } 483 484 485 /* -------------------------------------------------------------------------- */ 486 /* 487 PCBDDCSolveSaddlePoint 488 489 */ 490 #undef __FUNCT__ 491 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 492 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 493 { 494 PetscErrorCode ierr; 495 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 496 497 PetscFunctionBegin; 498 499 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 500 if(pcbddc->n_constraints) { 501 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 502 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 503 } 504 505 PetscFunctionReturn(0); 506 } 507 /* -------------------------------------------------------------------------- */ 508 /* 509 PCBDDCScatterCoarseDataBegin 510 511 */ 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 514 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 515 { 516 PetscErrorCode ierr; 517 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 518 519 PetscFunctionBegin; 520 521 switch(pcbddc->coarse_communications_type){ 522 case SCATTERS_BDDC: 523 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 524 break; 525 case GATHERS_BDDC: 526 break; 527 } 528 PetscFunctionReturn(0); 529 530 } 531 /* -------------------------------------------------------------------------- */ 532 /* 533 PCBDDCScatterCoarseDataEnd 534 535 */ 536 #undef __FUNCT__ 537 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 538 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 539 { 540 PetscErrorCode ierr; 541 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 542 PetscScalar* array_to; 543 PetscScalar* array_from; 544 MPI_Comm comm=((PetscObject)pc)->comm; 545 PetscInt i; 546 547 PetscFunctionBegin; 548 549 switch(pcbddc->coarse_communications_type){ 550 case SCATTERS_BDDC: 551 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 552 break; 553 case GATHERS_BDDC: 554 if(vec_from) VecGetArray(vec_from,&array_from); 555 if(vec_to) VecGetArray(vec_to,&array_to); 556 
switch(pcbddc->coarse_problem_type){ 557 case SEQUENTIAL_BDDC: 558 if(smode == SCATTER_FORWARD) { 559 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 560 if(vec_to) { 561 for(i=0;i<pcbddc->replicated_primal_size;i++) 562 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 563 } 564 } else { 565 if(vec_from) 566 for(i=0;i<pcbddc->replicated_primal_size;i++) 567 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 568 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 569 } 570 break; 571 case REPLICATED_BDDC: 572 if(smode == SCATTER_FORWARD) { 573 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 574 for(i=0;i<pcbddc->replicated_primal_size;i++) 575 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 576 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 577 for(i=0;i<pcbddc->local_primal_size;i++) 578 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 579 } 580 break; 581 case MULTILEVEL_BDDC: 582 break; 583 case PARALLEL_BDDC: 584 break; 585 } 586 if(vec_from) VecRestoreArray(vec_from,&array_from); 587 if(vec_to) VecRestoreArray(vec_to,&array_to); 588 break; 589 } 590 PetscFunctionReturn(0); 591 592 } 593 594 /* -------------------------------------------------------------------------- */ 595 /* 596 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 597 that was created with 
PCCreate_BDDC(). 598 599 Input Parameter: 600 . pc - the preconditioner context 601 602 Application Interface Routine: PCDestroy() 603 */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "PCDestroy_BDDC" 606 PetscErrorCode PCDestroy_BDDC(PC pc) 607 { 608 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* free data created by PCIS */ 613 ierr = PCISDestroy(pc);CHKERRQ(ierr); 614 /* free BDDC data */ 615 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 616 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 617 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 618 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 620 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 621 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 622 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 623 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 624 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 625 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 626 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 627 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 628 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 629 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 630 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 631 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 632 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 633 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 634 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 635 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 636 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 637 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 638 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 639 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 640 
ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 641 PetscInt i; 642 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 643 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 644 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 645 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 646 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 648 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 649 /* Free the private data structure that was hanging off the PC */ 650 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /* -------------------------------------------------------------------------- */ 655 /*MC 656 PCBDDC - Balancing Domain Decomposition by Constraints. 657 658 Options Database Keys: 659 . -pcbddc ??? - 660 661 Level: intermediate 662 663 Notes: The matrix used with this preconditioner must be of type MATIS 664 665 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 666 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 667 on the subdomains). 668 669 Options for the coarse grid preconditioner can be set with - 670 Options for the Dirichlet subproblem can be set with - 671 Options for the Neumann subproblem can be set with - 672 673 Contributed by Stefano Zampini 674 675 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 676 M*/ 677 EXTERN_C_BEGIN 678 #undef __FUNCT__ 679 #define __FUNCT__ "PCCreate_BDDC" 680 PetscErrorCode PCCreate_BDDC(PC pc) 681 { 682 PetscErrorCode ierr; 683 PC_BDDC *pcbddc; 684 685 PetscFunctionBegin; 686 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
 */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data = (void*)pcbddc;
  /* create PCIS data structure */
  ierr = PCISCreate(pc);CHKERRQ(ierr);
  /* BDDC specific: zero out all slots so PCDestroy_BDDC is safe even if setup never runs */
  pcbddc->coarse_vec = 0;
  pcbddc->coarse_rhs = 0;
  pcbddc->coarse_ksp = 0;
  pcbddc->coarse_phi_B = 0;
  pcbddc->coarse_phi_D = 0;
  pcbddc->vec1_P = 0;
  pcbddc->vec1_R = 0;
  pcbddc->vec2_R = 0;
  pcbddc->local_auxmat1 = 0;
  pcbddc->local_auxmat2 = 0;
  pcbddc->R_to_B = 0;
  pcbddc->R_to_D = 0;
  pcbddc->ksp_D = 0;
  pcbddc->ksp_R = 0;
  pcbddc->local_primal_indices = 0;
  /* default is the M_2 preconditioner of the Dohrmann reference article */
  pcbddc->prec_type = PETSC_FALSE;
  pcbddc->NeumannBoundaries = 0;
  pcbddc->ISForDofs = 0;
  pcbddc->ISForVertices = 0;
  pcbddc->n_ISForFaces = 0;
  pcbddc->n_ISForEdges = 0;
  pcbddc->ConstraintMatrix = 0;
  pcbddc->use_nnsp_true = PETSC_FALSE;
  pcbddc->local_primal_sizes = 0;
  pcbddc->local_primal_displacements = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values = 0;
  pcbddc->coarse_loc_to_glob = 0;
  pcbddc->dbg_flag = PETSC_FALSE;
  /* default multilevel coarsening ratio */
  pcbddc->coarsening_ratio = 8;
  /* function pointers */
  pc->ops->apply = PCApply_BDDC;
  pc->ops->applytranspose = 0;
  pc->ops->setup = PCSetUp_BDDC;
  pc->ops->destroy = PCDestroy_BDDC;
  pc->ops->setfromoptions = PCSetFromOptions_BDDC;
  pc->ops->view = 0;
  pc->ops->applyrichardson = 0;
  pc->ops->applysymmetricleft = 0;
  pc->ops->applysymmetricright = 0;
  /* composing function */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
                    PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   Create C matrix [I 0; 0 const]

   Builds the local constraint matrix pcbddc->ConstraintMatrix (SEQAIJ,
   local_primal_size x pcis->n): one row per primal dof. Vertex rows are
   canonical unit vectors; the remaining rows hold quadrature constraints
   obtained by restricting the near null space vectors attached to the global
   matrix (or the constant vector, when none is attached) to each edge/face
   index set. Unless use_nnsp_true is set, each group of constraints is
   orthonormalized and filtered by tolerance through LAPACK GESVD, or through
   a POD-style eigendecomposition (SYEV/HEEV of the correlation matrix) when
   GESVD is missing. Also sets pcbddc->n_vertices, n_constraints and
   local_primal_size.
*/
/* for testing only */
#if defined(PETSC_MISSING_LAPACK_GESVD)
/* NOTE(review): this redefines PETSC_MISSING_LAPACK_GESVD and the matching
   #undef after the function removes it for the rest of the file -- confirm
   this toggle is intentional and does not affect later GESVD checks */
#define BDDC_USE_POD
#define PETSC_MISSING_LAPACK_GESVD 1
#endif

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS* pcis = (PC_IS*)(pc->data);
  PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
  Mat_IS *matis = (Mat_IS*)pc->pmat->data;
  PetscInt *nnz,*vertices,*is_indices;
  PetscScalar *temp_quadrature_constraint;
  PetscInt *temp_indices,*temp_indices_to_constraint;
  PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint;
  PetscInt n_constraints,n_vertices,size_of_constraint;
  PetscReal quad_value;
  PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
  PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
  IS *used_IS;
  const MatType impMatType=MATSEQAIJ;
  PetscBLASInt Bs,Bt,lwork,lierr;
  PetscReal tol=1.0e-8;  /* singular/eigen value cutoff for retaining a constraint */
  Vec *localnearnullsp;
  PetscScalar *work,*temp_basis,*array_vector,*correlation_mat;
  PetscReal *rwork,*singular_vals;
/* some conditional variables */
#if defined(PETSC_MISSING_LAPACK_GESVD)
  PetscScalar dot_result;
  PetscScalar one=1.0,zero=0.0;
  PetscInt ii;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar val1,val2;
#else
  PetscBLASInt Bone=1;
#endif
#else
  PetscBLASInt dummy_int;
  PetscScalar dummy_scalar;
#endif

  PetscFunctionBegin;
  /* check if near null space is attached to global mat */
  if(pc->pmat->nearnullsp) {
    nnsp_has_cnst = pc->pmat->nearnullsp->has_cnst;
    nnsp_size = pc->pmat->nearnullsp->n;
  } else { /* if near null space is not provided it uses constants */
    nnsp_has_cnst = PETSC_TRUE;
    use_nnsp_true = PETSC_TRUE;
  }
  if(nnsp_has_cnst) {
    nnsp_addone = 1;
  }
  /*
       Evaluate maximum storage size needed by the procedure
       - temp_indices will contain start index of each constraint stored as follows
       - temp_indices_to_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
       - temp_quadrature_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
  */
  total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
  total_counts = 0;
  max_size_of_constraint = 0;
  /* upper bound on storage: sum of sizes of all edge/face sets times number of candidate constraints per set */
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
    total_counts += j;
    if(j>max_size_of_constraint) max_size_of_constraint=j;
  }
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
  /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
  rwork = 0;
  work = 0;
  singular_vals = 0;
  temp_basis = 0;
  correlation_mat = 0;
  if(!pcbddc->use_nnsp_true) {
    PetscScalar temp_work;
#if defined(PETSC_MISSING_LAPACK_GESVD)
    /* POD */
    PetscInt max_n;
    max_n = nnsp_addone+nnsp_size;
    /* using some techniques borrowed from Proper Orthogonal Decomposition */
    ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
    ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
    ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    Bt = PetscBLASIntCast(max_n);
    lwork=-1;
    /* NOTE(review): unlike the GESVD query below, lierr is not checked after
       this workspace query -- confirm whether a SETERRQ check is needed */
#if !defined(PETSC_USE_COMPLEX)
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
#else
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
#endif
#else /* on missing GESVD */
    /* SVD */
    PetscInt max_n,min_n;
    max_n = max_size_of_constraint;
    min_n = nnsp_addone+nnsp_size;
    if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
      min_n = max_size_of_constraint;
      max_n = nnsp_addone+nnsp_size;
    }
    ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    lwork=-1;
    Bs = PetscBLASIntCast(max_n);
    Bt = PetscBLASIntCast(min_n);
    dummy_int = Bs;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
#else
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
#endif
    if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
#endif
    /* Allocate optimal workspace (total_counts reused here as a throwaway size variable) */
    lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
    total_counts = (PetscInt)lwork;
    ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* get local part of global near null space vectors */
  ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  /* Now we can loop on constraining sets; total_counts now counts the retained constraints */
  total_counts=0;
  temp_indices[0]=0;
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
    temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
    ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
    ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    if(nnsp_has_cnst) {
      /* constant constraint, normalized so the row has unit 2-norm */
      temp_constraints++;
      quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
      }
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    /* one constraint per near null space vector, restricted to this set */
    for(k=0;k<nnsp_size;k++) {
      temp_constraints++;
      ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
      }
      ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* perform SVD on the constraint if use true has not be requested by the user */
    if(!use_nnsp_true) {
      Bs = PetscBLASIntCast(size_of_constraint);
      Bt = PetscBLASIntCast(temp_constraints);
#if defined(PETSC_MISSING_LAPACK_GESVD)
      ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr);
      /* Store upper triangular part of correlation matrix */
      for(j=0;j<temp_constraints;j++) {
        for(k=0;k<j+1;k++) {
#if defined(PETSC_USE_COMPLEX)
          /* hand made complex dot product */
          dot_result = 0.0;
          for (ii=0; ii<size_of_constraint; ii++) {
            val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
            val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
            dot_result += val1*PetscConj(val2);
          }
#else
          dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
                                &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
#endif
          correlation_mat[j*temp_constraints+k]=dot_result;
        }
      }
#if !defined(PETSC_USE_COMPLEX)
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
#else
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
      /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
      j=0;
      while( j < Bt && singular_vals[j] < tol) j++;
      total_counts=total_counts-j;  /* drop the j discarded constraints from the running count */
      if(j<temp_constraints) {
        /* scale retained modes by 1/sqrt(eig) and project the raw constraints onto the eigenbasis */
        for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
        BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
        /* copy POD basis into used quadrature memory (largest eigenvalues first) */
        for(k=0;k<Bt-j;k++) {
          for(ii=0;ii<size_of_constraint;ii++) {
            temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
          }
        }
      }
#else /* on missing GESVD */
      PetscInt min_n = temp_constraints;
      if(min_n > size_of_constraint) min_n = size_of_constraint;
      dummy_int = Bs;
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      /* "O" overwrites the stored constraints in place with the left singular vectors */
#if !defined(PETSC_USE_COMPLEX)
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
#else
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
      /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */
      j=0;
      while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
      total_counts = total_counts-(PetscInt)Bt+(min_n-j);  /* keep only the (min_n-j) significant singular vectors */
#endif
    }
  }
  /* Assemble the constraint matrix: identity rows for vertices, quadrature rows for constraints */
  n_constraints=total_counts;
  ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr);
  local_primal_size = n_vertices+n_constraints;
  ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
  ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
  ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { nnz[i]= 1; }
  for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; }
  ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
  ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); }
  ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_constraints;i++) {
    j=i+n_vertices;
    size_of_constraint=temp_indices[i+1]-temp_indices[i];
    ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* set quantities in pcbddc data structure */
  pcbddc->n_vertices = n_vertices;
  pcbddc->n_constraints = n_constraints;
  pcbddc->local_primal_size = n_vertices+n_constraints;
  /* free workspace no longer needed (PetscFree is a no-op on the pointers left at 0 above) */
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscFree(temp_basis);CHKERRQ(ierr);
  ierr = PetscFree(singular_vals);CHKERRQ(ierr);
  ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
  ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); }
  ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#ifdef BDDC_USE_POD
#undef PETSC_MISSING_LAPACK_GESVD
#endif
/* -------------------------------------------------------------------------- */
/*
   PCBDDCCoarseSetUp -
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS* pcis = (PC_IS*)(pc->data);
  PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
  Mat_IS *matis = (Mat_IS*)pc->pmat->data;
  IS is_R_local;
  IS is_V_local;
  IS is_C_local;
  IS is_aux1;
  IS is_aux2;
  const VecType impVecType;
  const MatType impMatType;
  PetscInt n_R=0;
  PetscInt n_D=0;
  PetscInt n_B=0;
  PetscScalar zero=0.0;
  PetscScalar one=1.0;
  PetscScalar m_one=-1.0;
  PetscScalar* array;
  PetscScalar *coarse_submat_vals;
  PetscInt *idx_R_local;
  PetscInt *idx_V_B;
  PetscScalar *coarsefunctions_errors;
  PetscScalar *constraints_errors;
  /* auxiliary indices */
  PetscInt s,i,j,k;
  /* for verbose output of bddc */
  PetscViewer viewer=pcbddc->dbg_viewer;
  PetscBool dbg_flag=pcbddc->dbg_flag;
  /* for counting coarse dofs */
  PetscScalar coarsesum;
  PetscInt n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints;
  PetscInt size_of_constraint;
  PetscInt *row_cmat_indices;
  PetscScalar *row_cmat_values;
const PetscInt *vertices; 1085 1086 PetscFunctionBegin; 1087 /* Set Non-overlapping dimensions */ 1088 n_B = pcis->n_B; n_D = pcis->n - n_B; 1089 ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr); 1090 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1091 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1092 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1093 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1094 1095 for(i=0;i<n_constraints;i++) { 1096 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1097 for (j=0; j<size_of_constraint; j++) { 1098 k = row_cmat_indices[j]; 1099 if( array[k] == zero ) { 1100 array[k] = one; 1101 break; 1102 } 1103 } 1104 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1105 } 1106 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1107 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1108 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1110 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1113 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1114 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1115 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1116 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1117 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1118 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1119 pcbddc->coarse_size = (PetscInt) coarsesum; 1120 1121 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1122 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1123 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1124 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1125 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1126 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1127 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1128 if(dbg_flag) { 1129 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1130 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1131 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1132 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1133 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1134 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1135 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1136 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1137 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1138 } 1139 /* Allocate needed vectors */ 1140 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1141 impMatType = MATSEQDENSE; 1142 impVecType = VECSEQ; 1143 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 
1144 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1145 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1146 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1147 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1148 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1149 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1150 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1151 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1152 1153 /* Creating some index sets needed */ 1154 /* For submatrices */ 1155 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1156 if(n_vertices) { 1157 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1158 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1159 } 1160 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1161 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1162 { 1163 PetscInt *aux_array1; 1164 PetscInt *aux_array2; 1165 PetscScalar value; 1166 1167 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1168 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1169 1170 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1171 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1174 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 ierr = 
VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1178 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } } 1179 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1180 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1181 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1182 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1183 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1184 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1185 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1186 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1187 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1188 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1189 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1190 1191 if(pcbddc->prec_type || dbg_flag ) { 1192 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1193 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1194 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1195 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1196 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1197 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1198 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1199 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1200 } 1201 1202 /* Check scatters */ 1203 if(dbg_flag) { 1204 1205 Vec vec_aux; 1206 1207 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1208 ierr = 
PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1209 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1210 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1211 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1212 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1213 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1214 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1215 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1217 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1218 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1219 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1220 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1221 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1222 1223 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1224 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1225 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1226 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1227 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1231 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1232 ierr = 
VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1233 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1234 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1235 1236 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1237 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr); 1238 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1239 1240 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1241 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1242 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1243 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1244 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1247 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1248 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1249 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1250 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1251 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1252 1253 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1254 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1255 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1256 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1257 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 ierr = 
VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1262 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1263 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1264 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1265 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1266 1267 } 1268 } 1269 1270 /* vertices in boundary numbering */ 1271 if(n_vertices) { 1272 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1273 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1274 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1275 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1276 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1278 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1279 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1280 for (i=0; i<n_vertices; i++) { 1281 s=0; 1282 while (array[s] != i ) {s++;} 1283 idx_V_B[i]=s; 1284 } 1285 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1286 } 1287 1288 1289 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1290 { 1291 Mat A_RR; 1292 PC pc_temp; 1293 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1294 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1295 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1296 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1297 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1298 
//ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1299 /* default */ 1300 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1301 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1302 /* Allow user's customization */ 1303 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1304 /* Set Up KSP for Dirichlet problem of BDDC */ 1305 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1306 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1307 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1308 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1309 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1310 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1311 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1312 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1313 /* default */ 1314 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1315 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1316 /* Allow user's customization */ 1317 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1318 /* Set Up KSP for Neumann problem of BDDC */ 1319 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1320 /* check Dirichlet and Neumann solvers */ 1321 if(pcbddc->dbg_flag) { 1322 Vec temp_vec; 1323 PetscScalar value; 1324 1325 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1326 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1327 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1328 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1329 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1330 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1331 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1334 
ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1335 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1336 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1337 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1338 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1339 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1340 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1341 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1342 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1343 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1344 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1345 } 1346 /* free Neumann problem's matrix */ 1347 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1348 } 1349 1350 /* Assemble all remaining stuff needed to apply BDDC */ 1351 { 1352 Mat A_RV,A_VR,A_VV; 1353 Mat M1,M2; 1354 Mat C_CR; 1355 Mat AUXMAT; 1356 Vec vec1_C; 1357 Vec vec2_C; 1358 Vec vec1_V; 1359 Vec vec2_V; 1360 PetscInt *nnz; 1361 PetscInt *auxindices; 1362 PetscInt index; 1363 PetscScalar* array2; 1364 MatFactorInfo matinfo; 1365 1366 /* Allocating some extra storage just to be safe */ 1367 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1368 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1369 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1370 1371 /* some work vectors on vertices and/or constraints */ 1372 if(n_vertices) { 1373 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1374 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1375 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1376 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1377 } 1378 if(pcbddc->n_constraints) { 
1379 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1380 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1381 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1382 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1383 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1384 } 1385 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1386 if(n_constraints) { 1387 /* some work vectors */ 1388 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1389 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1390 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1391 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1392 1393 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1394 for(i=0;i<n_constraints;i++) { 1395 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1396 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1397 /* Get row of constraint matrix in R numbering */ 1398 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1399 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1400 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1401 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1402 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1403 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1404 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1405 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1406 /* Solve for row of constraint matrix in R numbering */ 1407 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1408 
/* Set values */ 1409 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1410 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1411 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1412 } 1413 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 1416 /* Create Constraint matrix on R nodes: C_{CR} */ 1417 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1418 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1419 1420 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1421 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1422 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1423 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1424 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1425 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1426 1427 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1428 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1429 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1430 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1431 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1432 for(i=0;i<n_constraints;i++) { 1433 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1434 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1435 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1436 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1437 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1438 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1439 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1440 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1441 
ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1442 } 1443 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1446 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1447 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1448 1449 } 1450 1451 /* Get submatrices from subdomain matrix */ 1452 if(n_vertices){ 1453 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1454 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1455 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1456 /* Assemble M2 = A_RR^{-1}A_RV */ 1457 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1458 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1459 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1460 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1461 for(i=0;i<n_vertices;i++) { 1462 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1463 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1464 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1465 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1466 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1467 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1468 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1469 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1470 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1471 } 1472 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1473 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1474 } 1475 1476 /* Matrix of coarse basis functions (local) */ 1477 ierr = 
MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1478 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1479 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1480 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1481 if(pcbddc->prec_type || dbg_flag ) { 1482 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1483 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1484 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1485 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1486 } 1487 1488 if(dbg_flag) { 1489 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1490 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1491 } 1492 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1493 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1494 1495 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1496 for(i=0;i<n_vertices;i++){ 1497 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1498 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1499 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1500 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1501 /* solution of saddle point problem */ 1502 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1503 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1504 if(n_constraints) { 1505 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1506 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1507 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1508 } 1509 ierr 
= MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1510 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1511 1512 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1513 /* coarse basis functions */ 1514 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1515 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1516 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1517 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1518 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1519 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1520 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1521 if( pcbddc->prec_type || dbg_flag ) { 1522 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1525 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1526 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1527 } 1528 /* subdomain contribution to coarse matrix */ 1529 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1530 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1531 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1532 if(n_constraints) { 1533 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1534 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1535 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1536 } 1537 1538 if( dbg_flag ) { 1539 /* assemble subdomain vector on 
nodes */ 1540 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1541 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1542 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1543 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1544 array[ vertices[i] ] = one; 1545 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1546 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1547 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 1548 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1549 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1550 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1551 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1552 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1553 if(n_constraints) { 1554 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1555 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1556 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1557 } 1558 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1559 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1560 /* check saddle point solution */ 1561 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1562 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1563 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1564 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1565 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1566 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1567 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1568 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1569 } 1570 } 1571 1572 for(i=0;i<n_constraints;i++){ 1573 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1574 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1575 ierr = 
VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1576 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1577 /* solution of saddle point problem */ 1578 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1579 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1580 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1581 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1582 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1583 /* coarse basis functions */ 1584 index=i+n_vertices; 1585 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1589 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1590 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1591 if( pcbddc->prec_type || dbg_flag ) { 1592 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1595 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1596 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1597 } 1598 /* subdomain contribution to coarse matrix */ 1599 if(n_vertices) { 1600 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1601 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1602 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1603 } 1604 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1605 for(j=0;j<n_constraints;j++) 
{coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1606 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1607 1608 if( dbg_flag ) { 1609 /* assemble subdomain vector on nodes */ 1610 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1611 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1612 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1613 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1614 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1615 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1616 /* assemble subdomain vector of lagrange multipliers */ 1617 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1618 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1619 if( n_vertices) { 1620 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1621 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1622 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1623 } 1624 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1625 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1626 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1627 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1628 /* check saddle point solution */ 1629 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1630 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1631 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1632 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1633 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1634 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1635 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1636 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1637 } 1638 } 1639 ierr = 
MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 if( pcbddc->prec_type || dbg_flag ) { 1642 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 } 1645 /* Checking coarse_sub_mat and coarse basis functios */ 1646 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1647 if(dbg_flag) { 1648 1649 Mat coarse_sub_mat; 1650 Mat TM1,TM2,TM3,TM4; 1651 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1652 const MatType checkmattype=MATSEQAIJ; 1653 PetscScalar value; 1654 1655 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1656 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1657 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1658 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1659 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1660 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1662 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1663 1664 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1665 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1666 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1667 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1668 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1669 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1670 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1672 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1673 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1674 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1675 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1676 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1677 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1678 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1679 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1680 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1681 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1682 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1683 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1684 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1685 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1686 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1688 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1689 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1690 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1691 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1692 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1693 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 1694 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1695 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1696 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1697 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1698 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1699 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1700 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1701 } 1702 1703 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1704 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1705 /* free memory */ 1706 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1707 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1708 ierr = PetscFree(nnz);CHKERRQ(ierr); 1709 if(n_vertices) { 1710 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1711 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1712 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1713 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1714 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1715 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1716 } 1717 if(pcbddc->n_constraints) { 1718 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1719 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1720 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1721 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1722 } 1723 } 1724 /* free memory */ 1725 if(n_vertices) { 1726 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1727 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1728 } 1729 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1730 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1731 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1732 1733 PetscFunctionReturn(0); 1734 } 1735 1736 /* -------------------------------------------------------------------------- */ 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1740 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1741 { 1742 1743 1744 Mat_IS *matis = 
(Mat_IS*)pc->pmat->data; 1745 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1746 PC_IS *pcis = (PC_IS*)pc->data; 1747 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1748 MPI_Comm coarse_comm; 1749 1750 /* common to all choiches */ 1751 PetscScalar *temp_coarse_mat_vals; 1752 PetscScalar *ins_coarse_mat_vals; 1753 PetscInt *ins_local_primal_indices; 1754 PetscMPIInt *localsizes2,*localdispl2; 1755 PetscMPIInt size_prec_comm; 1756 PetscMPIInt rank_prec_comm; 1757 PetscMPIInt active_rank=MPI_PROC_NULL; 1758 PetscMPIInt master_proc=0; 1759 PetscInt ins_local_primal_size; 1760 /* specific to MULTILEVEL_BDDC */ 1761 PetscMPIInt *ranks_recv; 1762 PetscMPIInt count_recv=0; 1763 PetscMPIInt rank_coarse_proc_send_to; 1764 PetscMPIInt coarse_color = MPI_UNDEFINED; 1765 ISLocalToGlobalMapping coarse_ISLG; 1766 /* some other variables */ 1767 PetscErrorCode ierr; 1768 const MatType coarse_mat_type; 1769 const PCType coarse_pc_type; 1770 const KSPType coarse_ksp_type; 1771 PC pc_temp; 1772 PetscInt i,j,k,bs; 1773 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1774 /* verbose output viewer */ 1775 PetscViewer viewer=pcbddc->dbg_viewer; 1776 PetscBool dbg_flag=pcbddc->dbg_flag; 1777 1778 PetscFunctionBegin; 1779 1780 ins_local_primal_indices = 0; 1781 ins_coarse_mat_vals = 0; 1782 localsizes2 = 0; 1783 localdispl2 = 0; 1784 temp_coarse_mat_vals = 0; 1785 coarse_ISLG = 0; 1786 1787 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1788 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1789 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1790 1791 /* Assign global numbering to coarse dofs */ 1792 { 1793 PetscScalar one=1.,zero=0.; 1794 PetscScalar *array; 1795 PetscMPIInt *auxlocal_primal; 1796 PetscMPIInt *auxglobal_primal; 1797 PetscMPIInt *all_auxglobal_primal; 1798 PetscMPIInt *all_auxglobal_primal_dummy; 1799 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1800 PetscInt *vertices,*row_cmat_indices; 1801 PetscInt 
size_of_constraint; 1802 1803 /* Construct needed data structures for message passing */ 1804 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1805 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1806 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1807 /* Gather local_primal_size information for all processes */ 1808 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1809 pcbddc->replicated_primal_size = 0; 1810 for (i=0; i<size_prec_comm; i++) { 1811 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1812 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1813 } 1814 if(rank_prec_comm == 0) { 1815 /* allocate some auxiliary space */ 1816 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1817 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1818 } 1819 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1820 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1821 1822 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1823 This code fragment assumes that the number of local constraints per connected component 1824 is not greater than the number of nodes defined for the connected component 1825 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1826 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1827 ierr = 
VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1828 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1829 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1830 for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1831 array[ vertices[i] ] = one; 1832 auxlocal_primal[i] = vertices[i]; 1833 } 1834 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1835 for(i=0;i<pcbddc->n_constraints;i++) { 1836 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1837 for (j=0; j<size_of_constraint; j++) { 1838 k = row_cmat_indices[j]; 1839 if( array[k] == zero ) { 1840 array[k] = one; 1841 auxlocal_primal[i+pcbddc->n_vertices] = k; 1842 break; 1843 } 1844 } 1845 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1846 } 1847 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1848 1849 /* Now assign them a global numbering */ 1850 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1851 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1852 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1853 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1854 1855 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1856 /* It implements a function similar to PetscSortRemoveDupsInt */ 1857 if(rank_prec_comm==0) { 1858 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1859 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1860 k=1; 1861 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1862 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1863 if(j != all_auxglobal_primal[i] ) { 1864 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1865 k++; 1866 j=all_auxglobal_primal[i]; 1867 } 1868 } 1869 } else { 1870 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1871 } 1872 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1873 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1874 1875 /* Now get global coarse numbering of local primal nodes */ 1876 for(i=0;i<pcbddc->local_primal_size;i++) { 1877 k=0; 1878 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1879 pcbddc->local_primal_indices[i]=k; 1880 } 1881 if(dbg_flag) { 1882 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1883 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1884 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1885 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1886 for(i=0;i<pcbddc->local_primal_size;i++) { 1887 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1888 } 1889 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1890 } 1891 /* free allocated memory */ 1892 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1893 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1894 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1895 if(rank_prec_comm == 0) { 1896 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1897 } 1898 } 1899 1900 /* adapt coarse problem 
type */ 1901 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1902 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1903 1904 switch(pcbddc->coarse_problem_type){ 1905 1906 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1907 { 1908 /* we need additional variables */ 1909 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1910 MetisInt *metis_coarse_subdivision; 1911 MetisInt options[METIS_NOPTIONS]; 1912 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1913 PetscMPIInt procs_jumps_coarse_comm; 1914 PetscMPIInt *coarse_subdivision; 1915 PetscMPIInt *total_count_recv; 1916 PetscMPIInt *total_ranks_recv; 1917 PetscMPIInt *displacements_recv; 1918 PetscMPIInt *my_faces_connectivity; 1919 PetscMPIInt *petsc_faces_adjncy; 1920 MetisInt *faces_adjncy; 1921 MetisInt *faces_xadj; 1922 PetscMPIInt *number_of_faces; 1923 PetscMPIInt *faces_displacements; 1924 PetscInt *array_int; 1925 PetscMPIInt my_faces=0; 1926 PetscMPIInt total_faces=0; 1927 PetscInt ranks_stretching_ratio; 1928 1929 /* define some quantities */ 1930 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1931 coarse_mat_type = MATIS; 1932 coarse_pc_type = PCBDDC; 1933 coarse_ksp_type = KSPCHEBYCHEV; 1934 1935 /* details of coarse decomposition */ 1936 n_subdomains = pcbddc->active_procs; 1937 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1938 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1939 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1940 1941 printf("Coarse algorithm details: \n"); 1942 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1943 1944 /* build CSR graph of subdomains' connectivity through faces */ 1945 ierr = PetscMalloc 
(pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1946 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1947 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1948 for(j=0;j<pcis->n_shared[i];j++){ 1949 array_int[ pcis->shared[i][j] ]+=1; 1950 } 1951 } 1952 for(i=1;i<pcis->n_neigh;i++){ 1953 for(j=0;j<pcis->n_shared[i];j++){ 1954 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1955 my_faces++; 1956 break; 1957 } 1958 } 1959 } 1960 //printf("I found %d faces.\n",my_faces); 1961 1962 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1963 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1964 my_faces=0; 1965 for(i=1;i<pcis->n_neigh;i++){ 1966 for(j=0;j<pcis->n_shared[i];j++){ 1967 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1968 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1969 my_faces++; 1970 break; 1971 } 1972 } 1973 } 1974 if(rank_prec_comm == master_proc) { 1975 //printf("I found %d total faces.\n",total_faces); 1976 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1977 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1978 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1979 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1980 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1981 } 1982 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1983 if(rank_prec_comm == master_proc) { 1984 faces_xadj[0]=0; 1985 faces_displacements[0]=0; 1986 j=0; 1987 for(i=1;i<size_prec_comm+1;i++) { 1988 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 1989 if(number_of_faces[i-1]) { 1990 j++; 1991 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 1992 } 1993 } 1994 printf("The 
J I count is %d and should be %d\n",j,n_subdomains); 1995 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 1996 } 1997 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1998 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 1999 ierr = PetscFree(array_int);CHKERRQ(ierr); 2000 if(rank_prec_comm == master_proc) { 2001 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2002 printf("This is the face connectivity (actual ranks)\n"); 2003 for(i=0;i<n_subdomains;i++){ 2004 printf("proc %d is connected with \n",i); 2005 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2006 printf("%d ",faces_adjncy[j]); 2007 printf("\n"); 2008 } 2009 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2010 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2011 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2012 } 2013 2014 if( rank_prec_comm == master_proc ) { 2015 2016 PetscInt heuristic_for_metis=3; 2017 2018 ncon=1; 2019 faces_nvtxs=n_subdomains; 2020 /* partition graoh induced by face connectivity */ 2021 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2022 ierr = METIS_SetDefaultOptions(options); 2023 /* we need a contiguous partition of the coarse mesh */ 2024 options[METIS_OPTION_CONTIG]=1; 2025 options[METIS_OPTION_DBGLVL]=1; 2026 options[METIS_OPTION_NITER]=30; 2027 //options[METIS_OPTION_NCUTS]=1; 2028 printf("METIS PART GRAPH\n"); 2029 if(n_subdomains>n_parts*heuristic_for_metis) { 2030 printf("Using Kway\n"); 2031 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2032 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2033 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2034 } else { 2035 printf("Using Recursive\n"); 2036 
ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2037 } 2038 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2039 printf("Partition done!\n"); 2040 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2041 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2042 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2043 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2044 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2045 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2046 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2047 } 2048 2049 /* Create new communicator for coarse problem splitting the old one */ 2050 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2051 coarse_color=0; //for communicator splitting 2052 active_rank=rank_prec_comm; //for insertion of matrix values 2053 } 2054 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2055 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2056 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2057 2058 if( coarse_color == 0 ) { 2059 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2060 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2061 printf("Details of coarse comm\n"); 2062 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2063 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2064 } else { 
2065 rank_coarse_comm = MPI_PROC_NULL; 2066 } 2067 2068 /* master proc take care of arranging and distributing coarse informations */ 2069 if(rank_coarse_comm == master_proc) { 2070 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2071 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2072 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2073 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2074 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2075 /* some initializations */ 2076 displacements_recv[0]=0; 2077 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2078 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2079 for(j=0;j<size_coarse_comm;j++) 2080 for(i=0;i<size_prec_comm;i++) 2081 if(coarse_subdivision[i]==j) 2082 total_count_recv[j]++; 2083 /* displacements needed for scatterv of total_ranks_recv */ 2084 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2085 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2086 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2087 for(j=0;j<size_coarse_comm;j++) { 2088 for(i=0;i<size_prec_comm;i++) { 2089 if(coarse_subdivision[i]==j) { 2090 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2091 total_count_recv[j]+=1; 2092 } 2093 } 2094 } 2095 for(j=0;j<size_coarse_comm;j++) { 2096 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2097 for(i=0;i<total_count_recv[j];i++) { 2098 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2099 } 2100 printf("\n"); 2101 } 2102 
2103 /* identify new decomposition in terms of ranks in the old communicator */ 2104 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2105 printf("coarse_subdivision in old end new ranks\n"); 2106 for(i=0;i<size_prec_comm;i++) 2107 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2108 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2109 } else { 2110 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2111 } 2112 printf("\n"); 2113 } 2114 2115 /* Scatter new decomposition for send details */ 2116 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2117 /* Scatter receiving details to members of coarse decomposition */ 2118 if( coarse_color == 0) { 2119 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2120 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2121 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2122 } 2123 2124 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2125 //if(coarse_color == 0) { 2126 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2127 // for(i=0;i<count_recv;i++) 2128 // printf("%d ",ranks_recv[i]); 2129 // printf("\n"); 2130 //} 2131 2132 if(rank_prec_comm == master_proc) { 2133 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2134 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2135 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2136 free(coarse_subdivision); 2137 free(total_count_recv); 2138 free(total_ranks_recv); 2139 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2140 } 2141 break; 2142 } 2143 2144 
case(REPLICATED_BDDC): 2145 2146 pcbddc->coarse_communications_type = GATHERS_BDDC; 2147 coarse_mat_type = MATSEQAIJ; 2148 coarse_pc_type = PCLU; 2149 coarse_ksp_type = KSPPREONLY; 2150 coarse_comm = PETSC_COMM_SELF; 2151 active_rank = rank_prec_comm; 2152 break; 2153 2154 case(PARALLEL_BDDC): 2155 2156 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2157 coarse_mat_type = MATMPIAIJ; 2158 coarse_pc_type = PCREDUNDANT; 2159 coarse_ksp_type = KSPPREONLY; 2160 coarse_comm = prec_comm; 2161 active_rank = rank_prec_comm; 2162 break; 2163 2164 case(SEQUENTIAL_BDDC): 2165 pcbddc->coarse_communications_type = GATHERS_BDDC; 2166 coarse_mat_type = MATSEQAIJ; 2167 coarse_pc_type = PCLU; 2168 coarse_ksp_type = KSPPREONLY; 2169 coarse_comm = PETSC_COMM_SELF; 2170 active_rank = master_proc; 2171 break; 2172 } 2173 2174 switch(pcbddc->coarse_communications_type){ 2175 2176 case(SCATTERS_BDDC): 2177 { 2178 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2179 2180 PetscMPIInt send_size; 2181 PetscInt *aux_ins_indices; 2182 PetscInt ii,jj; 2183 MPI_Request *requests; 2184 2185 /* allocate auxiliary space */ 2186 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2187 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2188 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2189 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2190 /* allocate stuffs for message massing */ 2191 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2192 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2193 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2194 ierr = PetscMalloc ( 
count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2195 /* fill up quantities */ 2196 j=0; 2197 for(i=0;i<count_recv;i++){ 2198 ii = ranks_recv[i]; 2199 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2200 localdispl2[i]=j; 2201 j+=localsizes2[i]; 2202 jj = pcbddc->local_primal_displacements[ii]; 2203 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 2204 } 2205 //printf("aux_ins_indices 1\n"); 2206 //for(i=0;i<pcbddc->coarse_size;i++) 2207 // printf("%d ",aux_ins_indices[i]); 2208 //printf("\n"); 2209 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2210 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2211 /* evaluate how many values I will insert in coarse mat */ 2212 ins_local_primal_size=0; 2213 for(i=0;i<pcbddc->coarse_size;i++) 2214 if(aux_ins_indices[i]) 2215 ins_local_primal_size++; 2216 /* evaluate indices I will insert in coarse mat */ 2217 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2218 j=0; 2219 for(i=0;i<pcbddc->coarse_size;i++) 2220 if(aux_ins_indices[i]) 2221 ins_local_primal_indices[j++]=i; 2222 /* use aux_ins_indices to realize a global to local mapping */ 2223 j=0; 2224 for(i=0;i<pcbddc->coarse_size;i++){ 2225 if(aux_ins_indices[i]==0){ 2226 aux_ins_indices[i]=-1; 2227 } else { 2228 aux_ins_indices[i]=j; 2229 j++; 2230 } 2231 } 2232 2233 //printf("New details localsizes2 localdispl2\n"); 2234 //for(i=0;i<count_recv;i++) 2235 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2236 //printf("\n"); 2237 //printf("aux_ins_indices 2\n"); 2238 //for(i=0;i<pcbddc->coarse_size;i++) 2239 // printf("%d ",aux_ins_indices[i]); 2240 //printf("\n"); 2241 //printf("ins_local_primal_indices\n"); 2242 //for(i=0;i<ins_local_primal_size;i++) 2243 // printf("%d 
",ins_local_primal_indices[i]); 2244 //printf("\n"); 2245 //printf("coarse_submat_vals\n"); 2246 //for(i=0;i<pcbddc->local_primal_size;i++) 2247 // for(j=0;j<pcbddc->local_primal_size;j++) 2248 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2249 //printf("\n"); 2250 2251 /* processes partecipating in coarse problem receive matrix data from their friends */ 2252 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2253 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2254 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2255 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2256 } 2257 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2258 2259 //if(coarse_color == 0) { 2260 // printf("temp_coarse_mat_vals\n"); 2261 // for(k=0;k<count_recv;k++){ 2262 // printf("---- %d ----\n",ranks_recv[k]); 2263 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2264 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2265 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2266 // printf("\n"); 2267 // } 2268 //} 2269 /* calculate data to insert in coarse mat */ 2270 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2271 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2272 2273 PetscMPIInt rr,kk,lps,lpd; 2274 PetscInt row_ind,col_ind; 2275 for(k=0;k<count_recv;k++){ 2276 
rr = ranks_recv[k]; 2277 kk = localdispl2[k]; 2278 lps = pcbddc->local_primal_sizes[rr]; 2279 lpd = pcbddc->local_primal_displacements[rr]; 2280 //printf("Inserting the following indices (received from %d)\n",rr); 2281 for(j=0;j<lps;j++){ 2282 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2283 for(i=0;i<lps;i++){ 2284 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2285 //printf("%d %d\n",row_ind,col_ind); 2286 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2287 } 2288 } 2289 } 2290 ierr = PetscFree(requests);CHKERRQ(ierr); 2291 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2292 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2293 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2294 2295 /* create local to global mapping needed by coarse MATIS */ 2296 { 2297 IS coarse_IS; 2298 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2299 coarse_comm = prec_comm; 2300 active_rank=rank_prec_comm; 2301 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2302 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2303 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2304 } 2305 } 2306 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2307 /* arrays for values insertion */ 2308 ins_local_primal_size = pcbddc->local_primal_size; 2309 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2310 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2311 for(j=0;j<ins_local_primal_size;j++){ 2312 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2313 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2314 } 2315 } 2316 break; 
2317 2318 } 2319 2320 case(GATHERS_BDDC): 2321 { 2322 2323 PetscMPIInt mysize,mysize2; 2324 2325 if(rank_prec_comm==active_rank) { 2326 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2327 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2328 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2329 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2330 /* arrays for values insertion */ 2331 ins_local_primal_size = pcbddc->coarse_size; 2332 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2333 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2334 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2335 localdispl2[0]=0; 2336 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2337 j=0; 2338 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2339 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2340 } 2341 2342 mysize=pcbddc->local_primal_size; 2343 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2344 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2345 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2346 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2347 } else { 2348 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2349 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2350 } 2351 2352 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2353 if(rank_prec_comm==active_rank) { 2354 PetscInt offset,offset2,row_ind,col_ind; 2355 for(j=0;j<ins_local_primal_size;j++){ 2356 ins_local_primal_indices[j]=j; 2357 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2358 } 2359 for(k=0;k<size_prec_comm;k++){ 2360 offset=pcbddc->local_primal_displacements[k]; 2361 offset2=localdispl2[k]; 2362 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2363 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2364 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2365 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2366 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2367 } 2368 } 2369 } 2370 } 2371 break; 2372 }//switch on coarse problem and communications associated with finished 2373 } 2374 2375 /* Now create and fill up coarse matrix */ 2376 if( rank_prec_comm == active_rank ) { 2377 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2378 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2379 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2380 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2381 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2382 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2383 ierr = 
MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2384 } else { 2385 Mat matis_coarse_local_mat; 2386 ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2387 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2388 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2389 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2390 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2391 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2392 } 2393 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2394 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2395 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2396 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2397 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2398 Mat matis_coarse_local_mat; 2399 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2400 ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr); 2401 } 2402 2403 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2404 /* Preconditioner for coarse problem */ 2405 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2406 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2407 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2408 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 
2409 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2410 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2411 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2412 /* Allow user's customization */ 2413 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2414 /* Set Up PC for coarse problem BDDC */ 2415 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2416 if(dbg_flag) { 2417 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2419 } 2420 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2421 } 2422 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2423 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2424 if(dbg_flag) { 2425 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2426 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2427 } 2428 } 2429 } 2430 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2431 IS local_IS,global_IS; 2432 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2433 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2434 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2435 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2436 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2437 } 2438 2439 2440 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2441 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2442 PetscScalar m_one=-1.0; 2443 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2444 const KSPType check_ksp_type=KSPGMRES; 2445 2446 /* change coarse ksp object to an iterative method suitable for extreme 
eigenvalues' estimation */ 2447 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2448 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2449 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2450 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2451 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2452 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2453 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2454 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2455 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2456 if(dbg_flag) { 2457 kappa_2=lambda_max/lambda_min; 2458 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2459 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2460 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2461 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2462 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2463 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2464 } 2465 /* restore coarse ksp to default values */ 2466 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2467 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2468 ierr = KSPChebychevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2469 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2470 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 
2471 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2472 } 2473 2474 /* free data structures no longer needed */ 2475 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2476 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2477 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2478 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2479 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2480 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2481 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2487 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2488 { 2489 2490 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2491 PC_IS *pcis = (PC_IS*)pc->data; 2492 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2493 PCBDDCGraph mat_graph; 2494 Mat mat_adj; 2495 PetscInt **neighbours_set; 2496 PetscInt *queue_in_global_numbering; 2497 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2498 PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size; 2499 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2500 PetscBool same_set,flg_row; 2501 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2502 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2503 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2504 const PetscInt *neumann_nodes; 2505 const PetscInt *dirichlet_nodes; 2506 2507 PetscFunctionBegin; 2508 /* allocate and initialize needed graph structure */ 2509 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2510 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2511 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2512 ierr = 
MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2513 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2514 i = mat_graph->nvtxs; 2515 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2516 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2517 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2518 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2519 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2520 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2521 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2522 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2523 2524 /* Setting dofs splitting in mat_graph->which_dof */ 2525 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2526 PetscInt *is_indices; 2527 PetscInt is_size; 2528 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2529 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2530 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2531 for(j=0;j<is_size;j++) { 2532 mat_graph->which_dof[is_indices[j]]=i; 2533 } 2534 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2535 } 2536 /* use mat block size as vertex size */ 2537 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2538 } else { /* otherwise it assumes a constant block size */ 2539 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2540 for(i=0;i<mat_graph->nvtxs/bs;i++) { 
2541 for(s=0;s<bs;s++) { 2542 mat_graph->which_dof[i*bs+s]=s; 2543 } 2544 } 2545 vertex_size=1; 2546 } 2547 /* count number of neigh per node */ 2548 total_counts=0; 2549 for(i=1;i<pcis->n_neigh;i++){ 2550 s=pcis->n_shared[i]; 2551 total_counts+=s; 2552 for(j=0;j<s;j++){ 2553 mat_graph->count[pcis->shared[i][j]] += 1; 2554 } 2555 } 2556 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2557 if(pcbddc->NeumannBoundaries) { 2558 ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2559 ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2560 for(i=0;i<neumann_bsize;i++){ 2561 iindex = neumann_nodes[i]; 2562 if(mat_graph->count[iindex] > 1){ 2563 mat_graph->count[iindex]+=1; 2564 total_counts++; 2565 } 2566 } 2567 } 2568 /* allocate space for storing the set of neighbours of each node */ 2569 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2570 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); } 2571 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2572 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2573 for(i=1;i<pcis->n_neigh;i++){ 2574 s=pcis->n_shared[i]; 2575 for(j=0;j<s;j++) { 2576 k=pcis->shared[i][j]; 2577 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2578 mat_graph->count[k]+=1; 2579 } 2580 } 2581 /* set -1 fake neighbour to mimic Neumann boundary */ 2582 if(pcbddc->NeumannBoundaries) { 2583 for(i=0;i<neumann_bsize;i++){ 2584 iindex = neumann_nodes[i]; 2585 if(mat_graph->count[iindex] > 1){ 2586 neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2587 mat_graph->count[iindex]+=1; 2588 } 2589 } 2590 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2591 } 2592 /* sort set of sharing subdomains (needed for comparison 
below) */ 2593 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2594 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2595 if(pcbddc->DirichletBoundaries) { 2596 ierr = ISGetSize(pcbddc->DirichletBoundaries,&dirichlet_bsize);CHKERRQ(ierr); 2597 ierr = ISGetIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2598 for(i=0;i<dirichlet_bsize;i++){ 2599 mat_graph->count[dirichlet_nodes[i]]=0; 2600 } 2601 ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2602 } 2603 for(i=0;i<mat_graph->nvtxs;i++){ 2604 if(!mat_graph->count[i]){ /* interior nodes */ 2605 mat_graph->touched[i]=PETSC_TRUE; 2606 mat_graph->where[i]=0; 2607 nodes_touched++; 2608 } 2609 } 2610 mat_graph->ncmps = 0; 2611 while(nodes_touched<mat_graph->nvtxs) { 2612 /* find first untouched node in local ordering */ 2613 i=0; 2614 while(mat_graph->touched[i]) i++; 2615 mat_graph->touched[i]=PETSC_TRUE; 2616 mat_graph->where[i]=where_values; 2617 nodes_touched++; 2618 /* now find all other nodes having the same set of sharing subdomains */ 2619 for(j=i+1;j<mat_graph->nvtxs;j++){ 2620 /* check for same number of sharing subdomains and dof number */ 2621 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2622 /* check for same set of sharing subdomains */ 2623 same_set=PETSC_TRUE; 2624 for(k=0;k<mat_graph->count[j];k++){ 2625 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2626 same_set=PETSC_FALSE; 2627 } 2628 } 2629 /* I found a friend of mine */ 2630 if(same_set) { 2631 mat_graph->where[j]=where_values; 2632 mat_graph->touched[j]=PETSC_TRUE; 2633 nodes_touched++; 2634 } 2635 } 2636 } 2637 where_values++; 2638 } 2639 where_values--; if(where_values<0) where_values=0; 2640 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2641 /* Find connected components defined on 
the shared interface */ 2642 if(where_values) { 2643 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2644 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 2645 for(i=0;i<mat_graph->ncmps;i++) { 2646 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2647 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2648 } 2649 } 2650 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2651 for(i=0;i<where_values;i++) { 2652 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2653 if(mat_graph->where_ncmps[i]>1) { 2654 adapt_interface=1; 2655 break; 2656 } 2657 } 2658 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2659 if(where_values && adapt_interface_reduced) { 2660 2661 printf("Adapting Interface\n"); 2662 2663 PetscInt sum_requests=0,my_rank; 2664 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2665 PetscInt temp_buffer_size,ins_val,global_where_counter; 2666 PetscInt *cum_recv_counts; 2667 PetscInt *where_to_nodes_indices; 2668 PetscInt *petsc_buffer; 2669 PetscMPIInt *recv_buffer; 2670 PetscMPIInt *recv_buffer_where; 2671 PetscMPIInt *send_buffer; 2672 PetscMPIInt size_of_send; 2673 PetscInt *sizes_of_sends; 2674 MPI_Request *send_requests; 2675 MPI_Request *recv_requests; 2676 PetscInt *where_cc_adapt; 2677 PetscInt **temp_buffer; 2678 PetscInt *nodes_to_temp_buffer_indices; 2679 PetscInt *add_to_where; 2680 2681 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2682 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2683 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2684 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2685 /* first count how many neighbours per connected component I will receive from */ 2686 cum_recv_counts[0]=0; 2687 for(i=1;i<where_values+1;i++){ 2688 j=0; 2689 while(mat_graph->where[j] != i) j++; 2690 where_to_nodes_indices[i-1]=j; 2691 if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2692 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 2693 } 2694 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2695 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2696 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2697 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2698 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2699 for(i=0;i<cum_recv_counts[where_values];i++) { 2700 send_requests[i]=MPI_REQUEST_NULL; 2701 recv_requests[i]=MPI_REQUEST_NULL; 2702 } 2703 /* exchange with my neighbours the number of my connected components on the shared interface */ 2704 for(i=0;i<where_values;i++){ 2705 j=where_to_nodes_indices[i]; 2706 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2707 for(;k<mat_graph->count[j];k++){ 2708 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2709 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2710 sum_requests++; 2711 } 2712 } 2713 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2714 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2715 /* determine the connected component I need to adapt */ 2716 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2717 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2718 for(i=0;i<where_values;i++){ 2719 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2720 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2721 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2722 where_cc_adapt[i]=PETSC_TRUE; 2723 break; 2724 } 2725 } 2726 } 2727 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2728 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2729 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2730 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2731 sum_requests=0; 2732 start_of_send=0; 2733 start_of_recv=cum_recv_counts[where_values]; 2734 for(i=0;i<where_values;i++) { 2735 if(where_cc_adapt[i]) { 2736 size_of_send=0; 2737 for(j=i;j<mat_graph->ncmps;j++) { 2738 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 
2739 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2740 size_of_send+=1; 2741 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2742 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2743 } 2744 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2745 } 2746 } 2747 j = where_to_nodes_indices[i]; 2748 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2749 for(;k<mat_graph->count[j];k++){ 2750 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2751 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2752 sum_requests++; 2753 } 2754 sizes_of_sends[i]=size_of_send; 2755 start_of_send+=size_of_send; 2756 } 2757 } 2758 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2759 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2760 buffer_size=0; 2761 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2762 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2763 /* now exchange the data */ 2764 start_of_recv=0; 2765 start_of_send=0; 2766 sum_requests=0; 2767 for(i=0;i<where_values;i++) { 2768 if(where_cc_adapt[i]) { 2769 size_of_send = sizes_of_sends[i]; 2770 j = where_to_nodes_indices[i]; 2771 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2772 for(;k<mat_graph->count[j];k++){ 2773 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2774 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2775 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2776 start_of_recv+=size_of_recv; 2777 sum_requests++; 2778 } 2779 start_of_send+=size_of_send; 2780 } 2781 } 2782 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2783 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2784 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2785 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2786 for(j=0;j<buffer_size;) { 2787 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2788 k=petsc_buffer[j]+1; 2789 j+=k; 2790 } 2791 sum_requests=cum_recv_counts[where_values]; 2792 start_of_recv=0; 2793 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2794 global_where_counter=0; 2795 for(i=0;i<where_values;i++){ 2796 if(where_cc_adapt[i]){ 2797 temp_buffer_size=0; 2798 /* find nodes on the shared interface we need to adapt */ 2799 for(j=0;j<mat_graph->nvtxs;j++){ 2800 if(mat_graph->where[j]==i+1) { 2801 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2802 temp_buffer_size++; 2803 } else { 2804 nodes_to_temp_buffer_indices[j]=-1; 2805 } 2806 } 2807 /* allocate some temporary space */ 2808 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2809 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2810 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2811 for(j=1;j<temp_buffer_size;j++){ 2812 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2813 } 2814 /* analyze contributions from neighbouring subdomains for i-th conn comp 2815 temp buffer structure: 2816 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2817 3 neighs procs with structured connected components: 2818 neigh 0: [0 1 4], [2 3]; (2 connected components) 2819 neigh 1: [0 1], [2 3 4]; (2 connected components) 2820 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2821 tempbuffer (row-oriented) should be filled as: 2822 [ 0, 0, 0; 2823 0, 0, 1; 2824 1, 1, 2; 2825 1, 1, 2; 2826 0, 1, 0; ]; 2827 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
2828 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2829 */ 2830 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2831 ins_val=0; 2832 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2833 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2834 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2835 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2836 } 2837 buffer_size+=k; 2838 ins_val++; 2839 } 2840 start_of_recv+=size_of_recv; 2841 sum_requests++; 2842 } 2843 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2844 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2845 for(j=0;j<temp_buffer_size;j++){ 2846 if(!add_to_where[j]){ /* found a new cc */ 2847 global_where_counter++; 2848 add_to_where[j]=global_where_counter; 2849 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2850 same_set=PETSC_TRUE; 2851 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2852 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2853 same_set=PETSC_FALSE; 2854 break; 2855 } 2856 } 2857 if(same_set) add_to_where[k]=global_where_counter; 2858 } 2859 } 2860 } 2861 /* insert new data in where array */ 2862 temp_buffer_size=0; 2863 for(j=0;j<mat_graph->nvtxs;j++){ 2864 if(mat_graph->where[j]==i+1) { 2865 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2866 temp_buffer_size++; 2867 } 2868 } 2869 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2870 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2871 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2872 } 2873 } 2874 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2875 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2876 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 2877 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2878 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2879 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2880 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2881 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2882 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2883 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2884 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2885 if(global_where_counter) { 2886 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2887 global_where_counter=0; 2888 for(i=0;i<mat_graph->nvtxs;i++){ 2889 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2890 global_where_counter++; 2891 for(j=i+1;j<mat_graph->nvtxs;j++){ 2892 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2893 mat_graph->where[j]=global_where_counter; 2894 mat_graph->touched[j]=PETSC_TRUE; 2895 } 2896 } 2897 mat_graph->where[i]=global_where_counter; 2898 mat_graph->touched[i]=PETSC_TRUE; 2899 } 2900 } 2901 where_values=global_where_counter; 2902 } 2903 if(global_where_counter) { 2904 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2905 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2906 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2907 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2908 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2909 for(i=0;i<mat_graph->ncmps;i++) { 2910 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2911 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
2912 } 2913 } 2914 } /* Finished adapting interface */ 2915 PetscInt nfc=0; 2916 PetscInt nec=0; 2917 PetscInt nvc=0; 2918 PetscBool twodim_flag=PETSC_FALSE; 2919 for (i=0; i<mat_graph->ncmps; i++) { 2920 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2921 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 2922 nfc++; 2923 } else { /* note that nec will be zero in 2d */ 2924 nec++; 2925 } 2926 } else { 2927 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2928 } 2929 } 2930 2931 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 2932 nec = nfc; 2933 nfc = 0; 2934 twodim_flag = PETSC_TRUE; 2935 } 2936 /* allocate IS arrays for faces, edges. Vertices need a single index set. 2937 Reusing space allocated in mat_graph->where for creating IS objects */ 2938 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 2939 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 2940 use_faces=PETSC_TRUE; 2941 } 2942 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 2943 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 2944 use_edges=PETSC_TRUE; 2945 } 2946 nfc=0; 2947 nec=0; 2948 for (i=0; i<mat_graph->ncmps; i++) { 2949 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2950 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2951 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 2952 } 2953 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 2954 if(twodim_flag) { 2955 if(use_edges) { 2956 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 2957 nec++; 2958 } 2959 } else { 2960 if(use_faces) { 2961 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 2962 nfc++; 2963 } 2964 } 2965 } else { 2966 if(use_edges) { 2967 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 
2968 nec++; 2969 } 2970 } 2971 } 2972 } 2973 pcbddc->n_ISForFaces=nfc; 2974 pcbddc->n_ISForEdges=nec; 2975 nvc=0; 2976 if( !pcbddc->constraints_flag ) { 2977 for (i=0; i<mat_graph->ncmps; i++) { 2978 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 2979 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 2980 mat_graph->where[nvc]=mat_graph->queue[j]; 2981 nvc++; 2982 } 2983 } 2984 } 2985 } 2986 /* sort vertex set (by local ordering) */ 2987 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 2988 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 2989 2990 if(pcbddc->dbg_flag) { 2991 PetscViewer viewer=pcbddc->dbg_viewer; 2992 2993 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2994 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2995 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2996 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 2997 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2998 for(i=0;i<mat_graph->nvtxs;i++) { 2999 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3000 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3001 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3002 } 3003 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3004 } 3005 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d 
connected components", mat_graph->ncmps);CHKERRQ(ierr); 3006 for(i=0;i<mat_graph->ncmps;i++) { 3007 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3008 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3009 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3010 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); 3011 } 3012 } 3013 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);*/ 3014 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3015 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3016 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3017 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3018 } 3019 3020 /* Restore CSR structure into sequantial matrix and free memory space no longer needed */ 3021 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 3022 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 3023 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 3024 /* Free graph structure */ 3025 if(mat_graph->nvtxs){ 3026 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 3027 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 3028 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3029 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3030 ierr = 
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3031 } 3032 ierr = PetscFree(mat_graph);CHKERRQ(ierr); 3033 3034 PetscFunctionReturn(0); 3035 3036 } 3037 3038 /* -------------------------------------------------------------------------- */ 3039 3040 /* The following code has been adapted from function IsConnectedSubdomain contained 3041 in source file contig.c of METIS library (version 5.0.1) */ 3042 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3045 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3046 { 3047 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3048 PetscInt *xadj, *adjncy, *where, *queue; 3049 PetscInt *cptr; 3050 PetscBool *touched; 3051 3052 PetscFunctionBegin; 3053 3054 nvtxs = graph->nvtxs; 3055 xadj = graph->xadj; 3056 adjncy = graph->adjncy; 3057 where = graph->where; 3058 touched = graph->touched; 3059 queue = graph->queue; 3060 cptr = graph->cptr; 3061 3062 for (i=0; i<nvtxs; i++) 3063 touched[i] = PETSC_FALSE; 3064 3065 cum_queue=0; 3066 ncmps=0; 3067 3068 for(n=0; n<n_dist; n++) { 3069 pid = n+1; 3070 nleft = 0; 3071 for (i=0; i<nvtxs; i++) { 3072 if (where[i] == pid) 3073 nleft++; 3074 } 3075 for (i=0; i<nvtxs; i++) { 3076 if (where[i] == pid) 3077 break; 3078 } 3079 touched[i] = PETSC_TRUE; 3080 queue[cum_queue] = i; 3081 first = 0; last = 1; 3082 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3083 ncmps_pid = 0; 3084 while (first != nleft) { 3085 if (first == last) { /* Find another starting vertex */ 3086 cptr[++ncmps] = first+cum_queue; 3087 ncmps_pid++; 3088 for (i=0; i<nvtxs; i++) { 3089 if (where[i] == pid && !touched[i]) 3090 break; 3091 } 3092 queue[cum_queue+last] = i; 3093 last++; 3094 touched[i] = PETSC_TRUE; 3095 } 3096 i = queue[cum_queue+first]; 3097 first++; 3098 for (j=xadj[i]; j<xadj[i+1]; j++) { 3099 k = adjncy[j]; 3100 if (where[k] == pid && !touched[k]) { 3101 queue[cum_queue+last] = k; 3102 last++; 3103 
touched[k] = PETSC_TRUE; 3104 } 3105 } 3106 } 3107 cptr[++ncmps] = first+cum_queue; 3108 ncmps_pid++; 3109 cum_queue=cptr[ncmps]; 3110 graph->where_ncmps[n] = ncmps_pid; 3111 } 3112 graph->ncmps = ncmps; 3113 3114 PetscFunctionReturn(0); 3115 } 3116 3117