/* TODOLIST
   change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment):
   - mind the problem with coarsening_factor
   - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
   - remove coarse enums and allow use of PCBDDCGetCoarseKSP
   - remove metis dependency -> use MatPartitioning for multilevel -> assemble serial adjacency in ManageLocalBoundaries?
   - add a level slot to the bddc data structure and associated Set/Get functions
   code refactoring:
   - remove indices to constraints, quadrature constraints and similar from PCBDDCManageLocalBoundaries
   - analyze MatSetNearNullSpace as suggested by Jed
   - build constraint matrix after SVD on local connected components
   - pick better names for static functions
   check log_summary for leaking (actually: 1 Vector per level plus only 1 IS)
   inexact solvers: global preconditioner application is ready; ask the developers (Jed?) how to best implement Dohrmann's approach (PCSHELL?)
   change options structure:
   - insert BDDC into the MG framework?
   provide other ops? Ask the developers
   remove all unused printfs
   remove // comments and adhere to PETSc code requirements
   man pages
*/

/* ----------------------------------------------------------------------------------------------------------------------------------------------
   Implementation of BDDC preconditioner based on:
   C. Dohrmann, "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications, Volume 14, Issue 2, pages 149-168, March 2007
   ---------------------------------------------------------------------------------------------------------------------------------------------- */

#include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_BDDC"
PetscErrorCode PCSetFromOptions_BDDC(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;
  /* order of choices must match the CoarseProblemType enum defined in bddc.h */
  static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""};

  PetscFunctionBegin;
  ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr);
  /* Verbose debugging of main data structures */
  ierr = PetscOptionsBool("-pc_bddc_check_all","Verbose (debugging) output for PCBDDC","none",pcbddc->dbg_flag,&pcbddc->dbg_flag,PETSC_NULL);CHKERRQ(ierr);
  /* Some customization for default primal space */
  ierr = PetscOptionsBool("-pc_bddc_vertices_only","Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag,&pcbddc->vertices_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_faces_only","Use faces only in coarse space (i.e. discard edges)","none",pcbddc->faces_flag,&pcbddc->faces_flag,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_bddc_edges_only","Use edges only in coarse space (i.e. discard faces)","none",pcbddc->edges_flag,&pcbddc->edges_flag,PETSC_NULL);CHKERRQ(ierr);
  /* Coarse solver context */
  ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr);
  /* Switch between two different applications of BDDC: interface dofs only (M_2) or all dofs (M_3) */
  ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
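/*
   Illustrative command line (a sketch using only the options registered above;
   it assumes PCBDDC is selected as usual with -pc_type bddc and that the
   application executable and process count are hypothetical):

     mpiexec -n 8 ./myapp -pc_type bddc -pc_bddc_coarse_problem_type multilevel \
                          -pc_bddc_coarsening_ratio 8 -pc_bddc_check_all
*/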
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC"
static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  pcbddc->coarse_problem_type = CPT;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetCoarseProblemType"
/*@
   PCBDDCSetCoarseProblemType - Set the type of the coarse problem in PCBDDC.

   Logically collective on PC

   Input Parameters:
+  pc - the preconditioning context
-  CPT - the coarse problem type: one of SEQUENTIAL_BDDC, REPLICATED_BDDC, PARALLEL_BDDC or MULTILEVEL_BDDC

   Level: intermediate

   Notes:
   All processes of the PC communicator must call this routine with the same value.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC"
static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the IS is copied, not referenced: destroy any previous value, then duplicate the new one (which can be PETSC_NULL) */
  ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  if (NeumannBoundaries) {
    ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
    ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetNeumannBoundaries"
/*@
   PCBDDCSetNeumannBoundaries - Set the index set defining the subdomain part of
   the Neumann boundaries for the global problem.

   Not collective

   Input Parameters:
+  pc - the preconditioning context
-  NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL)

   Level: intermediate

   Notes:
   The sequential IS is copied; the user must destroy the IS object passed in.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
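/*
   Illustrative usage sketch (hypothetical index data; the IS lists, in local
   numbering, the subdomain dofs lying on the Neumann part of the boundary):

     IS       nis;
     PetscInt nidx[] = {0,1,5};                   hypothetical local indices
     ierr = ISCreateGeneral(PETSC_COMM_SELF,3,nidx,PETSC_COPY_VALUES,&nis);CHKERRQ(ierr);
     ierr = PCBDDCSetNeumannBoundaries(pc,nis);CHKERRQ(ierr);
     ierr = ISDestroy(&nis);CHKERRQ(ierr);        the IS is copied by the PC
*/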
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC"
static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries)
{
  PC_BDDC *pcbddc = (PC_BDDC*)pc->data;

  PetscFunctionBegin;
  if (pcbddc->NeumannBoundaries) {
    *NeumannBoundaries = pcbddc->NeumannBoundaries;
  } else {
    /* not an error: the man page documents that PETSC_NULL is returned when the user has not provided the information */
    *NeumannBoundaries = PETSC_NULL;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCGetNeumannBoundaries"
/*@
   PCBDDCGetNeumannBoundaries - Get the index set defining the subdomain part of
   the Neumann boundaries for the global problem.

   Not collective

   Input Parameter:
.  pc - the preconditioning context

   Output Parameter:
.  NeumannBoundaries - index set defining the subdomain part of Neumann boundaries

   Level: intermediate

   Notes:
   If the user has not yet provided such information, PETSC_NULL is returned.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
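/*
   Illustrative query sketch: the returned IS is owned by the PC and must not be
   destroyed by the caller; a PETSC_NULL result means no Neumann information was
   provided.

     IS nb;
     ierr = PCBDDCGetNeumannBoundaries(pc,&nb);CHKERRQ(ierr);
     if (nb) { ierr = ISView(nb,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); }
*/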
/* -------------------------------------------------------------------------- */
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC"
static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[])
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Destroy ISs if they were already set, and free the old array */
  for (i=0;i<pcbddc->n_ISForDofs;i++) {
    ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
  /* allocate space then copy the ISs */
  ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr);
  for (i=0;i<n_is;i++) {
    ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
    ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr);
  }
  pcbddc->n_ISForDofs = n_is;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetDofsSplitting"
/*@
   PCBDDCSetDofsSplitting - Set index sets defining how the dofs are split into fields.

   Not collective

   Input Parameters:
+  pc - the preconditioning context
.  n_is - number of index sets defining the dofs splitting
-  ISForDofs - array of ISs describing the dofs splitting

   Level: intermediate

   Notes:
   The sequential ISs are copied; the user must destroy the array of ISs passed in.

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
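/*
   Illustrative sketch for a two-field problem (hypothetical field layout: the
   first nv local dofs belong to one field and the next np to another; each IS
   lists, in local numbering, the dofs of one field):

     IS fields[2];
     ierr = ISCreateStride(PETSC_COMM_SELF,nv,0,1,&fields[0]);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,np,nv,1,&fields[1]);CHKERRQ(ierr);
     ierr = PCBDDCSetDofsSplitting(pc,2,fields);CHKERRQ(ierr);
     ierr = ISDestroy(&fields[0]);CHKERRQ(ierr);  the ISs are copied by the PC
     ierr = ISDestroy(&fields[1]);CHKERRQ(ierr);
*/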
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_BDDC"
/* -------------------------------------------------------------------------- */
/*
   PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner
                  by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
PetscErrorCode PCSetUp_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis = (PC_IS*)(pc->data);
  PetscInt       im_active = 0;

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    /* BDDC needs a local Neumann problem different from the one defined in PCISSetUp,
       so set the pcis local solvers to PCNONE to avoid unneeded computation;
       the (same) Dirichlet problem is built directly by BDDC */
    ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr);
    ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr);
    /* Set up all the "iterative substructuring" common block */
    ierr = PCISSetUp(pc);CHKERRQ(ierr);
    if (pcbddc->dbg_flag) {
      ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
    }

    /* TODO: move this code fragment */
    if (pcis->n) im_active = 1;
    ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);

    /* Set up grid quantities for BDDC */
    /* TODO: only active procs must call this -> remove synchronized print inside (the only point of sync) */
    ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr);

    /* Create coarse and local structures used for evaluating the action of the preconditioner */
    ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr);

    /* processes fictitiously involved in the multilevel hierarchy should not call ISLocalToGlobalMappingRestoreInfo */
    if (!pcis->n_neigh) pcis->ISLocalToGlobalMappingGetInfoWasCalled = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
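/*
   Informal summary of the preconditioner application implemented below (a sketch
   of the M_2 operator of the reference article; M_3 adds the interior corrections
   guarded by pcbddc->prec_type). With dofs split into interior (I) and interface (B):

     z_I  = A_II^{-1} r_I                 (first Dirichlet solve)
     g_B  = r_B - A_BI z_I                (statically condensed residual)
     w_B  = interface preconditioner applied to g_B
     z_B  = w_B
     z_I <- z_I - A_II^{-1} A_IB w_B      (second Dirichlet solve)
*/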
/* -------------------------------------------------------------------------- */
/*
   PCApply_BDDC - Applies the BDDC preconditioner to a vector.

   Input Parameters:
+  pc - the preconditioner context
-  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
*/
#undef __FUNCT__
#define __FUNCT__ "PCApply_BDDC"
PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z)
{
  PC_IS          *pcis = (PC_IS*)(pc->data);
  PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
  PetscErrorCode ierr;
  PetscScalar    one = 1.0;
  PetscScalar    m_one = -1.0;

  /* This code is similar to that provided in nn.c for the PCNN preconditioner,
     with the NN interface preconditioner replaced by BDDC. It also supports the
     M_3 preconditioner of the reference article (active if pcbddc->prec_type == PETSC_TRUE) */

  PetscFunctionBegin;
  /* First Dirichlet solve */
  ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
  /*
     Assemble the right-hand side for the BDDC operator:
     - vec1_D for the Dirichlet part (if needed, i.e. prec_type == PETSC_TRUE)
     - the interface part of the global vector z
  */
  ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
  ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr);
  ierr = VecCopy(r,z);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /*
     Apply the interface preconditioner
     Results are stored in:
     - vec1_D (if needed, i.e. prec_type == PETSC_TRUE)
     - the interface part of the global vector z
  */
  ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr);

  /* Second Dirichlet solve and assembling of output */
  ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); }
  ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = VecAXPY(pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); }
  ierr = VecAXPY(pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
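/*
   Informal summary of the interface operator applied below (a sketch): with D the
   partition-of-unity scaling and PHI the matrix of local coarse basis functions,

     w       = D z_B                               (weighted interface residual)
     coarse  : gather PHI^T w and solve the coarse problem for lambda
     local   : solve the constrained (saddle point) problem on the R dofs
     z_B     = D ( PHI lambda + local solution )   (weighted sum of corrections)
*/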
/* -------------------------------------------------------------------------- */
/*
   PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
  PC_IS          *pcis = (PC_IS*)(pc->data);
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  /* Get the local boundary part of z and apply the partition of unity */
  ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr);

  /* Application of PHI^T */
  ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }

  /* Scatter data to coarse_rhs */
  if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Local solution on the R nodes */
  ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  if (pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if (pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* Coarse solution */
  ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if (pcbddc->coarse_rhs) { ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); }
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /* Sum contributions from the two levels: apply the partition of unity and add boundary values */
  ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
  ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr);
  if (pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
  ierr = VecSet(z,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
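/*
   Informal summary (a sketch) of the local correction computed below: given the
   residual w on the R dofs, PCBDDCSolveSaddlePoint computes the first block of

     [ A_RR  C_CR^T ] [ v      ]   [ w ]
     [ C_CR   0     ] [ lambda ] = [ 0 ]

   by block elimination, reusing local_auxmat1 = (C_CR A_RR^{-1} C_CR^T)^{-1} C_CR
   and local_auxmat2 = -A_RR^{-1} C_CR^T assembled in PCBDDCCoarseSetUp:

     v0 = A_RR^{-1} w,  lambda = local_auxmat1 v0,  v = v0 + local_auxmat2 lambda
*/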
/* -------------------------------------------------------------------------- */
/*
   PCBDDCSolveSaddlePoint - Solve the local constrained problem on the R dofs.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSolveSaddlePoint"
static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
  if (pcbddc->n_constraints) {
    ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
    ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCBDDCScatterCoarseDataBegin - Begin moving local primal data to/from the coarse problem.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  switch (pcbddc->coarse_communications_type) {
    case SCATTERS_BDDC:
      ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      break;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCBDDCScatterCoarseDataEnd - End moving local primal data to/from the coarse problem.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
  PetscScalar    *array_to;
  PetscScalar    *array_from;
  MPI_Comm       comm = ((PetscObject)pc)->comm;
  PetscInt       i;

  PetscFunctionBegin;
  switch (pcbddc->coarse_communications_type) {
    case SCATTERS_BDDC:
      ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      if (vec_from) { ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr); }
      if (vec_to)   { ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr); }
      switch (pcbddc->coarse_problem_type) {
        case SEQUENTIAL_BDDC:
          if (smode == SCATTER_FORWARD) {
            ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
            if (vec_to) {
              for (i=0;i<pcbddc->replicated_primal_size;i++) array_to[pcbddc->replicated_local_primal_indices[i]] += pcbddc->replicated_local_primal_values[i];
            }
          } else {
            if (vec_from) {
              for (i=0;i<pcbddc->replicated_primal_size;i++) pcbddc->replicated_local_primal_values[i] = array_from[pcbddc->replicated_local_primal_indices[i]];
            }
            ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
          }
          break;
        case REPLICATED_BDDC:
          if (smode == SCATTER_FORWARD) {
            ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
            for (i=0;i<pcbddc->replicated_primal_size;i++) array_to[pcbddc->replicated_local_primal_indices[i]] += pcbddc->replicated_local_primal_values[i];
          } else { /* no communication needed for SCATTER_REVERSE since the needed data is already present */
            for (i=0;i<pcbddc->local_primal_size;i++) array_to[i] = array_from[pcbddc->local_primal_indices[i]];
          }
          break;
        case MULTILEVEL_BDDC:
          break;
        case PARALLEL_BDDC:
          break;
      }
      if (vec_from) { ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr); }
      if (vec_to)   { ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr); }
      break;
  }
  PetscFunctionReturn(0);
}
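/*
   Informal note on the coarse data movement handled above (a sketch): with the
   SEQUENTIAL_BDDC layout, local primal contributions are gathered (MPI_Gatherv)
   on rank 0, which owns the whole coarse problem, and the coarse solution is sent
   back with MPI_Scatterv; with REPLICATED_BDDC every process holds a full copy of
   the coarse problem, so a single MPI_Allgatherv suffices and SCATTER_REVERSE
   needs no communication. SCATTERS_BDDC delegates the same movement to a VecScatter.
*/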
/* -------------------------------------------------------------------------- */
/*
   PCDestroy_BDDC - Destroys the private context for the BDDC preconditioner
                    that was created with PCCreate_BDDC().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_BDDC"
PetscErrorCode PCDestroy_BDDC(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* free data created by PCIS */
  ierr = PCISDestroy(pc);CHKERRQ(ierr);
  /* free BDDC data */
  ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
  ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
  ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
  ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
  ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->vertices);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
  /* this buffer was not allocated with PetscMalloc */
  if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); }
  ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
  ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
  if (pcbddc->n_constraints) {
    ierr = PetscFree(pcbddc->indices_to_constraint[0]);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->indices_to_constraint);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->quadrature_constraint[0]);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->quadrature_constraint);CHKERRQ(ierr);
    ierr = PetscFree(pcbddc->sizes_of_constraint);CHKERRQ(ierr);
  }
  for (i=0;i<pcbddc->n_ISForDofs;i++) {
    ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
  /* Free the private data structure that was hanging off the PC */
  ierr = PetscFree(pcbddc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
/*MC
   PCBDDC - Balancing Domain Decomposition by Constraints preconditioner.

   Options Database Keys:
+  -pc_bddc_check_all - verbose (debugging) output
.  -pc_bddc_vertices_only - use vertices only in the coarse space (i.e. discard constraints)
.  -pc_bddc_constraints_only - use constraints only in the coarse space (i.e. discard vertices)
.  -pc_bddc_faces_only - use faces only in the coarse space (i.e. discard edges)
.  -pc_bddc_edges_only - use edges only in the coarse space (i.e. discard faces)
.  -pc_bddc_coarse_problem_type - set the coarse problem type (sequential, replicated, parallel or multilevel)
.  -pc_bddc_switch_preconditioning_type - switch between the M_2 (default) and M_3 preconditioners (as defined by Dohrmann)
-  -pc_bddc_coarsening_ratio - set the coarsening ratio used in multilevel coarsening

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

   Unlike more 'conventional' interface preconditioners, this iterates over ALL the
   degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
   on the subdomains).

   Dedicated options prefixes for the coarse, Dirichlet and Neumann solvers have not been
   wired up yet (see the TODO list at the top of this file).

   Contributed by Stefano Zampini

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
M*/
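/*
   Illustrative usage sketch (hypothetical variable names; assumes the operator A
   has already been assembled as a MATIS matrix and b, x are compatible vectors):

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr);
     ierr = PCBDDCSetNeumannBoundaries(pc,neumann_is);CHKERRQ(ierr);  optional
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/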
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc;

  PetscFunctionBegin;
  /* Create the private data structure for this preconditioner and attach it to the PC object */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data = (void*)pcbddc;
  /* create PCIS data structure */
  ierr = PCISCreate(pc);CHKERRQ(ierr);
  /* BDDC specific */
  pcbddc->coarse_vec                      = 0;
  pcbddc->coarse_rhs                      = 0;
  pcbddc->coarse_ksp                      = 0;
  pcbddc->coarse_phi_B                    = 0;
  pcbddc->coarse_phi_D                    = 0;
  pcbddc->vec1_P                          = 0;
  pcbddc->vec1_R                          = 0;
  pcbddc->vec2_R                          = 0;
  pcbddc->local_auxmat1                   = 0;
  pcbddc->local_auxmat2                   = 0;
  pcbddc->R_to_B                          = 0;
  pcbddc->R_to_D                          = 0;
  pcbddc->ksp_D                           = 0;
  pcbddc->ksp_R                           = 0;
  pcbddc->n_constraints                   = 0;
  pcbddc->n_vertices                      = 0;
  pcbddc->vertices                        = 0;
  pcbddc->indices_to_constraint           = 0;
  pcbddc->quadrature_constraint           = 0;
  pcbddc->sizes_of_constraint             = 0;
  pcbddc->local_primal_indices            = 0;
  pcbddc->prec_type                       = PETSC_FALSE;
  pcbddc->NeumannBoundaries               = 0;
  pcbddc->local_primal_sizes              = 0;
  pcbddc->local_primal_displacements      = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values  = 0;
  pcbddc->coarse_loc_to_glob              = 0;
  pcbddc->dbg_flag                        = PETSC_FALSE;
  pcbddc->vertices_flag                   = PETSC_FALSE;
  pcbddc->constraints_flag                = PETSC_FALSE;
  pcbddc->faces_flag                      = PETSC_FALSE;
  pcbddc->edges_flag                      = PETSC_FALSE;
  pcbddc->coarsening_ratio                = 8;
  /* function pointers */
  pc->ops->apply               = PCApply_BDDC;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_BDDC;
  pc->ops->destroy             = PCDestroy_BDDC;
  pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;
  /* composing functions */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   PCBDDCCoarseSetUp - Set up the coarse problem and the local solvers and scatters used by BDDC.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS          *pcis = (PC_IS*)(pc->data);
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  IS             is_R_local;
  IS             is_V_local;
  IS             is_C_local;
  IS             is_aux1;
  IS             is_aux2;
  const VecType  impVecType;
  const MatType  impMatType;
  PetscInt       n_R = 0;
  PetscInt       n_D = 0;
  PetscInt       n_B = 0;
  PetscMPIInt    totprocs;
  PetscScalar    zero = 0.0;
  PetscScalar    one = 1.0;
  PetscScalar    m_one = -1.0;
  PetscScalar    *array;
  PetscScalar    *coarse_submat_vals;
  PetscInt       *idx_R_local;
  PetscInt       *idx_V_B;
  PetscScalar    *coarsefunctions_errors;
  PetscScalar    *constraints_errors;
  /* auxiliary indices */
  PetscInt       s,i,j,k;
  /* for verbose output of bddc */
  PetscViewer    viewer = pcbddc->dbg_viewer;
  PetscBool      dbg_flag = pcbddc->dbg_flag;
  /* for counting coarse dofs */
  PetscScalar    coarsesum;

  PetscFunctionBegin;
  /* Set non-overlapping dimensions */
  n_B = pcis->n_B; n_D = pcis->n - n_B;
  /* Set local primal size */
  pcbddc->local_primal_size = pcbddc->n_vertices + pcbddc->n_constraints;
  /* First, count the coarse dofs: note that a constraint is allowed to exist on a subdomain
     without its counterpart on the neighbouring subdomain (if the user wants) */
  ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  for (i=0;i<pcbddc->n_vertices;i++) array[pcbddc->vertices[i]] = one;
  for (i=0;i<pcbddc->n_constraints;i++) {
    for (j=0;j<pcbddc->sizes_of_constraint[i];j++) {
      k = pcbddc->indices_to_constraint[i][j];
      if (array[k] == zero) {
        array[k] = one;
        break;
      }
    }
  }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  for (i=0;i<pcis->n;i++) { if (array[i] > zero) array[i] = one/array[i]; }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
  pcbddc->coarse_size = (PetscInt)coarsesum;

  /* Dohrmann's notation: dofs split into R (Remaining: all dofs but the vertices) and V (Vertices) */
  ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr);
  ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  for (i=0;i<pcbddc->n_vertices;i++) array[pcbddc->vertices[i]] = zero;
  ierr = PetscMalloc((pcis->n - pcbddc->n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
  for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } }
  ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
  if (dbg_flag) {
    ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,pcbddc->n_vertices,pcbddc->n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }
  /* Allocate needed vectors; set the Mat and Vec types for local objects needed by the BDDC preconditioner */
  impMatType = MATSEQDENSE;
  impVecType = VECSEQ;
  ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
  ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr);
  ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr);
  ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
  ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr);
  ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr);
  ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);

  /* Create some needed index sets */
  /* For submatrices */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr);
  if (pcbddc->n_vertices)    { ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->vertices,PETSC_COPY_VALUES,&is_V_local);CHKERRQ(ierr); }
  if (pcbddc->n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,pcbddc->n_constraints,pcbddc->n_vertices,1,&is_C_local);CHKERRQ(ierr); }
  /* For the VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
  {
    PetscInt  *aux_array1;
    PetscInt  *aux_array2;
    PetscReal value;

    ierr = PetscMalloc((pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
    ierr = PetscMalloc((pcis->n_B-pcbddc->n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);

    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[s] = i; s++; } }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } }
    ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
    ierr = PetscFree(aux_array1);CHKERRQ(ierr);
    ierr = PetscFree(aux_array2);CHKERRQ(ierr);
    ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
    ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
    if (pcbddc->prec_type || dbg_flag) {
      ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } }
      ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr);
      ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
      ierr = PetscFree(aux_array1);CHKERRQ(ierr);
      ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
    }

    /* Check scatters */
    if (dbg_flag) {

      Vec vec_aux;

      ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr);
      ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr);
      ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = VecDestroy(&vec_aux);CHKERRQ(ierr);

      ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr);
      ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr);
      ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = VecDestroy(&vec_aux);CHKERRQ(ierr);

      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D scatter\n");CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr);
      ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr);
      ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = VecDestroy(&vec_aux);CHKERRQ(ierr);

      ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr);
      ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr);
      ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr);
      ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = VecDestroy(&vec_aux);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

    }
  }

  /* vertices in boundary numbering */
  if (pcbddc->n_vertices) {
    ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for (i=0;i<pcbddc->n_vertices;i++) array[pcbddc->vertices[i]] = i;
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    for (i=0;i<pcbddc->n_vertices;i++) {
      s = 0;
      while (array[s] != i) s++;
      idx_V_B[i] = s;
    }
    ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
  }
  /* Create PC contexts for the local Dirichlet and Neumann problems */
  {
    Mat A_RR;
    PC  pc_temp;
    /* The matrix for the Dirichlet problem is A_II -> we already have it from the pcis.c code */
    ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
    /* TODO: set a dedicated options prefix with KSPSetOptionsPrefix() */
    /* default */
    ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
    /* Set up the KSP for the Dirichlet problem of BDDC */
    ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
    /* The matrix for the Neumann problem is A_RR -> it must be created */
    ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
    ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
    /* TODO: set a dedicated options prefix with KSPSetOptionsPrefix() */
    /* default */
    ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
    /* Set up the KSP for the Neumann problem of BDDC */
    ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
    /* check the Dirichlet and Neumann solvers */
    if (pcbddc->dbg_flag) {
      Vec       temp_vec;
      PetscReal value;

      ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr);
      ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
      ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr);
      ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr);
      ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = VecDestroy(&temp_vec);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr);
      ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
      ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr);
      ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr);
      ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr);
      ierr = VecDestroy(&temp_vec);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
    /* free the Neumann problem's matrix */
    ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
  }

  /* Assemble all remaining structures needed to apply BDDC */
  {
    Mat           A_RV,A_VR,A_VV;
    Mat           M1,M2;
    Mat           C_CR;
    Mat           CMAT,AUXMAT;
    Vec           vec1_C;
    Vec           vec2_C;
    Vec           vec1_V;
    Vec           vec2_V;
    PetscInt      *nnz;
    PetscInt      *auxindices;
    PetscInt      index;
    PetscScalar   *array2;
    MatFactorInfo matinfo;

    /* Allocate some extra storage just to be safe */
    ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
    ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
    for (i=0;i<pcis->n;i++) auxindices[i] = i;

    /* some work vectors on vertices and/or constraints */
    if (pcbddc->n_vertices) {
      ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
      ierr = VecSetSizes(vec1_V,pcbddc->n_vertices,pcbddc->n_vertices);CHKERRQ(ierr);
      ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
      ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
    }
    if (pcbddc->n_constraints) {
      ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr);
      ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr);
      ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr);
      ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr);
      ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr);
    }
    /* Create the constraint matrix C = [I 0; 0 const] */
    ierr = MatCreate(PETSC_COMM_SELF,&CMAT);CHKERRQ(ierr);
    ierr = MatSetType(CMAT,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(CMAT,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
    /* nonzeros */
    for (i=0;i<pcbddc->n_vertices;i++)    nnz[i] = 1;
    for (i=0;i<pcbddc->n_constraints;i++) nnz[i+pcbddc->n_vertices] = pcbddc->sizes_of_constraint[i];
    ierr = MatSeqAIJSetPreallocation(CMAT,0,nnz);CHKERRQ(ierr);
    for (i=0;i<pcbddc->n_vertices;i++) {
      ierr = MatSetValue(CMAT,i,pcbddc->vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
    }
    for (i=0;i<pcbddc->n_constraints;i++) {
      index = i+pcbddc->n_vertices;
      ierr = MatSetValues(CMAT,1,&index,pcbddc->sizes_of_constraint[i],pcbddc->indices_to_constraint[i],pcbddc->quadrature_constraint[i],INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(CMAT,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* Precompute quantities needed for the preprocessing and the application of BDDC */

    if (pcbddc->n_constraints) {
      ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,pcbddc->n_constraints,n_R,pcbddc->n_constraints);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);

      /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by the BDDC application */
      for (i=0;i<pcbddc->n_constraints;i++) {
        ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
        ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
        for (j=0;j<pcbddc->sizes_of_constraint[i];j++) array[pcbddc->indices_to_constraint[i][j]] = -pcbddc->quadrature_constraint[i][j];
        ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
        for (j=0;j<n_R;j++) array2[j] = array[idx_R_local[j]];
        ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
        ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
        ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr);
        index = i;
        ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd  (pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      /* Create the constraint matrix on the R nodes: C_{CR} */
      ierr = MatGetSubMatrix(CMAT,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
      ierr = ISDestroy(&is_C_local);CHKERRQ(ierr);

      /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */
      ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr);
      ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->n_constraints,0,1,&is_aux1);CHKERRQ(ierr);
      ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr);
      ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);

      /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preprocessing (it should be dense) */
      ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
      ierr = MatSetSizes(M1,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr);
      ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
      for (i=0;i<pcbddc->n_constraints;i++) {
        ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
        ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
        ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr);
        ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr);
        ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr);
        index = i;
        ierr = MatSetValues(M1,pcbddc->n_constraints,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd  (M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
      /* Assemble local_auxmat1 = M1*C_{CR} needed by the BDDC application in KSP and in preprocessing */
      ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
    }

    /* Get submatrices from the subdomain matrix */
    if (pcbddc->n_vertices) {
      ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
      /* Assemble M2 = A_RR^{-1} A_RV */
      ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr);
      ierr = MatSetSizes(M2,n_R,pcbddc->n_vertices,n_R,pcbddc->n_vertices);CHKERRQ(ierr);
      ierr = MatSetType(M2,impMatType);CHKERRQ(ierr);
      for (i=0;i<pcbddc->n_vertices;i++) {
        ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
        ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
        ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
        ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
        index = i;
        ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr);
        ierr = MatSetValues(M2,n_R,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd  (M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    }

    /* Matrix of coarse basis functions (local) */
    ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
    ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
    ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
    if (pcbddc->prec_type || dbg_flag) {
      ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
    }

    /* Subdomain contribution (non-overlapping) to the coarse matrix */
    if (dbg_flag) {
      ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr);
      ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr);
    }
    ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
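    /*
       Informal summary (a sketch) of the two loops below: for each primal dof i the
       coarse basis function phi_i and its multipliers lambda_i solve the local
       saddle point problem

         [ A   C^T ] [ phi_i    ]   [ 0   ]
         [ C    0  ] [ lambda_i ] = [ e_i ]

       so that the i-th constraint is active (C phi_i = e_i) while the others vanish;
       the subdomain contribution to the coarse matrix is collected from the reactions
       of these solves into coarse_submat_vals, in column-major ordering. The dbg_flag
       code checks both residuals of this system.
    */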
    /* Evaluate the coarse basis functions and the subdomain contribution to the coarse problem */
    for (i=0;i<pcbddc->n_vertices;i++) {
      ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
      ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
      /* solution of the saddle point problem */
      ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
      ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
      if (pcbddc->n_constraints) {
        ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
        ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
        ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
      }
      ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
      ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);

      /* Set values in the coarse basis functions and in the subdomain part of coarse_mat */
      /* coarse basis functions */
      index = i;
      ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
      ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
      ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
      ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
      if (pcbddc->prec_type || dbg_flag) {
        ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr);
        ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr);
      }
      /* subdomain contribution to the coarse matrix (stored in column-major ordering) */
      ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr);
      for (j=0;j<pcbddc->n_vertices;j++) coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j];
      ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr);
      if (pcbddc->n_constraints) {
        ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr);
        for (j=0;j<pcbddc->n_constraints;j++) coarse_submat_vals[i*pcbddc->local_primal_size+j+pcbddc->n_vertices] = array[j];
        ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr);
      }

      if (dbg_flag) {
        /* assemble the subdomain vector on nodes */
        ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
        ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
        ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
        for (j=0;j<n_R;j++) array[idx_R_local[j]] = array2[j];
        array[pcbddc->vertices[i]] = one;
        ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
        ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
        /* assemble the subdomain vector of Lagrange multipliers (i.e. primal nodes) */
        ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
        ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
        ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr);
        for (j=0;j<pcbddc->n_vertices;j++) array2[j] = array[j];
        ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr);
        if (pcbddc->n_constraints) {
          ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr);
          for (j=0;j<pcbddc->n_constraints;j++) array2[j+pcbddc->n_vertices] = array[j];
          ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr);
        }
        ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
        ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
        /* check the saddle point solution */
        ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
        ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
        ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr);
        ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
        ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
        array[index] = array[index]+m_one; /* shift by the identity matrix */
        ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
        ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr);
      }
    }
  /* We are now ready to evaluate coarse basis functions and the subdomain contribution to the coarse problem */
  for(i=0;i<pcbddc->n_vertices;i++){
    ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
    ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
    /* solution of saddle point problem */
    ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
    ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
    if(pcbddc->n_constraints) {
      ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
      ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
      ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
    }
    ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
    ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);

    /* Set values in coarse basis function and subdomain part of coarse_mat */
    /* coarse basis functions */
    index=i;
    ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
    if( pcbddc->prec_type || dbg_flag ) {
      ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr);
      ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr);
    }
    /* subdomain contribution to coarse matrix */
    ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr);
    for(j=0;j<pcbddc->n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j]=array[j]; } /* WARNING -> column major ordering */
    ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr);
    if( pcbddc->n_constraints) {
      ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr);
      for(j=0;j<pcbddc->n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j]; } /* WARNING -> column major ordering */
      ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr);
    }

    if( dbg_flag ) {
      /* assemble subdomain vector on nodes */
      ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
      for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; }
      array[ pcbddc->vertices[i] ] = one;
      ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
      ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      /* assemble subdomain vector of Lagrange multipliers (i.e. primal nodes) */
      ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
      ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
      ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr);
      for(j=0;j<pcbddc->n_vertices;j++) { array2[j]=array[j]; }
      ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr);
      if(pcbddc->n_constraints) {
        ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr);
        for(j=0;j<pcbddc->n_constraints;j++) { array2[j+pcbddc->n_vertices]=array[j]; }
        ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
      ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
      /* check saddle point solution */
      ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
      ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
      ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr);
      ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
      ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
      array[index]=array[index]+m_one; /* shift by the identity matrix */
      ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
      ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr);
    }
  }
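  /* Note on storage: coarse_submat_vals holds the local coarse matrix
     \Phi^T A \Phi one column per primal dof (column major ordering, see the WARNING
     comments above); this is why MAT_ROW_ORIENTED is set to PETSC_FALSE before
     MatSetValues is called on the coarse matrix later on. */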
  for(i=0;i<pcbddc->n_constraints;i++){
    ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
    ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
    /* solution of saddle point problem */
    ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
    ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
    if(pcbddc->n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); }
    /* Set values in coarse basis function and subdomain part of coarse_mat */
    /* coarse basis functions */
    index=i+pcbddc->n_vertices;
    ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr);
    if( pcbddc->prec_type || dbg_flag ) {
      ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr);
      ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr);
    }
    /* subdomain contribution to coarse matrix */
    if(pcbddc->n_vertices) {
      ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr);
      for(j=0;j<pcbddc->n_vertices;j++) { coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j]; } /* WARNING -> column major ordering */
      ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr);
    }
    ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr);
    for(j=0;j<pcbddc->n_constraints;j++) { coarse_submat_vals[index*pcbddc->local_primal_size+j+pcbddc->n_vertices]=array[j]; } /* WARNING -> column major ordering */
    ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr);

    if( dbg_flag ) {
      /* assemble subdomain vector on nodes */
      ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
      ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
      for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; }
      ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr);
      ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
      /* assemble subdomain vector of Lagrange multipliers */
      ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
      ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
      if( pcbddc->n_vertices) {
        ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr);
        for(j=0;j<pcbddc->n_vertices;j++) { array2[j]=-array[j]; }
        ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr);
      }
      ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr);
      for(j=0;j<pcbddc->n_constraints;j++) { array2[j+pcbddc->n_vertices]=-array[j]; }
      ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr);
      ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
      /* check saddle point solution */
      ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
      ierr = MatMultTransposeAdd(CMAT,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
      ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr);
      ierr = MatMult(CMAT,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
      ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
      array[index]=array[index]+m_one; /* shift by the identity matrix */
      ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
      ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if( pcbddc->prec_type || dbg_flag ) {
    ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd  (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  /* Checking coarse_sub_mat and coarse basis functions */
  /* It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)} = coarse_sub_mat */
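  /* A sketch of the identity being verified: splitting the subdomain matrix into
     interior (I) and interface (B) blocks, the check computes
       TM1 = \Phi_D^T A_{II} \Phi_D,  TM2 = \Phi_B^T A_{BB} \Phi_B,
       TM3 = \Phi_D^T A_{IB} \Phi_B,  TM4 = \Phi_B^T A_{BI} \Phi_D,
     and TM1+TM2+TM3+TM4 - coarse_sub_mat should vanish up to round-off. */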
  if(dbg_flag) {

    Mat           coarse_sub_mat;
    Mat           TM1,TM2,TM3,TM4;
    Mat           coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI;
    const MatType checkmattype=MATSEQAIJ;
    PetscScalar   value;

    ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
    ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
    ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
    ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
    ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
    ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
    ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr);

    ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
    ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
    ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
    ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
    ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
    ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
    ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
    ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
    ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr);
    for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); }
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr);
    for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th constraint error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A_II);CHKERRQ(ierr);
    ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
    ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
    ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
    ierr = MatDestroy(&TM1);CHKERRQ(ierr);
    ierr = MatDestroy(&TM2);CHKERRQ(ierr);
    ierr = MatDestroy(&TM3);CHKERRQ(ierr);
    ierr = MatDestroy(&TM4);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
    ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
    ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
  }

  /* create coarse matrix and data structures for message passing associated with the actual choice of coarse problem type */
  ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr);
  /* free memory */
  ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
  ierr = PetscFree(auxindices);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  if(pcbddc->n_vertices) {
    ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
    ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
    ierr = MatDestroy(&M2);CHKERRQ(ierr);
    ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
    ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
    ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
  }
  if(pcbddc->n_constraints) {
    ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
    ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
    ierr = MatDestroy(&M1);CHKERRQ(ierr);
    ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&CMAT);CHKERRQ(ierr);
  }
  /* free memory */
  if(pcbddc->n_vertices) {
    ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
    ierr = ISDestroy(&is_V_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
  ierr = ISDestroy(&is_R_local);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
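/* Overview of the coarse problem flavours handled by the function below (as coded in
   its two switches):
     SEQUENTIAL_BDDC: gather the whole coarse matrix on master_proc (SEQAIJ + LU);
     REPLICATED_BDDC: allgather, every process owns a copy (SEQAIJ + LU);
     PARALLEL_BDDC:   scatter, distributed MPIAIJ solved with PCREDUNDANT;
     MULTILEVEL_BDDC: subdomains become elements of a coarse mesh, MATIS matrix
                      preconditioned recursively with PCBDDC. */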
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupCoarseEnvironment"
static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
{
  Mat_IS   *matis    = (Mat_IS*)pc->pmat->data;
  PC_BDDC  *pcbddc   = (PC_BDDC*)pc->data;
  PC_IS    *pcis     = (PC_IS*)pc->data;
  MPI_Comm prec_comm = ((PetscObject)pc)->comm;
  MPI_Comm coarse_comm;

  /* common to all choices */
  PetscScalar *temp_coarse_mat_vals;
  PetscScalar *ins_coarse_mat_vals;
  PetscInt    *ins_local_primal_indices;
  PetscMPIInt *localsizes2,*localdispl2;
  PetscMPIInt size_prec_comm;
  PetscMPIInt rank_prec_comm;
  PetscMPIInt active_rank=MPI_PROC_NULL;
  PetscMPIInt master_proc=0;
  PetscInt    ins_local_primal_size;
  /* specific to MULTILEVEL_BDDC */
  PetscMPIInt *ranks_recv;
  PetscMPIInt count_recv=0;
  PetscMPIInt rank_coarse_proc_send_to;
  PetscMPIInt coarse_color = MPI_UNDEFINED;
  ISLocalToGlobalMapping coarse_ISLG;
  /* some other variables */
  PetscErrorCode ierr;
  const MatType  coarse_mat_type;
  const PCType   coarse_pc_type;
  const KSPType  coarse_ksp_type;
  PC             pc_temp;
  PetscInt       i,j,k,bs;
  /* verbose output viewer */
  PetscViewer viewer=pcbddc->dbg_viewer;
  PetscBool   dbg_flag=pcbddc->dbg_flag;

  PetscFunctionBegin;

  ins_local_primal_indices = 0;
  ins_coarse_mat_vals      = 0;
  localsizes2              = 0;
  localdispl2              = 0;
  temp_coarse_mat_vals     = 0;
  coarse_ISLG              = 0;

  ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
  ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
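  /* Numbering strategy used in the block below: each process maps its primal dofs to
     the global node numbering through matis->mapping, the root gathers all candidates,
     sorts them and removes duplicates, and broadcasts the resulting list; the position
     of a primal node in that list is its global coarse dof index. */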
  /* Assign global numbering to coarse dofs */
  {
    PetscScalar one=1.,zero=0.;
    PetscScalar *array;
    PetscMPIInt *auxlocal_primal;
    PetscMPIInt *auxglobal_primal;
    PetscMPIInt *all_auxglobal_primal;
    PetscMPIInt *all_auxglobal_primal_dummy;
    PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;

    /* Construct needed data structures for message passing */
    ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr);
    ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
    ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
    /* Gather local_primal_size information from all processes */
    ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
    pcbddc->replicated_primal_size = 0;
    for (i=0; i<size_prec_comm; i++) {
      pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size;
      pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
    }
    if(rank_prec_comm == 0) {
      /* allocate some auxiliary space */
      ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr);
      ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr);
    }
    ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr);
    ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr);

    /* First let's count coarse dofs: note that a constraint may be present on a subdomain
       without its counterpart on a neighbouring subdomain (if the user wants so).
       This code fragment assumes that the number of local constraints per connected component
       is not greater than the number of nodes defined for the connected component
       (otherwise we would surely have linear dependence between constraints and thus a singular coarse problem). */
    /* auxlocal_primal : primal indices in local node numbering (internal and interface) */
    ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for(i=0;i<pcbddc->n_vertices;i++) {
      array[ pcbddc->vertices[i] ] = one;
      auxlocal_primal[i] = pcbddc->vertices[i];
    }
    for(i=0;i<pcbddc->n_constraints;i++) {
      for (j=0; j<pcbddc->sizes_of_constraint[i]; j++) {
        k = pcbddc->indices_to_constraint[i][j];
        if( array[k] == zero ) {
          array[k] = one;
          auxlocal_primal[i+pcbddc->n_vertices] = k;
          break;
        }
      }
    }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);

    /*ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for(i=0;i<pcis->n;i++) { if(array[i] > 0.0) array[i] = one/array[i]; }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
    pcbddc->coarse_size = (PetscInt) coarsesum;
    if(dbg_flag) {
      ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }*/

    /* Now assign them a global numbering */
    /* auxglobal_primal contains indices in global node numbering (internal and interface) */
    ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr);
    /* all_auxglobal_primal contains all primal node indices in global node numbering (internal and interface) */
    ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr);

    /* After this block all_auxglobal_primal should contain one copy of each primal node's index in global node numbering */
    /* It implements a function similar to PetscSortRemoveDupsInt */
    if(rank_prec_comm==0) {
      /* dummy argument since PetscSortMPIInt doesn't exist! */
      ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr);
      k=1;
      j=all_auxglobal_primal[0];  /* first dof in global numbering */
      for(i=1;i< pcbddc->replicated_primal_size ;i++) {
        if(j != all_auxglobal_primal[i] ) {
          all_auxglobal_primal[k]=all_auxglobal_primal[i];
          k++;
          j=all_auxglobal_primal[i];
        }
      }
    } else {
      ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr);
    }
    /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaining elements of all_auxglobal_primal are garbage. */
    ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr);

    /* Now get the global coarse numbering of local primal nodes */
    for(i=0;i<pcbddc->local_primal_size;i++) {
      k=0;
      while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++; }
      pcbddc->local_primal_indices[i]=k;
    }
    if(dbg_flag) {
      ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
      for(i=0;i<pcbddc->local_primal_size;i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
    /* free allocated memory */
    ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
    ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr);
    ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr);
    if(rank_prec_comm == 0) {
      ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr);
    }
  }
  /* adapt coarse problem type */
  if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC )
    pcbddc->coarse_problem_type = PARALLEL_BDDC;

  switch(pcbddc->coarse_problem_type){

    case(MULTILEVEL_BDDC):  /* we define a coarse mesh where subdomains are elements */
    {
      /* we need additional variables */
      MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
      MetisInt    *metis_coarse_subdivision;
      MetisInt    options[METIS_NOPTIONS];
      PetscMPIInt size_coarse_comm,rank_coarse_comm;
      PetscMPIInt procs_jumps_coarse_comm;
      PetscMPIInt *coarse_subdivision;
      PetscMPIInt *total_count_recv;
      PetscMPIInt *total_ranks_recv;
      PetscMPIInt *displacements_recv;
      PetscMPIInt *my_faces_connectivity;
      PetscMPIInt *petsc_faces_adjncy;
      MetisInt    *faces_adjncy;
      MetisInt    *faces_xadj;
      PetscMPIInt *number_of_faces;
      PetscMPIInt *faces_displacements;
      PetscInt    *array_int;
      PetscMPIInt my_faces=0;
      PetscMPIInt total_faces=0;
      PetscInt    ranks_stretching_ratio;

      /* define some quantities */
      pcbddc->coarse_communications_type = SCATTERS_BDDC;
      coarse_mat_type = MATIS;
      coarse_pc_type  = PCBDDC;
      coarse_ksp_type = KSPRICHARDSON;

      /* details of coarse decomposition */
      n_subdomains = pcbddc->active_procs;
      n_parts      = n_subdomains/pcbddc->coarsening_ratio;
      ranks_stretching_ratio  = size_prec_comm/pcbddc->active_procs;
      procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;

      printf("Coarse algorithm details: \n");
      printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));
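      /* The adjacency graph built next has one vertex per subdomain and one edge per
         face: array_int counts, for each local node, how many neighbouring subdomains
         (self excluded, hence i=1 below) share it, so a node with count 1 belongs to a
         face; every neighbour owning at least one such node contributes one CSR
         adjacency entry. */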
      /* build CSR graph of subdomains' connectivity through faces */
      ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
      ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
      for(i=1;i<pcis->n_neigh;i++){ /* i=1 so I don't count myself -> face nodes get count 1 */
        for(j=0;j<pcis->n_shared[i];j++){
          array_int[ pcis->shared[i][j] ]+=1;
        }
      }
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces++;
            break;
          }
        }
      }
      /*printf("I found %d faces.\n",my_faces);*/

      ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
      my_faces=0;
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces_connectivity[my_faces]=pcis->neigh[i];
            my_faces++;
            break;
          }
        }
      }
      if(rank_prec_comm == master_proc) {
        /*printf("I found %d total faces.\n",total_faces);*/
        ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
        ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
        ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
      }
      ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        faces_xadj[0]=0;
        faces_displacements[0]=0;
        j=0;
        for(i=1;i<size_prec_comm+1;i++) {
          faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
          if(number_of_faces[i-1]) {
            j++;
            faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
          }
        }
        printf("The J I count is %d and should be %d\n",j,n_subdomains);
        printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);
      }
      ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
      ierr = PetscFree(array_int);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
        printf("This is the face connectivity (actual ranks)\n");
        for(i=0;i<n_subdomains;i++){
          printf("proc %d is connected with \n",i);
          for(j=faces_xadj[i];j<faces_xadj[i+1];j++)
            printf("%d ",faces_adjncy[j]);
          printf("\n");
        }
        ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
        ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
        ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
      }
      if( rank_prec_comm == master_proc ) {

        PetscInt heuristic_for_metis=3;

        ncon=1;
        faces_nvtxs=n_subdomains;
        /* partition the graph induced by face connectivity */
        ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
        ierr = METIS_SetDefaultOptions(options);
        /* we need a contiguous partition of the coarse mesh */
        options[METIS_OPTION_CONTIG]=1;
        options[METIS_OPTION_DBGLVL]=1;
        options[METIS_OPTION_NITER]=30;
        /*options[METIS_OPTION_NCUTS]=1;*/
        printf("METIS PART GRAPH\n");
        if(n_subdomains>n_parts*heuristic_for_metis) {
          printf("Using Kway\n");
          options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
          options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
          ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
        } else {
          printf("Using Recursive\n");
          ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
        }
        if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraph{Kway,Recursive} (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
        printf("Partition done!\n");
        ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
        ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
        coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */
        /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
        for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
        for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
        ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
      }

      /* Create a new communicator for the coarse problem by splitting the old one */
      if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
        coarse_color=0;              /* for communicator splitting */
        active_rank=rank_prec_comm;  /* for insertion of matrix values */
      }
      /* procs with coarse_color = MPI_UNDEFINED will get coarse_comm = MPI_COMM_NULL (from the MPI standard);
         key = rank_prec_comm -> keep the same ordering of ranks from the old to the new communicator */
      ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);

      if( coarse_color == 0 ) {
        ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
        ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
        printf("Details of coarse comm\n");
        printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm);
        printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);
      } else {
        rank_coarse_comm = MPI_PROC_NULL;
      }
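      /* Since the split uses key = rank_prec_comm, rank ordering is preserved: a
         participating process with old rank r gets new rank r/procs_jumps_coarse_comm
         (cf. the debug output below). */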
      /* the master proc takes care of arranging and distributing coarse information */
      if(rank_coarse_comm == master_proc) {
        ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
        /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
        ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/
        total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
        total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt));
        /* some initializations */
        displacements_recv[0]=0;
        /*PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */
        /* count from how many processes the j-th process of the coarse decomposition will receive data */
        for(j=0;j<size_coarse_comm;j++)
          for(i=0;i<size_prec_comm;i++)
            if(coarse_subdivision[i]==j)
              total_count_recv[j]++;
        /* displacements needed for scatterv of total_ranks_recv */
        for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
        /* Now fill total_ranks_recv properly -> each coarse process will receive the ranks (in the prec_comm communicator) of its sending processes */
        ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
        for(j=0;j<size_coarse_comm;j++) {
          for(i=0;i<size_prec_comm;i++) {
            if(coarse_subdivision[i]==j) {
              total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
              total_count_recv[j]+=1;
            }
          }
        }
        for(j=0;j<size_coarse_comm;j++) {
          printf("process %d in new rank will receive from %d processes (original ranks follow)\n",j,total_count_recv[j]);
          for(i=0;i<total_count_recv[j];i++) {
            printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
          }
          printf("\n");
        }

        /* identify the new decomposition in terms of ranks in the old communicator */
        for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
        printf("coarse_subdivision in old and new ranks\n");
        for(i=0;i<size_prec_comm;i++)
          if(coarse_subdivision[i]!=MPI_PROC_NULL) {
            printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
          } else {
            printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
          }
        printf("\n");
      }

      /* Scatter the new decomposition for send details */
      ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      /* Scatter receiving details to members of the coarse decomposition */
      if( coarse_color == 0) {
        ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
        ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
        ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
      }

      /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to);
      if(coarse_color == 0) {
        printf("I will receive some matrix data from %d processes (ranks follow)\n",count_recv);
        for(i=0;i<count_recv;i++)
          printf("%d ",ranks_recv[i]);
        printf("\n");
      }*/

      if(rank_prec_comm == master_proc) {
        /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
        ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
        ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/
        free(coarse_subdivision);
        free(total_count_recv);
        free(total_ranks_recv);
        ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
      }
      break;
    }

    case(REPLICATED_BDDC):

      pcbddc->coarse_communications_type = GATHERS_BDDC;
      coarse_mat_type = MATSEQAIJ;
      coarse_pc_type  = PCLU;
      coarse_ksp_type = KSPPREONLY;
      coarse_comm = PETSC_COMM_SELF;
      active_rank = rank_prec_comm;
      break;

    case(PARALLEL_BDDC):

      pcbddc->coarse_communications_type = SCATTERS_BDDC;
      coarse_mat_type = MATMPIAIJ;
      coarse_pc_type  = PCREDUNDANT;
      coarse_ksp_type = KSPPREONLY;
      coarse_comm = prec_comm;
      active_rank = rank_prec_comm;
      break;

    case(SEQUENTIAL_BDDC):
      pcbddc->coarse_communications_type = GATHERS_BDDC;
      coarse_mat_type = MATSEQAIJ;
      coarse_pc_type  = PCLU;
      coarse_ksp_type = KSPPREONLY;
      coarse_comm = PETSC_COMM_SELF;
      active_rank = master_proc;
      break;
  }

  switch(pcbddc->coarse_communications_type){

    case(SCATTERS_BDDC):
    {
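      /* SCATTERS_BDDC assembly strategy: in the MULTILEVEL case each coarse process
         receives the dense local_primal_size x local_primal_size blocks of the fine
         subdomains assigned to it and pre-sums them locally; in the PARALLEL case each
         process simply inserts its own block with global indices. Final summation
         across processes is left to MatSetValues with ADD_VALUES. */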
      if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

        PetscMPIInt send_size;
        PetscMPIInt rr,kk,lps,lpd;
        PetscInt    row_ind,col_ind;
        PetscInt    *aux_ins_indices;
        PetscInt    ii,jj;
        MPI_Request *requests;

        /* allocate auxiliary space */
        ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
        ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
        ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
        /* allocate structures for message passing */
        ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
        for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* fill up quantities */
        j=0;
        for(i=0;i<count_recv;i++){
          ii = ranks_recv[i];
          localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
          localdispl2[i]=j;
          j+=localsizes2[i];
          jj = pcbddc->local_primal_displacements[ii];
          for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* counts the coarse subdomains sharing each coarse node */
        }
        /*printf("aux_ins_indices 1\n");
        for(i=0;i<pcbddc->coarse_size;i++)
          printf("%d ",aux_ins_indices[i]);
        printf("\n");*/
        /* temp_coarse_mat_vals is used to temporarily store received matrix values */
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
        /* evaluate how many values I will insert in the coarse mat */
        ins_local_primal_size=0;
        for(i=0;i<pcbddc->coarse_size;i++)
          if(aux_ins_indices[i])
            ins_local_primal_size++;
        /* evaluate the indices I will insert in the coarse mat */
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++)
          if(aux_ins_indices[i])
            ins_local_primal_indices[j++]=i;
        /* use aux_ins_indices to realize a global to local mapping */
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++){
          if(aux_ins_indices[i]==0){
            aux_ins_indices[i]=-1;
          } else {
            aux_ins_indices[i]=j;
            j++;
          }
        }

        /*printf("New details localsizes2 localdispl2\n");
        for(i=0;i<count_recv;i++)
          printf("(%d %d) ",localsizes2[i],localdispl2[i]);
        printf("\n");
        printf("aux_ins_indices 2\n");
        for(i=0;i<pcbddc->coarse_size;i++)
          printf("%d ",aux_ins_indices[i]);
        printf("\n");
        printf("ins_local_primal_indices\n");
        for(i=0;i<ins_local_primal_size;i++)
          printf("%d ",ins_local_primal_indices[i]);
        printf("\n");
        printf("coarse_submat_vals\n");
        for(i=0;i<pcbddc->local_primal_size;i++)
          for(j=0;j<pcbddc->local_primal_size;j++)
            printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]);
        printf("\n");*/

        /* processes participating in the coarse problem receive matrix data from their senders */
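        /* Exchange step: post one nonblocking receive per assigned fine subdomain and,
           if this process was itself assigned to a coarse process, one send of its own
           dense block; tag 666 matches the send/receive pairs and MPI_Waitall completes
           both directions. */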
        for(i=0;i<count_recv;i++) { ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); }
        if(rank_coarse_proc_send_to != MPI_PROC_NULL ) {
          send_size=pcbddc->local_primal_size*pcbddc->local_primal_size;
          ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
        }
        ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

        /*if(coarse_color == 0) {
          printf("temp_coarse_mat_vals\n");
          for(k=0;k<count_recv;k++){
            printf("---- %d ----\n",ranks_recv[k]);
            for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++)
              for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++)
                printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]);
            printf("\n");
          }
        }*/
        /* compute the data to insert in the coarse mat */
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        ierr = PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);

        for(k=0;k<count_recv;k++){
          rr  = ranks_recv[k];
          kk  = localdispl2[k];
          lps = pcbddc->local_primal_sizes[rr];
          lpd = pcbddc->local_primal_displacements[rr];
          /*printf("Inserting the following indices (received from %d)\n",rr);*/
          for(j=0;j<lps;j++){
            col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]];
            for(i=0;i<lps;i++){
              row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]];
              /*printf("%d %d\n",row_ind,col_ind);*/
              ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i];
            }
          }
        }
        ierr = PetscFree(requests);CHKERRQ(ierr);
        ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);
        if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }

        /* create the local to global mapping needed by the coarse MATIS */
        {
          IS coarse_IS;
          if(coarse_comm != MPI_COMM_NULL ) { ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); }
          coarse_comm = prec_comm;
          active_rank=rank_prec_comm;
          ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
          ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
          ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
        }
      }
      if(pcbddc->coarse_problem_type==PARALLEL_BDDC) {
        /* arrays for values insertion */
        ins_local_primal_size = pcbddc->local_primal_size;
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        for(j=0;j<ins_local_primal_size;j++){
          ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
          for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
        }
      }
      break;

    }

    case(GATHERS_BDDC):
    {
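      /* GATHERS_BDDC assembly strategy: the active rank(s) gather every subdomain's
         primal index list and dense block (MPI_Gatherv on master_proc for
         SEQUENTIAL_BDDC, MPI_Allgatherv for REPLICATED_BDDC) and then sum the received
         blocks into a full coarse_size x coarse_size array of insertion values. */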
      PetscMPIInt mysize,mysize2;

      if(rank_prec_comm==active_rank) {
        ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
        pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar));
        ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* arrays for values insertion */
        ins_local_primal_size = pcbddc->coarse_size;
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
        localdispl2[0]=0;
        for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
        j=0;
        for(i=0;i<size_prec_comm;i++) j+=localsizes2[i];
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
      }

      mysize =pcbddc->local_primal_size;
      mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
      if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
        ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
        ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
      } else {
        ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
        ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
      }

      /* free data structures no longer needed and allocate some space which will be needed in BDDC application */
      if(rank_prec_comm==active_rank) {
        PetscInt offset,offset2,row_ind,col_ind;
        for(j=0;j<ins_local_primal_size;j++){
          ins_local_primal_indices[j]=j;
          for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0;
        }
        for(k=0;k<size_prec_comm;k++){
          offset =pcbddc->local_primal_displacements[k];
          offset2=localdispl2[k];
          for(j=0;j<pcbddc->local_primal_sizes[k];j++){
            col_ind=pcbddc->replicated_local_primal_indices[offset+j];
            for(i=0;i<pcbddc->local_primal_sizes[k];i++){
              row_ind=pcbddc->replicated_local_primal_indices[offset+i];
              ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i];
            }
          }
        }
      }
      break;
    } /* switch on coarse problem type and the associated communications finished */
  }
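  /* The insertion below relies on two Mat options: MAT_ROW_ORIENTED set to
     PETSC_FALSE, because the blocks prepared above are stored in column major
     ordering, and ADD_VALUES, so that contributions of subdomains sharing a coarse
     dof accumulate. */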
  /* Now create and fill up coarse matrix */
  if( rank_prec_comm == active_rank ) {
    if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
      ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
    } else {
      Mat matis_coarse_local_mat;
      ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
      ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      Mat matis_coarse_local_mat;
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr);
    }

    ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
    /* Preconditioner for coarse problem */
    ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    /* Set Up PC for coarse problem BDDC */
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      if(dbg_flag) {
        ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr);
        ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      }
      ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
    }
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      if(dbg_flag) {
        ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr);
        ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      }
    }
  }
  if(pcbddc->coarse_communications_type == SCATTERS_BDDC) {
    IS local_IS,global_IS;
    ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
    ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
    ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
  }

  /* Check condition number of coarse problem */
  if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && dbg_flag && rank_prec_comm == active_rank ) {
    PetscScalar m_one=-1.0;
    PetscReal   infty_error,lambda_min,lambda_max,kappa_2;

    /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */
    ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,KSPGMRES);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
    ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr);
    ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
    kappa_2=lambda_max/lambda_min;
    ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr);
    ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr);
    ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of gmres is: % 1.14e\n",k,kappa_2);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr);
    /* restore coarse ksp to default values */
    ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
  }

  /* free data structures no longer needed */
  if(coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
  if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
  if(ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
  if(localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
  if(localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
  if(temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }

  PetscFunctionReturn(0);
}
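/* PCBDDCManageLocalBoundaries: analyzes the local interface with a graph built from
   the subdomain adjacency. Interface nodes are classified by the set of subdomains
   sharing them (mat_graph->where), each class is split into connected components
   (stored CSR-like in mat_graph->queue/cptr), and the resulting decomposition is then
   made consistent across neighbouring subdomains. */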
#undef __FUNCT__
#define __FUNCT__ "PCBDDCManageLocalBoundaries"
static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  PC_IS          *pcis = (PC_IS*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       **neighbours_set;
  PetscErrorCode ierr;
  PetscInt       bs,i,j,s,k,iindex;
  PetscInt       total_counts;
  PetscInt       nodes_touched=0,where_values=1;
  PetscBool      same_set;
  PetscBool      flg_row;
  PCBDDCGraph    mat_graph;
  PetscInt       neumann_bsize;
  const PetscInt *neumann_nodes;
  Mat            mat_adj;
  PetscBool      symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE;
  PetscMPIInt    adapt_interface=0,adapt_interface_reduced=0;
  PetscInt       *queue_in_global_numbering;
  MPI_Comm       interface_comm=((PetscObject)pc)->comm;

  PetscFunctionBegin;

  /* allocate and initialize the needed graph structure */
  ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr);
  ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
  /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */
  ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr);
  if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n");
  i = mat_graph->nvtxs;
  ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr);
  ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; }

  /* Set the dofs splitting in mat_graph->which_dof */
  if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */
    PetscInt *is_indices;
    PetscInt is_size;
    for(i=0;i<pcbddc->n_ISForDofs;i++) {
      ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr);
      ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
      for(j=0;j<is_size;j++) {
        mat_graph->which_dof[is_indices[j]]=i;
      }
      ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr);
    }
  } else { /* otherwise assume a constant block size */
    ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
    for(i=0;i<mat_graph->nvtxs/bs;i++) {
      for(s=0;s<bs;s++) {
        mat_graph->which_dof[i*bs+s]=s;
      }
    }
  }

  total_counts=0;
  for(i=0;i<pcis->n_neigh;i++){
    s=pcis->n_shared[i];
    total_counts+=s;
    for(j=0;j<s;j++){
      mat_graph->count[pcis->shared[i][j]] += 1;
    }
  }
  /* Take Neumann data into account: increment the number of sharing subdomains for all interface nodes except face nodes */
  if(pcbddc->NeumannBoundaries) {
    ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr);
    ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr);
    for(i=0;i<neumann_bsize;i++){
      iindex = neumann_nodes[i];
      if(mat_graph->count[iindex] > 2){
        mat_graph->count[iindex]+=1;
        total_counts++;
      }
    }
  }
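  /* Classification rule used below: two interface nodes belong to the same class iff
     they are shared by the same (sorted) set of subdomains and carry the same dof
     field in which_dof; the fake -1 neighbour added to Neumann nodes makes their
     sharing sets differ from those of equally shared non-Neumann nodes. */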
  /* allocate space for storing the neighbours' sets */
  ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr);
  if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); }
  for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1];
  ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
  for(i=0;i<pcis->n_neigh;i++){
    s=pcis->n_shared[i];
    for(j=0;j<s;j++) {
      k=pcis->shared[i][j];
      neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i];
      mat_graph->count[k]+=1;
    }
  }
  /* set a -1 fake neighbour */
  if(pcbddc->NeumannBoundaries) {
    for(i=0;i<neumann_bsize;i++){
      iindex = neumann_nodes[i];
      if( mat_graph->count[iindex] > 2){
        neighbours_set[iindex][mat_graph->count[iindex]] = -1; /* an additional fake neighbour (with rank -1) */
        mat_graph->count[iindex]+=1;
      }
    }
    ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr);
  }
  /* sort the sets of sharing subdomains for comparison */
  for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); }
  /* start analyzing the local interface */
  for(i=0;i<mat_graph->nvtxs;i++){
    if(!mat_graph->count[i]){ /* internal nodes */
      mat_graph->touched[i]=PETSC_TRUE;
      mat_graph->where[i]=0;
      nodes_touched++;
    }
  }
  mat_graph->ncmps = 0;
  while(nodes_touched<mat_graph->nvtxs) {
    /* find the first untouched node in local ordering */
    i=0;
    while(mat_graph->touched[i]) i++;
    mat_graph->touched[i]=PETSC_TRUE;
    mat_graph->where[i]=where_values;
    nodes_touched++;
    /* now find all other nodes having the same set of sharing subdomains */
    for(j=i+1;j<mat_graph->nvtxs;j++){
      /* check for the same number of sharing subdomains and the same dof number */
      if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){
        /* check for the same set of sharing subdomains */
        same_set=PETSC_TRUE;
        for(k=0;k<mat_graph->count[j];k++){
          if(neighbours_set[i][k]!=neighbours_set[j][k]) {
            same_set=PETSC_FALSE;
          }
        }
        /* I found a friend of mine */
        if(same_set) {
          mat_graph->where[j]=where_values;
          mat_graph->touched[j]=PETSC_TRUE;
          nodes_touched++;
        }
      }
    }
    where_values++;
  }
  where_values--; if(where_values<0) where_values=0;
  ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
  /* Find the connected components defined on the shared interface */
  if(where_values) {
    ierr = PCBDDCFindConnectedComponents(mat_graph, where_values);CHKERRQ(ierr);
    /* For consistency among neighbouring procs, each connected component in the global queue must be sorted by global ordering (carrying along the local queue) */
    for(i=0;i<mat_graph->ncmps;i++) {
      ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
      ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
    }
    /* for(i=0;i<mat_graph->ncmps;i++) {
      printf("[queue num %d] ptr %d, length %d: global node (local node)\n",i,mat_graph->cptr[i],mat_graph->cptr[i+1]-mat_graph->cptr[i]);
      for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++){
        printf("%d (%d) ",queue_in_global_numbering[mat_graph->cptr[i]+j],mat_graph->queue[mat_graph->cptr[i]+j]);
      }
      printf("\n");
    } */
  }
  /* check consistency of the connected components among neighbouring subdomains, and adapt them if needed */
  for (i=0;i<where_values;i++) {
    if (mat_graph->where_ncmps[i]>1) { /* subdomains sharing a subset of the local interface may see different connected components on it */
      adapt_interface = 1;
      break;
    }
  }
  ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr);
  if (where_values && adapt_interface_reduced) {

    PetscInt    sum_requests=0;
    PetscMPIInt my_rank;
    PetscInt    buffer_size,start_of_recv,size_of_recv,start_of_send;
    PetscInt    temp_buffer_size,ins_val,global_where_counter;
    PetscInt    *cum_recv_counts;
    PetscInt    *where_to_nodes_indices;
    PetscInt    *petsc_buffer;
    PetscMPIInt *recv_buffer;
    PetscMPIInt *recv_buffer_where;
    PetscMPIInt *send_buffer;
    PetscMPIInt size_of_send;
    PetscMPIInt *sizes_of_sends;
    MPI_Request *send_requests;
    MPI_Request *recv_requests;
    PetscInt    *where_cc_adapt;
    PetscInt    **temp_buffer;
    PetscInt    *nodes_to_temp_buffer_indices;
    PetscInt    *add_to_where;

    ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr);
    ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr);
    /* first count how many neighbours per connected component I will receive from */
    cum_recv_counts[0] = 0;
    for (i=1;i<where_values+1;i++) {
      j = 0;
      while (mat_graph->where[j] != i) j++;
      where_to_nodes_indices[i-1] = j;
      if (neighbours_set[j][0]!=-1) { cum_recv_counts[i] = cum_recv_counts[i-1]+mat_graph->count[j]-1; } /* we don't send/recv to/from self, so don't count myself */
      else                          { cum_recv_counts[i] = cum_recv_counts[i-1]+mat_graph->count[j]-2; } /* the fake neighbour -1 is not counted either */
    }
    buffer_size = 2*cum_recv_counts[where_values]+mat_graph->nvtxs;
    ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr);
    ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr);
    for (i=0;i<cum_recv_counts[where_values];i++) {
      send_requests[i] = MPI_REQUEST_NULL;
      recv_requests[i] = MPI_REQUEST_NULL;
    }
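    /* Note on message matching: every send posted below uses the tag
       (my_rank+1)*mat_graph->count[j] and the matching receive expects
       (neigh+1)*mat_graph->count[j]; both endpoints see the same sharing count for a
       given part of the interface, so the tags pair up. Two parts shared with the same
       neighbour and with equal sharing counts reuse the same tag; in that case matching
       relies on MPI's non-overtaking rule for operations posted on the same
       (source, tag, communicator) triple. */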
    /* exchange with my neighbours the number of connected components I see on each part of the shared interface */
    for (i=0;i<where_values;i++) {
      j = where_to_nodes_indices[i];
      k = (neighbours_set[j][0] == -1 ? 1 : 0); /* skip the fake neighbour */
      for (;k<mat_graph->count[j];k++) {
        if (neighbours_set[j][k] != my_rank) {
          /* the PetscMPIInt buffers are exchanged with MPI_INT (MPIU_INT corresponds to PetscInt) */
          ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPI_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
          ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPI_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
          sum_requests++;
        }
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

    /* determine which connected components need to be adapted */
    ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr);
    ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0;i<where_values;i++) {
      for (j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++) {
        /* the first condition is natural (i.e. some neighbour sees a different number of
           connected components than me); the second one is just to be safe */
        if (mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1) {
          where_cc_adapt[i] = PETSC_TRUE;
          break;
        }
      }
    }
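    /* Note (illustration): for each part of the interface that needs adaptation, the send
       buffer packed below is a sequence of records, one per local connected component on
       that part: [component size, global indices of its nodes...]. E.g. the components
       {10,12} and {41} (global numbering) are packed as 2,10,12,1,41. */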
    /* now get from neighbours their connected components (in global numbering) and adapt mine if needed */
    /* first determine how much data to send (the size of each local queue plus its global indices) and communicate it to the neighbours */
    ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&sizes_of_sends);CHKERRQ(ierr);
    ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscMPIInt));CHKERRQ(ierr);
    sum_requests  = 0;
    start_of_send = 0;
    start_of_recv = cum_recv_counts[where_values];
    for (i=0;i<where_values;i++) {
      if (where_cc_adapt[i]) {
        size_of_send = 0;
        for (j=i;j<mat_graph->ncmps;j++) {
          if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* note: where values go from 1 to where_values inclusive */
            send_buffer[start_of_send+size_of_send] = mat_graph->cptr[j+1]-mat_graph->cptr[j];
            size_of_send += 1;
            for (k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) {
              send_buffer[start_of_send+size_of_send+k] = queue_in_global_numbering[mat_graph->cptr[j]+k];
            }
            size_of_send = size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j];
          }
        }
        /* store the size before posting the nonblocking sends, so the sent datum stays
           valid until the MPI_Waitall below */
        sizes_of_sends[i] = size_of_send;
        j = where_to_nodes_indices[i];
        k = (neighbours_set[j][0] == -1 ? 1 : 0);
        for (;k<mat_graph->count[j];k++) {
          if (neighbours_set[j][k] != my_rank) {
            ierr = MPI_Isend(&sizes_of_sends[i],1,MPI_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
            ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPI_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
            sum_requests++;
          }
        }
        start_of_send += size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    buffer_size = 0;
    for (k=0;k<sum_requests;k++) buffer_size += recv_buffer_where[start_of_recv+k];
    ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr);
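    /* Note: the exchange is two-phase. The sizes gathered just above (stored past
       cum_recv_counts[where_values] in recv_buffer_where) are used both to allocate
       recv_buffer and, below, to delimit one contiguous chunk of it per posted receive. */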
    /* now exchange the data */
    start_of_recv = 0;
    start_of_send = 0;
    sum_requests  = 0;
    for (i=0;i<where_values;i++) {
      if (where_cc_adapt[i]) {
        size_of_send = sizes_of_sends[i];
        j = where_to_nodes_indices[i];
        k = (neighbours_set[j][0] == -1 ? 1 : 0);
        for (;k<mat_graph->count[j];k++) {
          if (neighbours_set[j][k] != my_rank) {
            ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPI_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
            size_of_recv = recv_buffer_where[cum_recv_counts[where_values]+sum_requests];
            ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPI_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
            start_of_recv += size_of_recv;
            sum_requests++;
          }
        }
        start_of_send += size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    /* convert the received global indices to local numbering, in place, record by record */
    ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr);
    for (k=0;k<start_of_recv;k++) petsc_buffer[k] = (PetscInt)recv_buffer[k];
    for (j=0;j<buffer_size;) {
      ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr);
      k = petsc_buffer[j]+1;
      j += k;
    }
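    /* Note (illustration): petsc_buffer now holds the neighbours' components as the same
       [size, indices...] records described above, but with the indices mapped to the
       local numbering (an index not belonging to this subdomain would be masked to -1
       by IS_GTOLM_MASK). */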
    sum_requests  = cum_recv_counts[where_values];
    start_of_recv = 0;
    ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr);
    global_where_counter = 0;
    for (i=0;i<where_values;i++) {
      if (where_cc_adapt[i]) {
        temp_buffer_size = 0;
        /* find the nodes on the shared interface we need to adapt */
        for (j=0;j<mat_graph->nvtxs;j++) {
          if (mat_graph->where[j]==i+1) {
            nodes_to_temp_buffer_indices[j] = temp_buffer_size;
            temp_buffer_size++;
          } else {
            nodes_to_temp_buffer_indices[j] = -1;
          }
        }
        /* allocate some temporary space */
        ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr);
        ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr);
        ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr);
        for (j=1;j<temp_buffer_size;j++) {
          temp_buffer[j] = temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i];
        }
        /* analyze the contributions from neighbouring subdomains for the i-th connected component.
           temp_buffer structure: suppose this part of the interface has dimension 5 (global nodes 0,1,2,3,4)
           and 3 neighbouring procs with the following connected components:
             neigh 0: [0 1 4], [2 3];      (2 connected components)
             neigh 1: [0 1], [2 3 4];      (2 connected components)
             neigh 2: [0 4], [1], [2 3];   (3 connected components)
           then temp_buffer (row-oriented, one row per node and one column per neighbour) is filled as:
             [ 0, 0, 0;
               0, 0, 1;
               1, 1, 2;
               1, 1, 2;
               0, 1, 0; ];
           This way we can recover the resulting structure by simply comparing rows, accounting
           for possible intersections of the connected components among neighbours. The
           mat_graph->where array will be modified to reproduce the following 4 connected
           components: [0], [1], [2 3], [4]. */
        for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) {
          ins_val = 0;
          size_of_recv = recv_buffer_where[sum_requests]; /* total size of the recv from this neighbour */
          for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from this neighbour has been taken into account */
            for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* fill temp_buffer using the data from a single record */
              temp_buffer[nodes_to_temp_buffer_indices[petsc_buffer[start_of_recv+buffer_size+k]]][j] = ins_val;
            }
            buffer_size += k;
            ins_val++;
          }
          start_of_recv += size_of_recv;
          sum_requests++;
        }
        ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr);
        ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr);
        for (j=0;j<temp_buffer_size;j++) {
          if (!add_to_where[j]) { /* found a new connected component */
            global_where_counter++;
            add_to_where[j] = global_where_counter;
            for (k=j+1;k<temp_buffer_size;k++) { /* check for other nodes in the new connected component */
              same_set = PETSC_TRUE;
              for (s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++) {
                if (temp_buffer[j][s]!=temp_buffer[k][s]) {
                  same_set = PETSC_FALSE;
                  break;
                }
              }
              if (same_set) add_to_where[k] = global_where_counter;
            }
          }
        }
        /* insert the new data into the where array */
        temp_buffer_size = 0;
        for (j=0;j<mat_graph->nvtxs;j++) {
          if (mat_graph->where[j]==i+1) {
            mat_graph->where[j] = where_values+add_to_where[temp_buffer_size];
            temp_buffer_size++;
          }
        }
        ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr);
        ierr = PetscFree(temp_buffer);CHKERRQ(ierr);
        ierr = PetscFree(add_to_where);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr);
    ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr);
    ierr = PetscFree(send_requests);CHKERRQ(ierr);
    ierr = PetscFree(recv_requests);CHKERRQ(ierr);
    ierr = PetscFree(petsc_buffer);CHKERRQ(ierr);
    ierr = PetscFree(recv_buffer);CHKERRQ(ierr);
    ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr);
    ierr = PetscFree(send_buffer);CHKERRQ(ierr);
    ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr);
    ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr);
    /* We are now ready to evaluate consistent connected components on each part of the shared interface */
    if (global_where_counter) {
      for (i=0;i<mat_graph->nvtxs;i++) mat_graph->touched[i] = PETSC_FALSE;
      global_where_counter = 0;
      for (i=0;i<mat_graph->nvtxs;i++) {
        if (mat_graph->where[i] && !mat_graph->touched[i]) {
          global_where_counter++;
          for (j=i+1;j<mat_graph->nvtxs;j++) {
            if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) {
              mat_graph->where[j] = global_where_counter;
              mat_graph->touched[j] = PETSC_TRUE;
            }
          }
          mat_graph->where[i] = global_where_counter;
          mat_graph->touched[i] = PETSC_TRUE;
        }
      }
      where_values = global_where_counter;
    }
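    /* Note: after the relabeling above, the where[] values are again contiguous in
       1..where_values, so the connected-component search below can be rerun on the
       adapted partition exactly as in the first pass. */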
    if (global_where_counter) {
      ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
      ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr);
      ierr = PCBDDCFindConnectedComponents(mat_graph,where_values);CHKERRQ(ierr);
      for (i=0;i<mat_graph->ncmps;i++) {
        ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr);
        ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr);
      }
    }
  }

  /* count vertices, edges and faces: a connected component with more than one node is a
     face if it is shared by exactly two subdomains and an edge otherwise; a component
     made of a single node is a vertex */
  PetscInt nfc=0;
  PetscInt nec=0;
  PetscInt nvc=0;
  for (i=0;i<mat_graph->ncmps;i++) {
    if (!pcbddc->vertices_flag) {
      if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1) {
        if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==2) {
          nfc++;
        } else {
          nec++;
        }
      }
    }
    if (!pcbddc->constraints_flag) {
      if (mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1) {
        nvc++;
      }
    }
  }

  pcbddc->n_constraints = nec+nfc;
  pcbddc->n_vertices    = nvc;

  if (pcbddc->n_constraints) {
    /* allocate space for the local constraints of BDDC */
    ierr = PetscMalloc(pcbddc->n_constraints*sizeof(PetscInt*),&pcbddc->indices_to_constraint);CHKERRQ(ierr);
    ierr = PetscMalloc(pcbddc->n_constraints*sizeof(PetscScalar*),&pcbddc->quadrature_constraint);CHKERRQ(ierr);
    ierr = PetscMalloc(pcbddc->n_constraints*sizeof(PetscInt),&pcbddc->sizes_of_constraint);CHKERRQ(ierr);
    k = 0;
    for (i=0;i<mat_graph->ncmps;i++) {
      if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1) {
        pcbddc->sizes_of_constraint[k] = mat_graph->cptr[i+1]-mat_graph->cptr[i];
        k++;
      }
    }
    k = 0;
    for (i=0;i<pcbddc->n_constraints;i++) k += pcbddc->sizes_of_constraint[i];
    ierr = PetscMalloc(k*sizeof(PetscInt),&pcbddc->indices_to_constraint[0]);CHKERRQ(ierr);
    ierr = PetscMalloc(k*sizeof(PetscScalar),&pcbddc->quadrature_constraint[0]);CHKERRQ(ierr);
    for (i=1;i<pcbddc->n_constraints;i++) {
      pcbddc->indices_to_constraint[i] = pcbddc->indices_to_constraint[i-1] + pcbddc->sizes_of_constraint[i-1];
      pcbddc->quadrature_constraint[i] = pcbddc->quadrature_constraint[i-1] + pcbddc->sizes_of_constraint[i-1];
    }
    k = 0;
    PetscScalar quad_value;
    for (i=0;i<mat_graph->ncmps;i++) {
      if (mat_graph->cptr[i+1]-mat_graph->cptr[i] > 1) {
        /* each constraint averages the dofs of one face or edge */
        quad_value = 1.0/((PetscReal)(mat_graph->cptr[i+1]-mat_graph->cptr[i]));
        for (j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) {
          pcbddc->indices_to_constraint[k][j] = mat_graph->queue[mat_graph->cptr[i]+j];
          pcbddc->quadrature_constraint[k][j] = quad_value;
        }
        k++;
      }
    }
  }
  if (pcbddc->n_vertices) {
    /* allocate space for the local vertices of BDDC */
    ierr = PetscMalloc(pcbddc->n_vertices*sizeof(PetscInt),&pcbddc->vertices);CHKERRQ(ierr);
    k = 0;
    for (i=0;i<mat_graph->ncmps;i++) {
      if (mat_graph->cptr[i+1]-mat_graph->cptr[i] == 1) {
        pcbddc->vertices[k] = mat_graph->queue[mat_graph->cptr[i]];
        k++;
      }
    }
    /* sort the vertex set (by local ordering) */
    ierr = PetscSortInt(pcbddc->n_vertices,pcbddc->vertices);CHKERRQ(ierr);
  }
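  /* Note (illustration): indices_to_constraint and quadrature_constraint are stored
     CSR-style: one contiguous allocation each, with row k of length
     sizes_of_constraint[k] listing the local dofs of the k-th face/edge and the
     (constant) averaging weight 1/size attached to each of them. */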
  if (pcbddc->dbg_flag) {
    PetscViewer viewer=pcbddc->dbg_viewer;

    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components",mat_graph->ncmps);CHKERRQ(ierr);
    for (i=0;i<mat_graph->ncmps;i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nSize and count for connected component %02d : %04d %01d\n",i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr);
      for (j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);
    if (pcbddc->n_vertices) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,pcbddc->n_vertices);CHKERRQ(ierr); }
    if (nfc) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); }
    if (nec) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); }
    if (pcbddc->n_vertices) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Global indices of the subdomain vertices follow\n");CHKERRQ(ierr); }
    ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->n_vertices,pcbddc->vertices,queue_in_global_numbering);CHKERRQ(ierr);
    for (i=0;i<pcbddc->n_vertices;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",queue_in_global_numbering[i]);CHKERRQ(ierr); }
    if (pcbddc->n_vertices) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  }

  /* restore the CSR structure into the sequential matrix and free memory no longer needed */
  ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr);
  if (!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries");
  ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
  /* free the graph structure */
  if (mat_graph->nvtxs) {
    ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr);
    ierr = PetscFree(neighbours_set);CHKERRQ(ierr);
    ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr);
    ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr);
    ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
  }
  ierr = PetscFree(mat_graph);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */

/* The following code has been adapted from the function IsConnectedSubdomain
   in the source file contig.c of the METIS library (version 5.0.1) */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist)
{
  PetscInt  i, j, k, nvtxs, first, last, nleft, ncmps, pid, cum_queue, n, ncmps_pid;
  PetscInt  *xadj, *adjncy, *where, *queue;
  PetscInt  *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE;

  cum_queue = 0;
  ncmps     = 0;

  /* one breadth-first search per distinct where value (where values go from 1 to n_dist) */
  for (n=0; n<n_dist; n++) {
    pid   = n+1;
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid) nleft++;
    }
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid) break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1;
    cptr[ncmps] = cum_queue; /* cptr indexes into queue */
    ncmps_pid = 0;
    while (first != nleft) {
      if (first == last) { /* find another starting vertex */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i]) break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      i = queue[cum_queue+first];
      first++;
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue = cptr[ncmps];
    graph->where_ncmps[n] = ncmps_pid;
  }
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}
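/* Example (for illustration only): with nvtxs=5, edges 0-1 and 3-4, and
   where = [1,1,1,2,2] (n_dist=2), the search above produces
     queue = [0,1, 2, 3,4],  cptr = [0,2,3,5],  ncmps = 3,  where_ncmps = [2,1],
   i.e. part 1 splits into the components [0 1] and [2], while part 2 is the single
   component [3 4]. */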