1 /* TODOLIST 2 Exact solvers: Solve local saddle point directly for very hard problems 3 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 4 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 5 - mind the problem with coarsening_factor 6 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 7 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 8 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 9 - Add levels' slot to bddc data structure and associated Set/Get functions 10 code refactoring: 11 - pick up better names for static functions 12 check log_summary for leaking (actually: 1 Vector per level ) 13 change options structure: 14 - insert BDDC into MG framework? 15 provide other ops? Ask to developers 16 remove all unused printf 17 remove // commments and adhere to PETSc code requirements 18 man pages 19 */ 20 21 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 22 Implementation of BDDC preconditioner based on: 23 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 29 /* -------------------------------------------------------------------------- */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "PCSetFromOptions_BDDC" 32 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 33 { 34 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 35 PetscErrorCode ierr; 36 37 PetscFunctionBegin; 38 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 39 /* Verbose debugging of main data structures */ 40 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 41 /* Some customization for default primal space */ 42 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use vertices only in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use constraints only in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use faces only in coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use edges only in coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 46 /* Coarse solver context */ 47 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; //order of choiches depends on ENUM defined in bddc.h 48 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 49 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 50 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsTail();CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 /* -------------------------------------------------------------------------- */ 57 EXTERN_C_BEGIN 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 60 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 64 PetscFunctionBegin; 65 pcbddc->coarse_problem_type = CPT; 66 PetscFunctionReturn(0); 67 } 68 EXTERN_C_END 69 70 /* -------------------------------------------------------------------------- */ 71 #undef __FUNCT__ 72 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 73 /*@ 74 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 75 76 Not collective 77 78 Input Parameters: 79 + pc - the preconditioning context 80 - CoarseProblemType - pick a better name and explain what this is 81 82 Level: intermediate 83 84 Notes: 85 Not collective but all procs must call this. 
86 87 .seealso: PCBDDC 88 @*/ 89 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 90 { 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 95 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* -------------------------------------------------------------------------- */ 100 EXTERN_C_BEGIN 101 #undef __FUNCT__ 102 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 103 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 104 { 105 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 110 ierr = ISDuplicate(DirichletBoundaries,&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = ISCopy(DirichletBoundaries,pcbddc->DirichletBoundaries);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 /* -------------------------------------------------------------------------- */ 117 #undef __FUNCT__ 118 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 119 /*@ 120 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part of 121 Dirichlet boundaries for the global problem. 122 123 Not collective 124 125 Input Parameters: 126 + pc - the preconditioning context 127 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 128 129 Level: intermediate 130 131 Notes: 132 The sequential IS is copied; the user must destroy the IS object passed in. 
133 134 .seealso: PCBDDC 135 @*/ 136 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 137 { 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 142 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 143 PetscFunctionReturn(0); 144 } 145 /* -------------------------------------------------------------------------- */ 146 EXTERN_C_BEGIN 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 149 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 150 { 151 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 152 PetscErrorCode ierr; 153 154 PetscFunctionBegin; 155 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 156 ierr = ISDuplicate(NeumannBoundaries,&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 157 ierr = ISCopy(NeumannBoundaries,pcbddc->NeumannBoundaries);CHKERRQ(ierr); 158 PetscFunctionReturn(0); 159 } 160 EXTERN_C_END 161 162 /* -------------------------------------------------------------------------- */ 163 #undef __FUNCT__ 164 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 165 /*@ 166 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part of 167 Neumann boundaries for the global problem. 168 169 Not collective 170 171 Input Parameters: 172 + pc - the preconditioning context 173 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 174 175 Level: intermediate 176 177 Notes: 178 The sequential IS is copied; the user must destroy the IS object passed in. 
179 180 .seealso: PCBDDC 181 @*/ 182 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 183 { 184 PetscErrorCode ierr; 185 186 PetscFunctionBegin; 187 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 188 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 189 PetscFunctionReturn(0); 190 } 191 /* -------------------------------------------------------------------------- */ 192 EXTERN_C_BEGIN 193 #undef __FUNCT__ 194 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 195 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 196 { 197 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 198 199 PetscFunctionBegin; 200 if(pcbddc->NeumannBoundaries) { 201 *NeumannBoundaries = pcbddc->NeumannBoundaries; 202 } else { 203 *NeumannBoundaries = PETSC_NULL; 204 //SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Error in %s: Neumann boundaries not set!.\n",__FUNCT__); 205 } 206 PetscFunctionReturn(0); 207 } 208 EXTERN_C_END 209 210 /* -------------------------------------------------------------------------- */ 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 213 /*@ 214 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part of 215 Neumann boundaries for the global problem. 216 217 Not collective 218 219 Input Parameters: 220 + pc - the preconditioning context 221 222 Output Parameters: 223 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 224 225 Level: intermediate 226 227 Notes: 228 If the user has not yet provided such information, PETSC_NULL is returned. 
229 230 .seealso: PCBDDC 231 @*/ 232 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 233 { 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 238 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 /* -------------------------------------------------------------------------- */ 243 EXTERN_C_BEGIN 244 #undef __FUNCT__ 245 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 246 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 247 { 248 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 249 PetscInt i; 250 PetscErrorCode ierr; 251 252 PetscFunctionBegin; 253 /* Destroy IS if already set */ 254 for(i=0;i<pcbddc->n_ISForDofs;i++) { 255 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 256 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 257 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 258 } 259 /* allocate space then copy ISs */ 260 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 261 for(i=0;i<n_is;i++) { 262 ierr = ISDuplicate(ISForDofs[i],&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 263 ierr = ISCopy(ISForDofs[i],pcbddc->ISForDofs[i]);CHKERRQ(ierr); 264 } 265 pcbddc->n_ISForDofs=n_is; 266 PetscFunctionReturn(0); 267 } 268 EXTERN_C_END 269 270 /* -------------------------------------------------------------------------- */ 271 #undef __FUNCT__ 272 #define __FUNCT__ "PCBDDCSetDofsSplitting" 273 /*@ 274 PCBDDCSetDofsSplitting - Set index set defining how dofs are splitted. 275 276 Not collective 277 278 Input Parameters: 279 + pc - the preconditioning context 280 - n - number of index sets defining dofs spltting 281 - IS[] - array of IS describing dofs splitting 282 283 Level: intermediate 284 285 Notes: 286 Sequential ISs are copied, the user must destroy the array of IS passed in. 
287 288 .seealso: PCBDDC 289 @*/ 290 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 296 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "PCSetUp_BDDC" 302 /* -------------------------------------------------------------------------- */ 303 /* 304 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 305 by setting data structures and options. 306 307 Input Parameter: 308 + pc - the preconditioner context 309 310 Application Interface Routine: PCSetUp() 311 312 Notes: 313 The interface routine PCSetUp() is not usually called directly by 314 the user, but instead is called by PCApply() if necessary. 315 */ 316 PetscErrorCode PCSetUp_BDDC(PC pc) 317 { 318 PetscErrorCode ierr; 319 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 320 PC_IS *pcis = (PC_IS*)(pc->data); 321 322 PetscFunctionBegin; 323 if (!pc->setupcalled) { 324 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 325 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 326 Also, we decide to directly build the (same) Dirichlet problem */ 327 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 328 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 329 /* Set up all the "iterative substructuring" common block */ 330 ierr = PCISSetUp(pc);CHKERRQ(ierr); 331 /* Get stdout for dbg */ 332 if(pcbddc->dbg_flag) { 333 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 334 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 335 } 336 /* TODO MOVE CODE FRAGMENT */ 337 PetscInt im_active=0; 338 if(pcis->n) im_active = 1; 339 ierr = 
MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 340 /* Analyze local interface */ 341 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 342 /* Set up local constraint matrix */ 343 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 344 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 345 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 346 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 347 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 348 } 349 PetscFunctionReturn(0); 350 } 351 352 /* -------------------------------------------------------------------------- */ 353 /* 354 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 355 356 Input Parameters: 357 . pc - the preconditioner context 358 . r - input vector (global) 359 360 Output Parameter: 361 . z - output vector (global) 362 363 Application Interface Routine: PCApply() 364 */ 365 #undef __FUNCT__ 366 #define __FUNCT__ "PCApply_BDDC" 367 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 368 { 369 PC_IS *pcis = (PC_IS*)(pc->data); 370 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 371 PetscErrorCode ierr; 372 const PetscScalar one = 1.0; 373 const PetscScalar m_one = -1.0; 374 375 /* This code is similar to that provided in nn.c for PCNN 376 NN interface preconditioner changed to BDDC 377 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 378 379 PetscFunctionBegin; 380 /* First Dirichlet solve */ 381 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 382 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 383 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 384 /* 385 Assembling right hand side for BDDC operator 386 - vec1_D for the Dirichlet part (if 
needed, i.e. prec_flag=PETSC_TRUE) 387 - the interface part of the global vector z 388 */ 389 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 390 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 391 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 392 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 393 ierr = VecCopy(r,z);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 395 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 396 397 /* 398 Apply interface preconditioner 399 Results are stored in: 400 - vec1_D (if needed, i.e. with prec_type = PETSC_TRUE) 401 - the interface part of the global vector z 402 */ 403 ierr = PCBDDCApplyInterfacePreconditioner(pc,z);CHKERRQ(ierr); 404 405 /* Second Dirichlet solve and assembling of output */ 406 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 407 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 408 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 409 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 410 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 411 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 412 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 413 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 414 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 415 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 416 417 PetscFunctionReturn(0); 418 419 } 420 421 /* -------------------------------------------------------------------------- */ 422 /* 423 
PCBDDCApplyInterfacePreconditioner - Apply the BDDC preconditioner at the interface. 424 425 */ 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 428 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, Vec z) 429 { 430 PetscErrorCode ierr; 431 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 432 PC_IS* pcis = (PC_IS*) (pc->data); 433 const PetscScalar zero = 0.0; 434 435 PetscFunctionBegin; 436 /* Get Local boundary and apply partition of unity */ 437 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 440 441 /* Application of PHI^T */ 442 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 443 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 444 445 /* Scatter data of coarse_rhs */ 446 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 447 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 448 449 /* Local solution on R nodes */ 450 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 451 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 452 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 453 if(pcbddc->prec_type) { 454 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 456 } 457 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 458 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 459 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 460 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 461 if(pcbddc->prec_type) { 462 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 464 } 465 466 /* Coarse solution */ 467 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 469 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 472 /* Sum contributions from two levels */ 473 /* Apply partition of unity and sum boundary values */ 474 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 475 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 476 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 477 ierr = VecSet(z,zero);CHKERRQ(ierr); 478 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 479 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 480 481 PetscFunctionReturn(0); 482 } 483 484 485 /* -------------------------------------------------------------------------- */ 486 /* 487 PCBDDCSolveSaddlePoint 488 489 */ 490 #undef __FUNCT__ 491 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 492 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 493 { 494 PetscErrorCode ierr; 495 PC_BDDC* 
pcbddc = (PC_BDDC*)(pc->data); 496 497 PetscFunctionBegin; 498 499 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 500 if(pcbddc->n_constraints) { 501 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 502 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 503 } 504 505 PetscFunctionReturn(0); 506 } 507 /* -------------------------------------------------------------------------- */ 508 /* 509 PCBDDCScatterCoarseDataBegin 510 511 */ 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 514 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 515 { 516 PetscErrorCode ierr; 517 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 518 519 PetscFunctionBegin; 520 521 switch(pcbddc->coarse_communications_type){ 522 case SCATTERS_BDDC: 523 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 524 break; 525 case GATHERS_BDDC: 526 break; 527 } 528 PetscFunctionReturn(0); 529 530 } 531 /* -------------------------------------------------------------------------- */ 532 /* 533 PCBDDCScatterCoarseDataEnd 534 535 */ 536 #undef __FUNCT__ 537 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 538 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 539 { 540 PetscErrorCode ierr; 541 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 542 PetscScalar* array_to; 543 PetscScalar* array_from; 544 MPI_Comm comm=((PetscObject)pc)->comm; 545 PetscInt i; 546 547 PetscFunctionBegin; 548 549 switch(pcbddc->coarse_communications_type){ 550 case SCATTERS_BDDC: 551 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 552 break; 553 case GATHERS_BDDC: 554 if(vec_from) VecGetArray(vec_from,&array_from); 555 if(vec_to) VecGetArray(vec_to,&array_to); 556 
switch(pcbddc->coarse_problem_type){ 557 case SEQUENTIAL_BDDC: 558 if(smode == SCATTER_FORWARD) { 559 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 560 if(vec_to) { 561 for(i=0;i<pcbddc->replicated_primal_size;i++) 562 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 563 } 564 } else { 565 if(vec_from) 566 for(i=0;i<pcbddc->replicated_primal_size;i++) 567 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 568 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 569 } 570 break; 571 case REPLICATED_BDDC: 572 if(smode == SCATTER_FORWARD) { 573 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 574 for(i=0;i<pcbddc->replicated_primal_size;i++) 575 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 576 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 577 for(i=0;i<pcbddc->local_primal_size;i++) 578 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 579 } 580 break; 581 case MULTILEVEL_BDDC: 582 break; 583 case PARALLEL_BDDC: 584 break; 585 } 586 if(vec_from) VecRestoreArray(vec_from,&array_from); 587 if(vec_to) VecRestoreArray(vec_to,&array_to); 588 break; 589 } 590 PetscFunctionReturn(0); 591 592 } 593 594 /* -------------------------------------------------------------------------- */ 595 /* 596 PCDestroy_BDDC - Destroys the private context for the NN preconditioner 597 that was created with 
PCCreate_BDDC(). 598 599 Input Parameter: 600 . pc - the preconditioner context 601 602 Application Interface Routine: PCDestroy() 603 */ 604 #undef __FUNCT__ 605 #define __FUNCT__ "PCDestroy_BDDC" 606 PetscErrorCode PCDestroy_BDDC(PC pc) 607 { 608 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* free data created by PCIS */ 613 ierr = PCISDestroy(pc);CHKERRQ(ierr); 614 /* free BDDC data */ 615 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 616 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 617 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 618 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 619 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 620 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 621 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 622 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 623 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 624 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 625 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 626 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 627 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 628 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 629 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 630 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 631 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 632 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 633 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 634 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 635 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 636 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 637 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 638 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 639 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 640 
ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 641 PetscInt i; 642 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 643 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 644 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 645 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 646 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 648 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 649 /* Free the private data structure that was hanging off the PC */ 650 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /* -------------------------------------------------------------------------- */ 655 /*MC 656 PCBDDC - Balancing Domain Decomposition by Constraints. 657 658 Options Database Keys: 659 . -pcbddc ??? - 660 661 Level: intermediate 662 663 Notes: The matrix used with this preconditioner must be of type MATIS 664 665 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 666 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 667 on the subdomains). 668 669 Options for the coarse grid preconditioner can be set with - 670 Options for the Dirichlet subproblem can be set with - 671 Options for the Neumann subproblem can be set with - 672 673 Contributed by Stefano Zampini 674 675 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 676 M*/ 677 EXTERN_C_BEGIN 678 #undef __FUNCT__ 679 #define __FUNCT__ "PCCreate_BDDC" 680 PetscErrorCode PCCreate_BDDC(PC pc) 681 { 682 PetscErrorCode ierr; 683 PC_BDDC *pcbddc; 684 685 PetscFunctionBegin; 686 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 687 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 688 pc->data = (void*)pcbddc; 689 /* create PCIS data structure */ 690 ierr = PCISCreate(pc);CHKERRQ(ierr); 691 /* BDDC specific */ 692 pcbddc->coarse_vec = 0; 693 pcbddc->coarse_rhs = 0; 694 pcbddc->coarse_ksp = 0; 695 pcbddc->coarse_phi_B = 0; 696 pcbddc->coarse_phi_D = 0; 697 pcbddc->vec1_P = 0; 698 pcbddc->vec1_R = 0; 699 pcbddc->vec2_R = 0; 700 pcbddc->local_auxmat1 = 0; 701 pcbddc->local_auxmat2 = 0; 702 pcbddc->R_to_B = 0; 703 pcbddc->R_to_D = 0; 704 pcbddc->ksp_D = 0; 705 pcbddc->ksp_R = 0; 706 pcbddc->local_primal_indices = 0; 707 pcbddc->prec_type = PETSC_FALSE; 708 pcbddc->NeumannBoundaries = 0; 709 pcbddc->ISForDofs = 0; 710 pcbddc->ISForVertices = 0; 711 pcbddc->n_ISForFaces = 0; 712 pcbddc->n_ISForEdges = 0; 713 pcbddc->ConstraintMatrix = 0; 714 pcbddc->use_nnsp_true = PETSC_FALSE; 715 pcbddc->local_primal_sizes = 0; 716 pcbddc->local_primal_displacements = 0; 717 pcbddc->replicated_local_primal_indices = 0; 718 pcbddc->replicated_local_primal_values = 0; 719 pcbddc->coarse_loc_to_glob = 0; 720 pcbddc->dbg_flag = PETSC_FALSE; 721 pcbddc->coarsening_ratio = 8; 722 /* function pointers */ 723 pc->ops->apply = PCApply_BDDC; 724 pc->ops->applytranspose = 0; 725 pc->ops->setup = PCSetUp_BDDC; 726 pc->ops->destroy = PCDestroy_BDDC; 727 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 728 pc->ops->view = 0; 729 pc->ops->applyrichardson = 0; 730 pc->ops->applysymmetricleft = 0; 731 pc->ops->applysymmetricright = 0; 732 /* composing function */ 733 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 734 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 736 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 737 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/*
   Create constraint matrix C = [I 0; 0 const], i.e. one identity row per
   selected vertex dof followed by one (quadrature-weighted) row per
   edge/face constraint.
*/
/* for testing only: forcing PETSC_MISSING_LAPACK_GESVD exercises the POD
   (SYEV-based) code path below instead of the GESVD path. BDDC_USE_POD
   records that we entered this way so the macro can be undone after the
   function (see the matching #undef past the function body).
   NOTE(review): the guard is `#if defined(...)` followed by a re-#define of
   the same macro to 1 — presumably intentional for this testing hack, but
   it can trigger a redefinition warning if the macro was defined empty;
   confirm against the build configuration. */
#if defined(PETSC_MISSING_LAPACK_GESVD)
#define BDDC_USE_POD
#define PETSC_MISSING_LAPACK_GESVD 1
#endif

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
/*
   PCBDDCCreateConstraintMatrix - builds pcbddc->ConstraintMatrix (SEQAIJ,
   local_primal_size x pcis->n) from the vertex index set and the edge/face
   index sets, optionally filtering near-linearly-dependent constraint rows
   through an SVD (LAPACK GESVD) or, when GESVD is missing, through a POD of
   the constraint correlation matrix (LAPACK SYEV/HEEV).

   Input:  pc - the BDDC preconditioner context; reads pc->pmat's attached
           near null space (if none is attached, the constant vector is used
           and use_nnsp_true is forced).
   Output: fills pcbddc->ConstraintMatrix, pcbddc->n_vertices,
           pcbddc->n_constraints, pcbddc->local_primal_size.
   All temporary workspace allocated here is freed before returning.
*/
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS*         pcis = (PC_IS*)(pc->data);
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       *nnz,*vertices,*is_indices;
  PetscScalar    *temp_quadrature_constraint;
  PetscInt       *temp_indices,*temp_indices_to_constraint;
  PetscInt       local_primal_size,i,j,k,total_counts,max_size_of_constraint;
  PetscInt       n_constraints,n_vertices,size_of_constraint;
  PetscReal      quad_value;
  PetscBool      nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true;
  PetscInt       nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr;
  IS             *used_IS;
  const MatType  impMatType=MATSEQAIJ;
  PetscBLASInt   Bs,Bt,lwork,lierr;
  PetscReal      tol=1.0e-8;          /* drop tolerance for singular values / eigenvalues */
  Vec            *localnearnullsp;
  PetscScalar    *work,*temp_basis,*array_vector,*correlation_mat;
  PetscReal      *rwork,*singular_vals;
  /* some conditional variables */
#if defined(PETSC_MISSING_LAPACK_GESVD)
  PetscScalar    dot_result;
  PetscScalar    one=1.0,zero=0.0;
  PetscInt       ii;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    val1,val2;
#else
  PetscBLASInt   Bone=1;
#endif
#else
  PetscBLASInt   dummy_int;
  PetscScalar    dummy_scalar;
#endif

  PetscFunctionBegin;
  /* check if near null space is attached to global mat */
  if(pc->pmat->nearnullsp) {
    nnsp_has_cnst = pc->pmat->nearnullsp->has_cnst;
    nnsp_size = pc->pmat->nearnullsp->n;
  } else { /* if near null space is not provided it uses constants */
    nnsp_has_cnst = PETSC_TRUE;
    use_nnsp_true = PETSC_TRUE;
  }
  if(nnsp_has_cnst) {
    nnsp_addone = 1;
  }
  /*
       Evaluate maximum storage size needed by the procedure
       - temp_indices will contain the start index of each constraint stored as follows
       - temp_indices_to_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
       - temp_quadrature_constraint[temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
  */
  total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges;
  total_counts *= (nnsp_addone+nnsp_size);  /* upper bound: one row per (set, near-null-space vector) pair */
  ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
  total_counts = 0;
  max_size_of_constraint = 0;
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
    total_counts += j;
    if(j>max_size_of_constraint) max_size_of_constraint=j;
  }
  total_counts *= (nnsp_addone+nnsp_size);
  ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
  /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
  /* zero all workspace pointers so the unconditional PetscFree calls at the
     end are safe on the code paths that never allocate them */
  rwork = 0;
  work = 0;
  singular_vals = 0;
  temp_basis = 0;
  correlation_mat = 0;
  if(!pcbddc->use_nnsp_true) {
    PetscScalar temp_work;
#if defined(PETSC_MISSING_LAPACK_GESVD)
    /* POD */
    PetscInt max_n;
    max_n = nnsp_addone+nnsp_size;
    /* using some techniques borrowed from Proper Orthogonal Decomposition */
    ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
    ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
    ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* HEEV needs a real workspace of size at least 3*n-2; 3*n is allocated */
    ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    Bt = PetscBLASIntCast(max_n);
    lwork=-1;
#if !defined(PETSC_USE_COMPLEX)
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
#else
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
#endif
#else /* on missing GESVD */
    /* SVD */
    PetscInt max_n,min_n;
    max_n = max_size_of_constraint;
    min_n = nnsp_addone+nnsp_size;
    if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
      min_n = max_size_of_constraint;
      max_n = nnsp_addone+nnsp_size;
    }
    ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* complex GESVD needs a real workspace of size 5*min(m,n) */
    ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    lwork=-1;
    Bs = PetscBLASIntCast(max_n);
    Bt = PetscBLASIntCast(min_n);
    dummy_int = Bs;
#if !defined(PETSC_USE_COMPLEX)
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
#else
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
#endif
    if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
#endif
    /* Allocate optimal workspace */
    lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
    total_counts = (PetscInt)lwork;
    ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* get local part of global near null space vectors */
  ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pc->pmat->nearnullsp->vecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  /* Now we can loop on constraining sets */
  total_counts=0;
  temp_indices[0]=0;
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
    }
    temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
    temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
    ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
    ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    if(nnsp_has_cnst) {
      temp_constraints++;
      /* constant constraint, normalized so the row has unit 2-norm */
      quad_value = 1.0/PetscSqrtReal((PetscReal)size_of_constraint);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
      }
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    for(k=0;k<nnsp_size;k++) {
      temp_constraints++;
      /* one constraint row per near null space vector, restricted to this set */
      ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
      }
      ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      total_counts++;
    }
    ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* perform SVD on the constraints if use_nnsp_true has not been requested by the user */
    if(!use_nnsp_true) {
      Bs = PetscBLASIntCast(size_of_constraint);
      Bt = PetscBLASIntCast(temp_constraints);
#if defined(PETSC_MISSING_LAPACK_GESVD)
      ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr);
      /* Store upper triangular part of correlation matrix */
      for(j=0;j<temp_constraints;j++) {
        for(k=0;k<j+1;k++) {
#if defined(PETSC_USE_COMPLEX)
          /* hand made complex dot product */
          dot_result = 0.0;
          for (ii=0; ii<size_of_constraint; ii++) {
            val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
            val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
            dot_result += val1*PetscConj(val2);
          }
#else
          dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
                                &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
#endif
          correlation_mat[j*temp_constraints+k]=dot_result;
        }
      }
#if !defined(PETSC_USE_COMPLEX)
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
#else
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
      /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
      j=0;
      while( j < Bt && singular_vals[j] < tol) j++;
      total_counts=total_counts-j;  /* j small eigenvalues discarded */
      if(j<temp_constraints) {
        /* scale eigenvectors by 1/sqrt(lambda) so the retained POD basis rows
           are orthonormal, then project the constraints onto that basis */
        for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
        BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
        /* copy POD basis into used quadrature memory; eigenvalues are in
           ascending order, so the retained vectors are taken from the back */
        for(k=0;k<Bt-j;k++) {
          for(ii=0;ii<size_of_constraint;ii++) {
            temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
          }
        }
      }
#else /* on missing GESVD */
      PetscInt min_n = temp_constraints;
      if(min_n > size_of_constraint) min_n = size_of_constraint;
      dummy_int = Bs;
      /* "O": left singular vectors overwrite the constraint rows in place */
#if !defined(PETSC_USE_COMPLEX)
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr);
#else
      LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals,
                   &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
      /* retain singular values greater than tol: note that lapack SVD gives them in descending order */
      j=0;
      while( j < min_n && singular_vals[min_n-j-1] < tol) j++;
      total_counts = total_counts-(PetscInt)Bt+(min_n-j);
#endif
    }
  }
  /* assemble the constraint matrix: identity rows for the vertices first,
     then the (possibly SVD-filtered) quadrature rows */
  n_constraints=total_counts;
  ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr);
  local_primal_size = n_vertices+n_constraints;
  ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
  ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
  ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { nnz[i]= 1; }
  for(i=0;i<n_constraints;i++) { nnz[i+n_vertices]=temp_indices[i+1]-temp_indices[i]; }
  ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
  ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_vertices;i++) { ierr = MatSetValue(pcbddc->ConstraintMatrix,i,vertices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); }
  ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr);
  for(i=0;i<n_constraints;i++) {
    j=i+n_vertices;
    size_of_constraint=temp_indices[i+1]-temp_indices[i];
    ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&j,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* set quantities in pcbddc data structure */
  pcbddc->n_vertices = n_vertices;
  pcbddc->n_constraints = n_constraints;
  pcbddc->local_primal_size = n_vertices+n_constraints;
  /* free workspace no longer needed */
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscFree(temp_basis);CHKERRQ(ierr);
  ierr = PetscFree(singular_vals);CHKERRQ(ierr);
  ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices);CHKERRQ(ierr);
  ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
  ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) { ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); }
  ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* undo the testing hack enabled before this function */
#ifdef BDDC_USE_POD
#undef PETSC_MISSING_LAPACK_GESVD
#endif
/* -------------------------------------------------------------------------- */
/*
   PCBDDCCoarseSetUp - sets up the local solvers, scatters and coarse-space
   objects needed to apply the BDDC preconditioner.
*/
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCoarseSetUp"
static PetscErrorCode PCBDDCCoarseSetUp(PC pc)
{
  PetscErrorCode ierr;

  PC_IS*            pcis = (PC_IS*)(pc->data);
  PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
  Mat_IS            *matis = (Mat_IS*)pc->pmat->data;
  IS                is_R_local;
  IS                is_V_local;
  IS                is_C_local;
  IS                is_aux1;
  IS                is_aux2;
  const VecType     impVecType;
  const MatType     impMatType;
  PetscInt          n_R=0;
  PetscInt          n_D=0;
  PetscInt          n_B=0;
  PetscMPIInt       totprocs;
  PetscScalar       zero=0.0;
  PetscScalar       one=1.0;
  PetscScalar       m_one=-1.0;
  PetscScalar*      array;
  PetscScalar       *coarse_submat_vals;
  PetscInt          *idx_R_local;
  PetscInt          *idx_V_B;
  PetscScalar       *coarsefunctions_errors;
  PetscScalar       *constraints_errors;
  /* auxiliary indices */
  PetscInt          s,i,j,k;
  /* for verbose output of bddc */
  PetscViewer       viewer=pcbddc->dbg_viewer;
  PetscBool         dbg_flag=pcbddc->dbg_flag;
  /* for counting coarse dofs */
  PetscScalar       coarsesum;
  PetscInt          n_vertices=pcbddc->n_vertices,n_constraints=pcbddc->n_constraints;
  PetscInt          size_of_constraint;
  PetscInt          *row_cmat_indices;
  PetscScalar       *row_cmat_values;
  const PetscInt    *vertices;

  PetscFunctionBegin;
  /* Set Non-overlapping dimensions */
  n_B = pcis->n_B; n_D = pcis->n - n_B;
  ierr = ISGetIndices(pcbddc->ISForVertices,&vertices);CHKERRQ(ierr);
  /* First let's count coarse dofs: note that we allow to
have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) */ 1088 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1089 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1090 for(i=0;i<n_vertices;i++) { array[ vertices[i] ] = one; } 1091 1092 for(i=0;i<n_constraints;i++) { 1093 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1094 for (j=0; j<size_of_constraint; j++) { 1095 k = row_cmat_indices[j]; 1096 if( array[k] == zero ) { 1097 array[k] = one; 1098 break; 1099 } 1100 } 1101 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1102 } 1103 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1104 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1105 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1106 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1107 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1109 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1110 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 1111 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1112 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1113 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1114 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1115 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 1116 pcbddc->coarse_size = (PetscInt) coarsesum; 1117 1118 /* Dohrmann's notation: dofs splitted in R 
(Remaining: all dofs but the vertices) and V (Vertices) */ 1119 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1120 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1121 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1122 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1123 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1124 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1125 if(dbg_flag) { 1126 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1127 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1128 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1129 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1130 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1131 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1132 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1133 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem = %d (%f)\n",pcbddc->coarse_size,coarsesum);CHKERRQ(ierr); 1134 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1135 } 1136 /* Allocate needed vectors */ 1137 /* Set Mat type for local matrices needed by BDDC precondtioner */ 1138 impMatType = MATSEQDENSE; 1139 impVecType = VECSEQ; 1140 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1141 ierr = VecDuplicate(pcis->vec1_N,&pcis->vec2_N);CHKERRQ(ierr); 1142 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1143 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1145 ierr 
= VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1146 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1147 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1148 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1149 1150 /* Creating some index sets needed */ 1151 /* For submatrices */ 1152 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_COPY_VALUES,&is_R_local);CHKERRQ(ierr); 1153 if(n_vertices) { 1154 ierr = ISDuplicate(pcbddc->ISForVertices,&is_V_local);CHKERRQ(ierr); 1155 ierr = ISCopy(pcbddc->ISForVertices,is_V_local);CHKERRQ(ierr); 1156 } 1157 if(n_constraints) { ierr = ISCreateStride (PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); } 1158 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1159 { 1160 PetscInt *aux_array1; 1161 PetscInt *aux_array2; 1162 PetscScalar value; 1163 1164 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1165 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1166 1167 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1168 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1169 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1172 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1173 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1174 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1175 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { 
aux_array1[s] = i; s++; } } 1176 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1177 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1178 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1179 for (i=0, s=0; i<n_B; i++) { if (array[i] > one) { aux_array2[s] = i; s++; } } 1180 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1181 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1182 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1183 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1184 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1185 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1186 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1187 1188 if(pcbddc->prec_type || dbg_flag ) { 1189 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1190 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1191 for (i=0, s=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[s] = i; s++; } } 1192 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1193 ierr = ISCreateGeneral(PETSC_COMM_SELF,s,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1194 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1195 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1196 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1197 } 1198 1199 /* Check scatters */ 1200 if(dbg_flag) { 1201 1202 Vec vec_aux; 1203 1204 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1205 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_B scatter\n");CHKERRQ(ierr); 1206 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1207 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1208 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1209 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1210 ierr = 
VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1211 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1214 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1216 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1217 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1218 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1219 1220 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1221 ierr = VecSetRandom(pcis->vec1_B,PETSC_NULL);CHKERRQ(ierr); 1222 ierr = VecDuplicate(pcis->vec1_B,&vec_aux);CHKERRQ(ierr); 1223 ierr = VecCopy(pcis->vec1_B,vec_aux);CHKERRQ(ierr); 1224 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1227 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_B);CHKERRQ(ierr); 1229 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1230 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_B REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1231 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1232 1233 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1234 ierr = PetscViewerASCIIPrintf(viewer,"Checking pcbddc->R_to_D 
scatter\n");CHKERRQ(ierr); 1235 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1236 1237 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1238 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1239 ierr = VecDuplicate(pcbddc->vec1_R,&vec_aux);CHKERRQ(ierr); 1240 ierr = VecCopy(pcbddc->vec1_R,vec_aux);CHKERRQ(ierr); 1241 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1242 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1244 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,vec_aux,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1245 ierr = VecAXPY(vec_aux,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1246 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1247 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D FORWARD error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1248 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1249 1250 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1251 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1252 ierr = VecDuplicate(pcis->vec1_D,&vec_aux);CHKERRQ(ierr); 1253 ierr = VecCopy(pcis->vec1_D,vec_aux);CHKERRQ(ierr); 1254 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1255 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec1_R,vec_aux,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1258 ierr = VecAXPY(vec_aux,m_one,pcis->vec1_D);CHKERRQ(ierr); 1259 ierr = VecNorm(vec_aux,NORM_INFINITY,&value);CHKERRQ(ierr); 1260 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d R_to_D REVERSE error = % 1.14e\n",PetscGlobalRank,value);CHKERRQ(ierr); 1261 ierr = VecDestroy(&vec_aux);CHKERRQ(ierr); 1262 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1263 1264 } 1265 } 1266 1267 /* vertices in boundary numbering */ 1268 if(n_vertices) { 1269 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1270 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1271 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1272 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1273 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1276 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1277 for (i=0; i<n_vertices; i++) { 1278 s=0; 1279 while (array[s] != i ) {s++;} 1280 idx_V_B[i]=s; 1281 } 1282 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1283 } 1284 1285 1286 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1287 { 1288 Mat A_RR; 1289 PC pc_temp; 1290 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1291 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1292 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1293 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1294 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1295 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1296 /* default */ 1297 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1298 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1299 /* Allow user's customization */ 1300 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1301 /* Set Up KSP for Dirichlet problem of BDDC */ 1302 ierr = 
KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1303 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1304 ierr = MatGetSubMatrix(matis->A,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1305 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1306 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1307 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1308 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1309 //ierr = KSPSetOptionsPrefix();CHKERRQ(ierr); 1310 /* default */ 1311 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1312 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1313 /* Allow user's customization */ 1314 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1315 /* Set Up KSP for Neumann problem of BDDC */ 1316 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1317 /* check Dirichlet and Neumann solvers */ 1318 if(pcbddc->dbg_flag) { 1319 Vec temp_vec; 1320 PetscScalar value; 1321 1322 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1323 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1324 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1325 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1326 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1327 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1328 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1329 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1331 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1332 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1333 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1334 
ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1335 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1336 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1337 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1338 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1339 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1340 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1341 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1342 } 1343 /* free Neumann problem's matrix */ 1344 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1345 } 1346 1347 /* Assemble all remaining stuff needed to apply BDDC */ 1348 { 1349 Mat A_RV,A_VR,A_VV; 1350 Mat M1,M2; 1351 Mat C_CR; 1352 Mat AUXMAT; 1353 Vec vec1_C; 1354 Vec vec2_C; 1355 Vec vec1_V; 1356 Vec vec2_V; 1357 PetscInt *nnz; 1358 PetscInt *auxindices; 1359 PetscInt index; 1360 PetscScalar* array2; 1361 MatFactorInfo matinfo; 1362 1363 /* Allocating some extra storage just to be safe */ 1364 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1365 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1366 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1367 1368 /* some work vectors on vertices and/or constraints */ 1369 if(n_vertices) { 1370 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1371 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1372 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1373 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1374 } 1375 if(pcbddc->n_constraints) { 1376 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1377 ierr = VecSetSizes(vec1_C,pcbddc->n_constraints,pcbddc->n_constraints);CHKERRQ(ierr); 1378 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1379 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1380 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1381 } 
1382 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1383 if(n_constraints) { 1384 /* some work vectors */ 1385 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1386 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1387 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1388 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1389 1390 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1391 for(i=0;i<n_constraints;i++) { 1392 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1393 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1394 /* Get row of constraint matrix in R numbering */ 1395 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1396 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1397 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1398 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1399 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1400 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1401 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1402 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1403 /* Solve for row of constraint matrix in R numbering */ 1404 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1405 /* Set values */ 1406 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1407 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1408 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1409 } 1410 ierr = 
MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 1413 /* Create Constraint matrix on R nodes: C_{CR} */ 1414 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1415 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1416 1417 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1418 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1419 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1420 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1421 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1422 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1423 1424 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1425 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1426 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1427 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1428 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1429 for(i=0;i<n_constraints;i++) { 1430 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1431 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1432 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1433 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1434 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1435 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1436 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1437 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1438 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1439 } 1440 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1441 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1442 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1443 /* Assemble local_auxmat1 
= M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1444 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1445 1446 } 1447 1448 /* Get submatrices from subdomain matrix */ 1449 if(n_vertices){ 1450 ierr = MatGetSubMatrix(matis->A,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1451 ierr = MatGetSubMatrix(matis->A,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1452 ierr = MatGetSubMatrix(matis->A,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1453 /* Assemble M2 = A_RR^{-1}A_RV */ 1454 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1455 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1456 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1457 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1458 for(i=0;i<n_vertices;i++) { 1459 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1460 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1461 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1462 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1463 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1464 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1465 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1466 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1467 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1468 } 1469 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1470 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1471 } 1472 1473 /* Matrix of coarse basis functions (local) */ 1474 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1475 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1476 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1477 ierr = 
MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1478 if(pcbddc->prec_type || dbg_flag ) { 1479 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1480 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1481 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1482 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1483 } 1484 1485 if(dbg_flag) { 1486 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1487 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1488 } 1489 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1490 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1491 1492 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1493 for(i=0;i<n_vertices;i++){ 1494 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1495 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1496 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1497 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1498 /* solution of saddle point problem */ 1499 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1500 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1501 if(n_constraints) { 1502 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1503 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1504 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1505 } 1506 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1507 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1508 1509 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1510 /* coarse basis functions */ 1511 ierr = 
VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1512 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1515 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1516 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1517 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1518 if( pcbddc->prec_type || dbg_flag ) { 1519 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1522 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1523 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1524 } 1525 /* subdomain contribution to coarse matrix */ 1526 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1527 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } //WARNING -> column major ordering 1528 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1529 if(n_constraints) { 1530 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1531 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } //WARNING -> column major ordering 1532 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1533 } 1534 1535 if( dbg_flag ) { 1536 /* assemble subdomain vector on nodes */ 1537 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1538 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1539 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1540 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 
1541 array[ vertices[i] ] = one; 1542 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1543 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1544 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 1545 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1546 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1547 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1548 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1549 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1550 if(n_constraints) { 1551 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1552 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1553 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1554 } 1555 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1556 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 1557 /* check saddle point solution */ 1558 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1559 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1560 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 1561 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1562 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1563 array[i]=array[i]+m_one; /* shift by the identity matrix */ 1564 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1565 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 1566 } 1567 } 1568 1569 for(i=0;i<n_constraints;i++){ 1570 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 1571 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 1572 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 1573 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 1574 /* solution of saddle point problem */ 1575 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 1576 ierr = 
MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 1577 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1578 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 1579 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1580 /* coarse basis functions */ 1581 index=i+n_vertices; 1582 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1583 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1586 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1587 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1588 if( pcbddc->prec_type || dbg_flag ) { 1589 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1592 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 1593 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1594 } 1595 /* subdomain contribution to coarse matrix */ 1596 if(n_vertices) { 1597 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1598 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} //WARNING -> column major ordering 1599 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1600 } 1601 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1602 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} //WARNING -> column major ordering 1603 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1604 1605 if( dbg_flag ) { 1606 /* assemble 
subdomain vector on nodes */ 1607 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1608 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1609 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1610 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 1611 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1612 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1613 /* assemble subdomain vector of lagrange multipliers */ 1614 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1615 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1616 if( n_vertices) { 1617 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1618 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 1619 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1620 } 1621 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1622 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 1623 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1624 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1625 /* check saddle point solution */ 1626 ierr = MatMult(matis->A,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 1627 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 1628 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 1629 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 1630 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1631 array[index]=array[index]+m_one; /* shift by the identity matrix */ 1632 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 1633 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 1634 } 1635 } 1636 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1637 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1638 if( pcbddc->prec_type || dbg_flag ) { 1639 ierr = 
MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 } 1642 /* Checking coarse_sub_mat and coarse basis functions */ 1643 /* It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1644 if(dbg_flag) { 1645 1646 Mat coarse_sub_mat; 1647 Mat TM1,TM2,TM3,TM4; 1648 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 1649 const MatType checkmattype=MATSEQAIJ; 1650 PetscScalar value; 1651 1652 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1653 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1654 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1655 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1656 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1657 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1658 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1659 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 1660 1661 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1662 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 1663 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1664 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1665 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1666 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1667 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1669 ierr =
MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1670 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1672 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1673 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1674 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1675 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1676 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 1677 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 1678 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 1679 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 1680 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 1681 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 1682 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 1683 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 1684 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1685 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1686 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1687 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1688 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1689 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1690 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1691 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1692 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1693 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1694 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1695 ierr = 
MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1696 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 1697 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 1698 } 1699 1700 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 1701 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 1702 /* free memory */ 1703 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 1704 ierr = PetscFree(auxindices);CHKERRQ(ierr); 1705 ierr = PetscFree(nnz);CHKERRQ(ierr); 1706 if(n_vertices) { 1707 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 1708 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 1709 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1710 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1711 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1712 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1713 } 1714 if(pcbddc->n_constraints) { 1715 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 1716 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 1717 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1718 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1719 } 1720 } 1721 /* free memory */ 1722 if(n_vertices) { 1723 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1724 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 1725 } 1726 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1727 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 1728 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1729 1730 PetscFunctionReturn(0); 1731 } 1732 1733 /* -------------------------------------------------------------------------- */ 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 1737 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 1738 { 1739 1740 1741 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1742 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1743 PC_IS *pcis = (PC_IS*)pc->data; 1744 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 1745 MPI_Comm coarse_comm; 1746 1747 /* common to all choiches */ 1748 PetscScalar 
*temp_coarse_mat_vals; 1749 PetscScalar *ins_coarse_mat_vals; 1750 PetscInt *ins_local_primal_indices; 1751 PetscMPIInt *localsizes2,*localdispl2; 1752 PetscMPIInt size_prec_comm; 1753 PetscMPIInt rank_prec_comm; 1754 PetscMPIInt active_rank=MPI_PROC_NULL; 1755 PetscMPIInt master_proc=0; 1756 PetscInt ins_local_primal_size; 1757 /* specific to MULTILEVEL_BDDC */ 1758 PetscMPIInt *ranks_recv; 1759 PetscMPIInt count_recv=0; 1760 PetscMPIInt rank_coarse_proc_send_to; 1761 PetscMPIInt coarse_color = MPI_UNDEFINED; 1762 ISLocalToGlobalMapping coarse_ISLG; 1763 /* some other variables */ 1764 PetscErrorCode ierr; 1765 const MatType coarse_mat_type; 1766 const PCType coarse_pc_type; 1767 const KSPType coarse_ksp_type; 1768 PC pc_temp; 1769 PetscInt i,j,k,bs; 1770 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 1771 /* verbose output viewer */ 1772 PetscViewer viewer=pcbddc->dbg_viewer; 1773 PetscBool dbg_flag=pcbddc->dbg_flag; 1774 1775 PetscFunctionBegin; 1776 1777 ins_local_primal_indices = 0; 1778 ins_coarse_mat_vals = 0; 1779 localsizes2 = 0; 1780 localdispl2 = 0; 1781 temp_coarse_mat_vals = 0; 1782 coarse_ISLG = 0; 1783 1784 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 1785 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 1786 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 1787 1788 /* Assign global numbering to coarse dofs */ 1789 { 1790 PetscScalar one=1.,zero=0.; 1791 PetscScalar *array; 1792 PetscMPIInt *auxlocal_primal; 1793 PetscMPIInt *auxglobal_primal; 1794 PetscMPIInt *all_auxglobal_primal; 1795 PetscMPIInt *all_auxglobal_primal_dummy; 1796 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 1797 PetscInt *vertices,*row_cmat_indices; 1798 PetscInt size_of_constraint; 1799 1800 /* Construct needed data structures for message passing */ 1801 ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 1802 ierr = 
PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 1803 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 1804 /* Gather local_primal_size information for all processes */ 1805 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 1806 pcbddc->replicated_primal_size = 0; 1807 for (i=0; i<size_prec_comm; i++) { 1808 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 1809 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 1810 } 1811 if(rank_prec_comm == 0) { 1812 /* allocate some auxiliary space */ 1813 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 1814 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 1815 } 1816 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 1817 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 1818 1819 /* First let's count coarse dofs: note that we allow to have a constraint on a subdomain and not its counterpart on the neighbour subdomain (if user wants) 1820 This code fragment assumes that the number of local constraints per connected component 1821 is not greater than the number of nodes defined for the connected component 1822 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 1823 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 1824 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1825 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1826 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1827 
for(i=0;i<pcbddc->n_vertices;i++) { /* note that pcbddc->n_vertices can be different from size of ISForVertices */ 1828 array[ vertices[i] ] = one; 1829 auxlocal_primal[i] = vertices[i]; 1830 } 1831 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&vertices);CHKERRQ(ierr); 1832 for(i=0;i<pcbddc->n_constraints;i++) { 1833 ierr = MatGetRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1834 for (j=0; j<size_of_constraint; j++) { 1835 k = row_cmat_indices[j]; 1836 if( array[k] == zero ) { 1837 array[k] = one; 1838 auxlocal_primal[i+pcbddc->n_vertices] = k; 1839 break; 1840 } 1841 } 1842 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,pcbddc->n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1843 } 1844 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1845 1846 /* Now assign them a global numbering */ 1847 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 1848 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 1849 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 1850 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1851 1852 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 1853 /* It implements a function similar to PetscSortRemoveDupsInt */ 1854 if(rank_prec_comm==0) { 1855 /* dummy argument since PetscSortMPIInt doesn't exist! 
*/ 1856 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 1857 k=1; 1858 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 1859 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 1860 if(j != all_auxglobal_primal[i] ) { 1861 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 1862 k++; 1863 j=all_auxglobal_primal[i]; 1864 } 1865 } 1866 } else { 1867 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 1868 } 1869 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */ 1870 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 1871 1872 /* Now get global coarse numbering of local primal nodes */ 1873 for(i=0;i<pcbddc->local_primal_size;i++) { 1874 k=0; 1875 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 1876 pcbddc->local_primal_indices[i]=k; 1877 } 1878 if(dbg_flag) { 1879 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1880 ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 1881 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1882 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1883 for(i=0;i<pcbddc->local_primal_size;i++) { 1884 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 1885 } 1886 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1887 } 1888 /* free allocated memory */ 1889 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 1890 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 1891 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 1892 if(rank_prec_comm == 0) { 1893 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 1894 } 1895 } 1896 1897 /* adapt coarse problem 
type */ 1898 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 1899 pcbddc->coarse_problem_type = PARALLEL_BDDC; 1900 1901 switch(pcbddc->coarse_problem_type){ 1902 1903 case(MULTILEVEL_BDDC): //we define a coarse mesh where subdomains are elements 1904 { 1905 /* we need additional variables */ 1906 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 1907 MetisInt *metis_coarse_subdivision; 1908 MetisInt options[METIS_NOPTIONS]; 1909 PetscMPIInt size_coarse_comm,rank_coarse_comm; 1910 PetscMPIInt procs_jumps_coarse_comm; 1911 PetscMPIInt *coarse_subdivision; 1912 PetscMPIInt *total_count_recv; 1913 PetscMPIInt *total_ranks_recv; 1914 PetscMPIInt *displacements_recv; 1915 PetscMPIInt *my_faces_connectivity; 1916 PetscMPIInt *petsc_faces_adjncy; 1917 MetisInt *faces_adjncy; 1918 MetisInt *faces_xadj; 1919 PetscMPIInt *number_of_faces; 1920 PetscMPIInt *faces_displacements; 1921 PetscInt *array_int; 1922 PetscMPIInt my_faces=0; 1923 PetscMPIInt total_faces=0; 1924 PetscInt ranks_stretching_ratio; 1925 1926 /* define some quantities */ 1927 pcbddc->coarse_communications_type = SCATTERS_BDDC; 1928 coarse_mat_type = MATIS; 1929 coarse_pc_type = PCBDDC; 1930 coarse_ksp_type = KSPCHEBYCHEV; 1931 1932 /* details of coarse decomposition */ 1933 n_subdomains = pcbddc->active_procs; 1934 n_parts = n_subdomains/pcbddc->coarsening_ratio; 1935 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 1936 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 1937 1938 printf("Coarse algorithm details: \n"); 1939 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1)); 1940 1941 /* build CSR graph of subdomains' connectivity through faces */ 1942 ierr = PetscMalloc 
(pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 1943 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 1944 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 1945 for(j=0;j<pcis->n_shared[i];j++){ 1946 array_int[ pcis->shared[i][j] ]+=1; 1947 } 1948 } 1949 for(i=1;i<pcis->n_neigh;i++){ 1950 for(j=0;j<pcis->n_shared[i];j++){ 1951 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1952 my_faces++; 1953 break; 1954 } 1955 } 1956 } 1957 //printf("I found %d faces.\n",my_faces); 1958 1959 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 1960 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 1961 my_faces=0; 1962 for(i=1;i<pcis->n_neigh;i++){ 1963 for(j=0;j<pcis->n_shared[i];j++){ 1964 if(array_int[ pcis->shared[i][j] ] == 1 ){ 1965 my_faces_connectivity[my_faces]=pcis->neigh[i]; 1966 my_faces++; 1967 break; 1968 } 1969 } 1970 } 1971 if(rank_prec_comm == master_proc) { 1972 //printf("I found %d total faces.\n",total_faces); 1973 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 1974 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 1975 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 1976 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 1977 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 1978 } 1979 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1980 if(rank_prec_comm == master_proc) { 1981 faces_xadj[0]=0; 1982 faces_displacements[0]=0; 1983 j=0; 1984 for(i=1;i<size_prec_comm+1;i++) { 1985 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 1986 if(number_of_faces[i-1]) { 1987 j++; 1988 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 1989 } 1990 } 1991 printf("The 
J I count is %d and should be %d\n",j,n_subdomains); 1992 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces); 1993 } 1994 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 1995 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 1996 ierr = PetscFree(array_int);CHKERRQ(ierr); 1997 if(rank_prec_comm == master_proc) { 1998 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 1999 printf("This is the face connectivity (actual ranks)\n"); 2000 for(i=0;i<n_subdomains;i++){ 2001 printf("proc %d is connected with \n",i); 2002 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2003 printf("%d ",faces_adjncy[j]); 2004 printf("\n"); 2005 } 2006 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2007 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2008 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2009 } 2010 2011 if( rank_prec_comm == master_proc ) { 2012 2013 PetscInt heuristic_for_metis=3; 2014 2015 ncon=1; 2016 faces_nvtxs=n_subdomains; 2017 /* partition graoh induced by face connectivity */ 2018 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2019 ierr = METIS_SetDefaultOptions(options); 2020 /* we need a contiguous partition of the coarse mesh */ 2021 options[METIS_OPTION_CONTIG]=1; 2022 options[METIS_OPTION_DBGLVL]=1; 2023 options[METIS_OPTION_NITER]=30; 2024 //options[METIS_OPTION_NCUTS]=1; 2025 printf("METIS PART GRAPH\n"); 2026 if(n_subdomains>n_parts*heuristic_for_metis) { 2027 printf("Using Kway\n"); 2028 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2029 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2030 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2031 } else { 2032 printf("Using Recursive\n"); 2033 
ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2034 } 2035 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2036 printf("Partition done!\n"); 2037 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2038 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2039 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2040 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2041 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2042 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2043 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2044 } 2045 2046 /* Create new communicator for coarse problem splitting the old one */ 2047 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2048 coarse_color=0; //for communicator splitting 2049 active_rank=rank_prec_comm; //for insertion of matrix values 2050 } 2051 // procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2052 // key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator 2053 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2054 2055 if( coarse_color == 0 ) { 2056 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2057 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2058 printf("Details of coarse comm\n"); 2059 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2060 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts); 2061 } else { 
2062 rank_coarse_comm = MPI_PROC_NULL; 2063 } 2064 2065 /* master proc take care of arranging and distributing coarse informations */ 2066 if(rank_coarse_comm == master_proc) { 2067 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2068 //ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2069 //ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 2070 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2071 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2072 /* some initializations */ 2073 displacements_recv[0]=0; 2074 //PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero 2075 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2076 for(j=0;j<size_coarse_comm;j++) 2077 for(i=0;i<size_prec_comm;i++) 2078 if(coarse_subdivision[i]==j) 2079 total_count_recv[j]++; 2080 /* displacements needed for scatterv of total_ranks_recv */ 2081 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2082 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2083 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2084 for(j=0;j<size_coarse_comm;j++) { 2085 for(i=0;i<size_prec_comm;i++) { 2086 if(coarse_subdivision[i]==j) { 2087 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2088 total_count_recv[j]+=1; 2089 } 2090 } 2091 } 2092 for(j=0;j<size_coarse_comm;j++) { 2093 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2094 for(i=0;i<total_count_recv[j];i++) { 2095 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2096 } 2097 printf("\n"); 2098 } 2099 
2100 /* identify new decomposition in terms of ranks in the old communicator */ 2101 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2102 printf("coarse_subdivision in old end new ranks\n"); 2103 for(i=0;i<size_prec_comm;i++) 2104 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2105 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2106 } else { 2107 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2108 } 2109 printf("\n"); 2110 } 2111 2112 /* Scatter new decomposition for send details */ 2113 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2114 /* Scatter receiving details to members of coarse decomposition */ 2115 if( coarse_color == 0) { 2116 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2117 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2118 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2119 } 2120 2121 //printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2122 //if(coarse_color == 0) { 2123 // printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2124 // for(i=0;i<count_recv;i++) 2125 // printf("%d ",ranks_recv[i]); 2126 // printf("\n"); 2127 //} 2128 2129 if(rank_prec_comm == master_proc) { 2130 //ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2131 //ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2132 //ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 2133 free(coarse_subdivision); 2134 free(total_count_recv); 2135 free(total_ranks_recv); 2136 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2137 } 2138 break; 2139 } 2140 2141 
case(REPLICATED_BDDC): 2142 2143 pcbddc->coarse_communications_type = GATHERS_BDDC; 2144 coarse_mat_type = MATSEQAIJ; 2145 coarse_pc_type = PCLU; 2146 coarse_ksp_type = KSPPREONLY; 2147 coarse_comm = PETSC_COMM_SELF; 2148 active_rank = rank_prec_comm; 2149 break; 2150 2151 case(PARALLEL_BDDC): 2152 2153 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2154 coarse_mat_type = MATMPIAIJ; 2155 coarse_pc_type = PCREDUNDANT; 2156 coarse_ksp_type = KSPPREONLY; 2157 coarse_comm = prec_comm; 2158 active_rank = rank_prec_comm; 2159 break; 2160 2161 case(SEQUENTIAL_BDDC): 2162 pcbddc->coarse_communications_type = GATHERS_BDDC; 2163 coarse_mat_type = MATSEQAIJ; 2164 coarse_pc_type = PCLU; 2165 coarse_ksp_type = KSPPREONLY; 2166 coarse_comm = PETSC_COMM_SELF; 2167 active_rank = master_proc; 2168 break; 2169 } 2170 2171 switch(pcbddc->coarse_communications_type){ 2172 2173 case(SCATTERS_BDDC): 2174 { 2175 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2176 2177 PetscMPIInt send_size; 2178 PetscInt *aux_ins_indices; 2179 PetscInt ii,jj; 2180 MPI_Request *requests; 2181 2182 /* allocate auxiliary space */ 2183 ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2184 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2185 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2186 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2187 /* allocate stuffs for message massing */ 2188 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2189 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2190 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2191 ierr = PetscMalloc ( 
count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2192 /* fill up quantities */ 2193 j=0; 2194 for(i=0;i<count_recv;i++){ 2195 ii = ranks_recv[i]; 2196 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2197 localdispl2[i]=j; 2198 j+=localsizes2[i]; 2199 jj = pcbddc->local_primal_displacements[ii]; 2200 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; // it counts the coarse subdomains sharing the coarse node 2201 } 2202 //printf("aux_ins_indices 1\n"); 2203 //for(i=0;i<pcbddc->coarse_size;i++) 2204 // printf("%d ",aux_ins_indices[i]); 2205 //printf("\n"); 2206 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2207 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2208 /* evaluate how many values I will insert in coarse mat */ 2209 ins_local_primal_size=0; 2210 for(i=0;i<pcbddc->coarse_size;i++) 2211 if(aux_ins_indices[i]) 2212 ins_local_primal_size++; 2213 /* evaluate indices I will insert in coarse mat */ 2214 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2215 j=0; 2216 for(i=0;i<pcbddc->coarse_size;i++) 2217 if(aux_ins_indices[i]) 2218 ins_local_primal_indices[j++]=i; 2219 /* use aux_ins_indices to realize a global to local mapping */ 2220 j=0; 2221 for(i=0;i<pcbddc->coarse_size;i++){ 2222 if(aux_ins_indices[i]==0){ 2223 aux_ins_indices[i]=-1; 2224 } else { 2225 aux_ins_indices[i]=j; 2226 j++; 2227 } 2228 } 2229 2230 //printf("New details localsizes2 localdispl2\n"); 2231 //for(i=0;i<count_recv;i++) 2232 // printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2233 //printf("\n"); 2234 //printf("aux_ins_indices 2\n"); 2235 //for(i=0;i<pcbddc->coarse_size;i++) 2236 // printf("%d ",aux_ins_indices[i]); 2237 //printf("\n"); 2238 //printf("ins_local_primal_indices\n"); 2239 //for(i=0;i<ins_local_primal_size;i++) 2240 // printf("%d 
",ins_local_primal_indices[i]); 2241 //printf("\n"); 2242 //printf("coarse_submat_vals\n"); 2243 //for(i=0;i<pcbddc->local_primal_size;i++) 2244 // for(j=0;j<pcbddc->local_primal_size;j++) 2245 // printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2246 //printf("\n"); 2247 2248 /* processes partecipating in coarse problem receive matrix data from their friends */ 2249 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2250 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2251 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2252 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2253 } 2254 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2255 2256 //if(coarse_color == 0) { 2257 // printf("temp_coarse_mat_vals\n"); 2258 // for(k=0;k<count_recv;k++){ 2259 // printf("---- %d ----\n",ranks_recv[k]); 2260 // for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2261 // for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2262 // printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2263 // printf("\n"); 2264 // } 2265 //} 2266 /* calculate data to insert in coarse mat */ 2267 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2268 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2269 2270 PetscMPIInt rr,kk,lps,lpd; 2271 PetscInt row_ind,col_ind; 2272 for(k=0;k<count_recv;k++){ 2273 
rr = ranks_recv[k]; 2274 kk = localdispl2[k]; 2275 lps = pcbddc->local_primal_sizes[rr]; 2276 lpd = pcbddc->local_primal_displacements[rr]; 2277 //printf("Inserting the following indices (received from %d)\n",rr); 2278 for(j=0;j<lps;j++){ 2279 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2280 for(i=0;i<lps;i++){ 2281 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2282 //printf("%d %d\n",row_ind,col_ind); 2283 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2284 } 2285 } 2286 } 2287 ierr = PetscFree(requests);CHKERRQ(ierr); 2288 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2289 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2290 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2291 2292 /* create local to global mapping needed by coarse MATIS */ 2293 { 2294 IS coarse_IS; 2295 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2296 coarse_comm = prec_comm; 2297 active_rank=rank_prec_comm; 2298 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2299 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2300 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2301 } 2302 } 2303 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2304 /* arrays for values insertion */ 2305 ins_local_primal_size = pcbddc->local_primal_size; 2306 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2307 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2308 for(j=0;j<ins_local_primal_size;j++){ 2309 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2310 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2311 } 2312 } 2313 break; 
2314 2315 } 2316 2317 case(GATHERS_BDDC): 2318 { 2319 2320 PetscMPIInt mysize,mysize2; 2321 2322 if(rank_prec_comm==active_rank) { 2323 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2324 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2325 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2326 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2327 /* arrays for values insertion */ 2328 ins_local_primal_size = pcbddc->coarse_size; 2329 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2330 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2331 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2332 localdispl2[0]=0; 2333 for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2334 j=0; 2335 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2336 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2337 } 2338 2339 mysize=pcbddc->local_primal_size; 2340 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2341 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2342 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2343 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2344 } else { 2345 ierr = 
MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2346 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2347 } 2348 2349 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2350 if(rank_prec_comm==active_rank) { 2351 PetscInt offset,offset2,row_ind,col_ind; 2352 for(j=0;j<ins_local_primal_size;j++){ 2353 ins_local_primal_indices[j]=j; 2354 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2355 } 2356 for(k=0;k<size_prec_comm;k++){ 2357 offset=pcbddc->local_primal_displacements[k]; 2358 offset2=localdispl2[k]; 2359 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2360 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2361 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2362 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2363 ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2364 } 2365 } 2366 } 2367 } 2368 break; 2369 }//switch on coarse problem and communications associated with finished 2370 } 2371 2372 /* Now create and fill up coarse matrix */ 2373 if( rank_prec_comm == active_rank ) { 2374 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2375 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2376 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2377 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2378 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2379 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2380 ierr = 
MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2381 } else { 2382 Mat matis_coarse_local_mat; 2383 ierr = MatCreateIS(coarse_comm,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2384 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2385 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2386 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2387 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); //local values stored in column major 2388 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2389 } 2390 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2391 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2392 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2395 Mat matis_coarse_local_mat; 2396 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2397 ierr = MatSetBlockSize(matis_coarse_local_mat,bs);CHKERRQ(ierr); 2398 } 2399 2400 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2401 /* Preconditioner for coarse problem */ 2402 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2403 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2404 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2405 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 
2406 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2407 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2408 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2409 /* Allow user's customization */ 2410 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2411 /* Set Up PC for coarse problem BDDC */ 2412 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2413 if(dbg_flag) { 2414 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2415 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2416 } 2417 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2418 } 2419 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2420 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2421 if(dbg_flag) { 2422 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2423 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2424 } 2425 } 2426 } 2427 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2428 IS local_IS,global_IS; 2429 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2430 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2431 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2432 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2433 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2434 } 2435 2436 2437 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2438 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2439 PetscScalar m_one=-1.0; 2440 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2441 const KSPType check_ksp_type=KSPGMRES; 2442 2443 /* change coarse ksp object to an iterative method suitable for extreme 
eigenvalues' estimation */ 2444 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2445 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2446 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2447 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2448 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2449 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2450 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2451 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2452 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2453 if(dbg_flag) { 2454 kappa_2=lambda_max/lambda_min; 2455 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2456 ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2457 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2458 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2459 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2460 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2461 } 2462 /* restore coarse ksp to default values */ 2463 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2464 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2465 ierr = KSPChebychevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2466 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2467 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 
2468 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2469 } 2470 2471 /* free data structures no longer needed */ 2472 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2473 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2474 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2475 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2476 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2477 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2478 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2484 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2485 { 2486 2487 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2488 PC_IS *pcis = (PC_IS*)pc->data; 2489 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2490 PCBDDCGraph mat_graph; 2491 Mat mat_adj; 2492 PetscInt **neighbours_set; 2493 PetscInt *queue_in_global_numbering; 2494 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2495 PetscInt total_counts,nodes_touched=0,where_values=1,vertex_size; 2496 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0; 2497 PetscBool same_set,flg_row; 2498 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE; 2499 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2500 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2501 const PetscInt *neumann_nodes; 2502 const PetscInt *dirichlet_nodes; 2503 2504 PetscFunctionBegin; 2505 /* allocate and initialize needed graph structure */ 2506 ierr = PetscMalloc(sizeof(*mat_graph),&mat_graph);CHKERRQ(ierr); 2507 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2508 /* ierr = MatDuplicate(matis->A,MAT_COPY_VALUES,&mat_adj);CHKERRQ(ierr); */ 2509 ierr = 
MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 2510 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called from PCBDDCManageLocalBoundaries.\n"); 2511 i = mat_graph->nvtxs; 2512 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2513 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2514 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2515 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2516 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2517 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2518 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2519 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 2520 2521 /* Setting dofs splitting in mat_graph->which_dof */ 2522 if(pcbddc->n_ISForDofs) { /* get information about dofs' splitting if provided by the user */ 2523 PetscInt *is_indices; 2524 PetscInt is_size; 2525 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2526 ierr = ISGetSize(pcbddc->ISForDofs[i],&is_size);CHKERRQ(ierr); 2527 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2528 for(j=0;j<is_size;j++) { 2529 mat_graph->which_dof[is_indices[j]]=i; 2530 } 2531 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2532 } 2533 /* use mat block size as vertex size */ 2534 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2535 } else { /* otherwise it assumes a constant block size */ 2536 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2537 for(i=0;i<mat_graph->nvtxs/bs;i++) { 
2538 for(s=0;s<bs;s++) { 2539 mat_graph->which_dof[i*bs+s]=s; 2540 } 2541 } 2542 vertex_size=1; 2543 } 2544 /* count number of neigh per node */ 2545 total_counts=0; 2546 for(i=1;i<pcis->n_neigh;i++){ 2547 s=pcis->n_shared[i]; 2548 total_counts+=s; 2549 for(j=0;j<s;j++){ 2550 mat_graph->count[pcis->shared[i][j]] += 1; 2551 } 2552 } 2553 /* Take into account Neumann data -> it increments number of sharing subdomains for all but faces nodes lying on the interface */ 2554 if(pcbddc->NeumannBoundaries) { 2555 ierr = ISGetSize(pcbddc->NeumannBoundaries,&neumann_bsize);CHKERRQ(ierr); 2556 ierr = ISGetIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2557 for(i=0;i<neumann_bsize;i++){ 2558 iindex = neumann_nodes[i]; 2559 if(mat_graph->count[iindex] > 1){ 2560 mat_graph->count[iindex]+=1; 2561 total_counts++; 2562 } 2563 } 2564 } 2565 /* allocate space for storing the set of neighbours of each node */ 2566 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&neighbours_set);CHKERRQ(ierr); 2567 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&neighbours_set[0]);CHKERRQ(ierr); } 2568 for(i=1;i<mat_graph->nvtxs;i++) neighbours_set[i]=neighbours_set[i-1]+mat_graph->count[i-1]; 2569 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2570 for(i=1;i<pcis->n_neigh;i++){ 2571 s=pcis->n_shared[i]; 2572 for(j=0;j<s;j++) { 2573 k=pcis->shared[i][j]; 2574 neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 2575 mat_graph->count[k]+=1; 2576 } 2577 } 2578 /* set -1 fake neighbour to mimic Neumann boundary */ 2579 if(pcbddc->NeumannBoundaries) { 2580 for(i=0;i<neumann_bsize;i++){ 2581 iindex = neumann_nodes[i]; 2582 if(mat_graph->count[iindex] > 1){ 2583 neighbours_set[iindex][mat_graph->count[iindex]] = -1; 2584 mat_graph->count[iindex]+=1; 2585 } 2586 } 2587 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,&neumann_nodes);CHKERRQ(ierr); 2588 } 2589 /* sort set of sharing subdomains (needed for comparison 
below) */ 2590 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],neighbours_set[i]);CHKERRQ(ierr); } 2591 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 2592 if(pcbddc->DirichletBoundaries) { 2593 ierr = ISGetSize(pcbddc->DirichletBoundaries,&dirichlet_bsize);CHKERRQ(ierr); 2594 ierr = ISGetIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2595 for(i=0;i<dirichlet_bsize;i++){ 2596 mat_graph->count[dirichlet_nodes[i]]=0; 2597 } 2598 ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,&dirichlet_nodes);CHKERRQ(ierr); 2599 } 2600 for(i=0;i<mat_graph->nvtxs;i++){ 2601 if(!mat_graph->count[i]){ /* interior nodes */ 2602 mat_graph->touched[i]=PETSC_TRUE; 2603 mat_graph->where[i]=0; 2604 nodes_touched++; 2605 } 2606 } 2607 mat_graph->ncmps = 0; 2608 while(nodes_touched<mat_graph->nvtxs) { 2609 /* find first untouched node in local ordering */ 2610 i=0; 2611 while(mat_graph->touched[i]) i++; 2612 mat_graph->touched[i]=PETSC_TRUE; 2613 mat_graph->where[i]=where_values; 2614 nodes_touched++; 2615 /* now find all other nodes having the same set of sharing subdomains */ 2616 for(j=i+1;j<mat_graph->nvtxs;j++){ 2617 /* check for same number of sharing subdomains and dof number */ 2618 if(mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 2619 /* check for same set of sharing subdomains */ 2620 same_set=PETSC_TRUE; 2621 for(k=0;k<mat_graph->count[j];k++){ 2622 if(neighbours_set[i][k]!=neighbours_set[j][k]) { 2623 same_set=PETSC_FALSE; 2624 } 2625 } 2626 /* I found a friend of mine */ 2627 if(same_set) { 2628 mat_graph->where[j]=where_values; 2629 mat_graph->touched[j]=PETSC_TRUE; 2630 nodes_touched++; 2631 } 2632 } 2633 } 2634 where_values++; 2635 } 2636 where_values--; if(where_values<0) where_values=0; 2637 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2638 /* Find connected components defined on 
the shared interface */ 2639 if(where_values) { 2640 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2641 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 2642 for(i=0;i<mat_graph->ncmps;i++) { 2643 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2644 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 2645 } 2646 } 2647 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 2648 for(i=0;i<where_values;i++) { 2649 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 2650 if(mat_graph->where_ncmps[i]>1) { 2651 adapt_interface=1; 2652 break; 2653 } 2654 } 2655 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 2656 if(where_values && adapt_interface_reduced) { 2657 2658 printf("Adapting Interface\n"); 2659 2660 PetscInt sum_requests=0,my_rank; 2661 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 2662 PetscInt temp_buffer_size,ins_val,global_where_counter; 2663 PetscInt *cum_recv_counts; 2664 PetscInt *where_to_nodes_indices; 2665 PetscInt *petsc_buffer; 2666 PetscMPIInt *recv_buffer; 2667 PetscMPIInt *recv_buffer_where; 2668 PetscMPIInt *send_buffer; 2669 PetscMPIInt size_of_send; 2670 PetscInt *sizes_of_sends; 2671 MPI_Request *send_requests; 2672 MPI_Request *recv_requests; 2673 PetscInt *where_cc_adapt; 2674 PetscInt **temp_buffer; 2675 PetscInt *nodes_to_temp_buffer_indices; 2676 PetscInt *add_to_where; 2677 2678 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 2679 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 2680 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 2681 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 2682 /* first count how many neighbours per connected component I will receive from */ 2683 cum_recv_counts[0]=0; 2684 for(i=1;i<where_values+1;i++){ 2685 j=0; 2686 while(mat_graph->where[j] != i) j++; 2687 where_to_nodes_indices[i-1]=j; 2688 if(neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 2689 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 2690 } 2691 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 2692 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 2693 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 2694 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 2695 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 2696 for(i=0;i<cum_recv_counts[where_values];i++) { 2697 send_requests[i]=MPI_REQUEST_NULL; 2698 recv_requests[i]=MPI_REQUEST_NULL; 2699 } 2700 /* exchange with my neighbours the number of my connected components on the shared interface */ 2701 for(i=0;i<where_values;i++){ 2702 j=where_to_nodes_indices[i]; 2703 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2704 for(;k<mat_graph->count[j];k++){ 2705 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2706 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2707 sum_requests++; 2708 } 2709 } 2710 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2711 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2712 /* determine the connected component I need to adapt */ 2713 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 2714 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2715 for(i=0;i<where_values;i++){ 2716 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 2717 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 2718 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 2719 where_cc_adapt[i]=PETSC_TRUE; 2720 break; 2721 } 2722 } 2723 } 2724 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 2725 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 2726 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 2727 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 2728 sum_requests=0; 2729 start_of_send=0; 2730 start_of_recv=cum_recv_counts[where_values]; 2731 for(i=0;i<where_values;i++) { 2732 if(where_cc_adapt[i]) { 2733 size_of_send=0; 2734 for(j=i;j<mat_graph->ncmps;j++) { 2735 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 
2736 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2737 size_of_send+=1; 2738 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 2739 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 2740 } 2741 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 2742 } 2743 } 2744 j = where_to_nodes_indices[i]; 2745 k = (neighbours_set[j][0] == -1 ? 1 : 0); 2746 for(;k<mat_graph->count[j];k++){ 2747 ierr = MPI_Isend(&size_of_send,1,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2748 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2749 sum_requests++; 2750 } 2751 sizes_of_sends[i]=size_of_send; 2752 start_of_send+=size_of_send; 2753 } 2754 } 2755 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2756 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2757 buffer_size=0; 2758 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 2759 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 2760 /* now exchange the data */ 2761 start_of_recv=0; 2762 start_of_send=0; 2763 sum_requests=0; 2764 for(i=0;i<where_values;i++) { 2765 if(where_cc_adapt[i]) { 2766 size_of_send = sizes_of_sends[i]; 2767 j = where_to_nodes_indices[i]; 2768 k = (neighbours_set[j][0] == -1 ? 
1 : 0); 2769 for(;k<mat_graph->count[j];k++){ 2770 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 2771 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 2772 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,neighbours_set[j][k],(neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 2773 start_of_recv+=size_of_recv; 2774 sum_requests++; 2775 } 2776 start_of_send+=size_of_send; 2777 } 2778 } 2779 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2780 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2781 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 2782 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 2783 for(j=0;j<buffer_size;) { 2784 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 2785 k=petsc_buffer[j]+1; 2786 j+=k; 2787 } 2788 sum_requests=cum_recv_counts[where_values]; 2789 start_of_recv=0; 2790 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2791 global_where_counter=0; 2792 for(i=0;i<where_values;i++){ 2793 if(where_cc_adapt[i]){ 2794 temp_buffer_size=0; 2795 /* find nodes on the shared interface we need to adapt */ 2796 for(j=0;j<mat_graph->nvtxs;j++){ 2797 if(mat_graph->where[j]==i+1) { 2798 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 2799 temp_buffer_size++; 2800 } else { 2801 nodes_to_temp_buffer_indices[j]=-1; 2802 } 2803 } 2804 /* allocate some temporary space */ 2805 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 2806 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 2807 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 2808 for(j=1;j<temp_buffer_size;j++){ 2809 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 2810 } 2811 /* analyze contributions from neighbouring subdomains for i-th conn comp 2812 temp buffer structure: 2813 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 2814 3 neighs procs with structured connected components: 2815 neigh 0: [0 1 4], [2 3]; (2 connected components) 2816 neigh 1: [0 1], [2 3 4]; (2 connected components) 2817 neigh 2: [0 4], [1], [2 3]; (3 connected components) 2818 tempbuffer (row-oriented) should be filled as: 2819 [ 0, 0, 0; 2820 0, 0, 1; 2821 1, 1, 2; 2822 1, 1, 2; 2823 0, 1, 0; ]; 2824 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
2825 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 2826 */ 2827 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 2828 ins_val=0; 2829 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 2830 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 2831 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 2832 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 2833 } 2834 buffer_size+=k; 2835 ins_val++; 2836 } 2837 start_of_recv+=size_of_recv; 2838 sum_requests++; 2839 } 2840 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 2841 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 2842 for(j=0;j<temp_buffer_size;j++){ 2843 if(!add_to_where[j]){ /* found a new cc */ 2844 global_where_counter++; 2845 add_to_where[j]=global_where_counter; 2846 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 2847 same_set=PETSC_TRUE; 2848 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 2849 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 2850 same_set=PETSC_FALSE; 2851 break; 2852 } 2853 } 2854 if(same_set) add_to_where[k]=global_where_counter; 2855 } 2856 } 2857 } 2858 /* insert new data in where array */ 2859 temp_buffer_size=0; 2860 for(j=0;j<mat_graph->nvtxs;j++){ 2861 if(mat_graph->where[j]==i+1) { 2862 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 2863 temp_buffer_size++; 2864 } 2865 } 2866 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 2867 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 2868 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 2869 } 2870 } 2871 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 2872 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 2873 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 2874 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 2875 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 2876 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 2877 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 2878 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 2879 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 2880 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 2881 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 2882 if(global_where_counter) { 2883 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 2884 global_where_counter=0; 2885 for(i=0;i<mat_graph->nvtxs;i++){ 2886 if(mat_graph->where[i] && !mat_graph->touched[i]) { 2887 global_where_counter++; 2888 for(j=i+1;j<mat_graph->nvtxs;j++){ 2889 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 2890 mat_graph->where[j]=global_where_counter; 2891 mat_graph->touched[j]=PETSC_TRUE; 2892 } 2893 } 2894 mat_graph->where[i]=global_where_counter; 2895 mat_graph->touched[i]=PETSC_TRUE; 2896 } 2897 } 2898 where_values=global_where_counter; 2899 } 2900 if(global_where_counter) { 2901 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2902 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2903 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 2904 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 2905 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 2906 for(i=0;i<mat_graph->ncmps;i++) { 2907 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 2908 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
2909 } 2910 } 2911 } /* Finished adapting interface */ 2912 PetscInt nfc=0; 2913 PetscInt nec=0; 2914 PetscInt nvc=0; 2915 PetscBool twodim_flag=PETSC_FALSE; 2916 for (i=0; i<mat_graph->ncmps; i++) { 2917 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2918 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh */ 2919 nfc++; 2920 } else { /* note that nec will be zero in 2d */ 2921 nec++; 2922 } 2923 } else { 2924 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 2925 } 2926 } 2927 2928 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 2929 nec = nfc; 2930 nfc = 0; 2931 twodim_flag = PETSC_TRUE; 2932 } 2933 /* allocate IS arrays for faces, edges. Vertices need a single index set. 2934 Reusing space allocated in mat_graph->where for creating IS objects */ 2935 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 2936 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 2937 use_faces=PETSC_TRUE; 2938 } 2939 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 2940 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 2941 use_edges=PETSC_TRUE; 2942 } 2943 nfc=0; 2944 nec=0; 2945 for (i=0; i<mat_graph->ncmps; i++) { 2946 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 2947 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 2948 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 2949 } 2950 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 2951 if(twodim_flag) { 2952 if(use_edges) { 2953 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 2954 nec++; 2955 } 2956 } else { 2957 if(use_faces) { 2958 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 2959 nfc++; 2960 } 2961 } 2962 } else { 2963 if(use_edges) { 2964 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 
2965 nec++; 2966 } 2967 } 2968 } 2969 } 2970 pcbddc->n_ISForFaces=nfc; 2971 pcbddc->n_ISForEdges=nec; 2972 nvc=0; 2973 if( !pcbddc->constraints_flag ) { 2974 for (i=0; i<mat_graph->ncmps; i++) { 2975 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 2976 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 2977 mat_graph->where[nvc]=mat_graph->queue[j]; 2978 nvc++; 2979 } 2980 } 2981 } 2982 } 2983 /* sort vertex set (by local ordering) */ 2984 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 2985 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 2986 2987 if(pcbddc->dbg_flag) { 2988 PetscViewer viewer=pcbddc->dbg_viewer; 2989 2990 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2991 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2992 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2993 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 2994 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2995 for(i=0;i<mat_graph->nvtxs;i++) { 2996 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 2997 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 2998 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 2999 } 3000 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3001 } 3002 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d 
connected components", mat_graph->ncmps);CHKERRQ(ierr); 3003 for(i=0;i<mat_graph->ncmps;i++) { 3004 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3005 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3006 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3007 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); 3008 } 3009 } 3010 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr);*/ 3011 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3012 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3013 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3014 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3015 } 3016 3017 /* Restore CSR structure into sequantial matrix and free memory space no longer needed */ 3018 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&mat_graph->nvtxs,&mat_graph->xadj,&mat_graph->adjncy,&flg_row);CHKERRQ(ierr); 3019 if(!flg_row) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called from PCBDDCManageLocalBoundaries.\n"); 3020 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 3021 /* Free graph structure */ 3022 if(mat_graph->nvtxs){ 3023 ierr = PetscFree(neighbours_set[0]);CHKERRQ(ierr); 3024 ierr = PetscFree(neighbours_set);CHKERRQ(ierr); 3025 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3026 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3027 ierr = 
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr);
  }
  ierr = PetscFree(mat_graph);CHKERRQ(ierr);

  PetscFunctionReturn(0);

}

/* -------------------------------------------------------------------------- */

/* The following code has been adapted from function IsConnectedSubdomain contained
   in source file contig.c of METIS library (version 5.0.1) */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
/*
   Find the connected components of the subgraphs induced by each "where" value.

   On entry, graph->where[i] holds a partition id in 1..n_dist for every vertex
   that belongs to some part of the shared interface (id 0 means "not considered").
   For each pid = 1..n_dist, a breadth-first search is run over the adjacency
   structure (graph->xadj/graph->adjncy, CSR format) restricted to vertices with
   where[] == pid.

   On exit:
     - graph->queue holds all visited vertices, grouped by connected component;
     - graph->cptr[c]..graph->cptr[c+1] delimits component c inside graph->queue
       (CSR-like layout over all pids);
     - graph->where_ncmps[n] is the number of components found for pid n+1;
     - graph->ncmps is the total number of components over all pids.

   NOTE(review): assumes every pid in 1..n_dist has at least one vertex with
   where[] == pid; otherwise the initial search for a seed vertex would run past
   the end of the arrays — confirm against callers (they build pids from
   consecutive where_values, which appears to guarantee this).
 */
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist )
{
  PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid;
  PetscInt *xadj, *adjncy, *where, *queue;
  PetscInt *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  /* local aliases into the graph data structure */
  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  where = graph->where;
  touched = graph->touched;
  queue = graph->queue;
  cptr = graph->cptr;

  /* mark all vertices unvisited */
  for (i=0; i<nvtxs; i++)
    touched[i] = PETSC_FALSE;

  cum_queue=0; /* running offset into queue: start of the data for the current pid */
  ncmps=0;     /* total number of connected components found so far (over all pids) */

  for(n=0; n<n_dist; n++) {
    pid = n+1; /* where[] values are 1-based */
    /* count the vertices belonging to this pid: the BFS below must visit exactly nleft of them */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        nleft++;
    }
    /* seed the search with the first vertex of this pid */
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid)
        break;
    }
    touched[i] = PETSC_TRUE;
    queue[cum_queue] = i;
    /* first/last are head/tail indices of the BFS queue, relative to cum_queue */
    first = 0; last = 1;
    cptr[ncmps] = cum_queue; /* This actually points to queue */
    ncmps_pid = 0; /* number of components found for this pid */
    while (first != nleft) {
      if (first == last) { /* Find another starting vertex: current component exhausted, more vertices of this pid remain */
        cptr[++ncmps] = first+cum_queue; /* close the finished component */
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) {
          if (where[i] == pid && !touched[i])
            break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      /* pop the next vertex and enqueue its untouched same-pid neighbours */
      i = queue[cum_queue+first];
      first++;
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    /* close the last component of this pid and advance the queue offset */
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue=cptr[ncmps];
    graph->where_ncmps[n] = ncmps_pid;
  }
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}