1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 Exact solvers: Solve local saddle point directly 5 - change prec_type to switch_inexact_prec_type 6 - add bool solve_exact_saddle_point slot to pdbddc data 7 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 8 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 9 - mind the problem with coarsening_factor 10 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 11 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 12 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 13 - Add levels' slot to bddc data structure and associated Set/Get functions 14 code refactoring: 15 - pick up better names for static functions 16 change options structure: 17 - insert BDDC into MG framework? 18 provide other ops? Ask to developers 19 remove all unused printf 20 man pages 21 */ 22 23 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 24 Implementation of BDDC preconditioner based on: 25 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 31 /* -------------------------------------------------------------------------- */ 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCSetFromOptions_BDDC" 34 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 35 { 36 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 41 /* Verbose debugging of main data structures */ 42 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Some customization for default primal space */ 44 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 48 /* Coarse solver context */ 49 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 50 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 51 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 52 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 55 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 56 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 57 ierr = PetscOptionsTail();CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 /* -------------------------------------------------------------------------- */ 61 EXTERN_C_BEGIN 62 #undef __FUNCT__ 63 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 64 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 65 { 66 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 67 68 PetscFunctionBegin; 69 pcbddc->coarse_problem_type = CPT; 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 #undef __FUNCT__ 74 
#define __FUNCT__ "PCBDDCSetCoarseProblemType" 75 /*@ 76 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 77 78 Not collective 79 80 Input Parameters: 81 + pc - the preconditioning context 82 - CoarseProblemType - pick a better name and explain what this is 83 84 Level: intermediate 85 86 Notes: 87 Not collective but all procs must call with same arguments. 88 89 .seealso: PCBDDC 90 @*/ 91 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 92 { 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 97 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 112 pcbddc->DirichletBoundaries=DirichletBoundaries; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 118 /*@ 119 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 120 of Dirichlet boundaries for the global problem. 
121 122 Not collective 123 124 Input Parameters: 125 + pc - the preconditioning context 126 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 127 128 Level: intermediate 129 130 Notes: 131 132 .seealso: PCBDDC 133 @*/ 134 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 135 { 136 PetscErrorCode ierr; 137 138 PetscFunctionBegin; 139 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 140 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 /* -------------------------------------------------------------------------- */ 144 EXTERN_C_BEGIN 145 #undef __FUNCT__ 146 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 147 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 148 { 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 154 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 155 pcbddc->NeumannBoundaries=NeumannBoundaries; 156 PetscFunctionReturn(0); 157 } 158 EXTERN_C_END 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 161 /*@ 162 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 163 of Neumann boundaries for the global problem. 
164 165 Not collective 166 167 Input Parameters: 168 + pc - the preconditioning context 169 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 170 171 Level: intermediate 172 173 Notes: 174 175 .seealso: PCBDDC 176 @*/ 177 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 178 { 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 183 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 /* -------------------------------------------------------------------------- */ 187 EXTERN_C_BEGIN 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 190 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 191 { 192 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 193 194 PetscFunctionBegin; 195 *DirichletBoundaries = pcbddc->DirichletBoundaries; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 #undef __FUNCT__ 200 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 201 /*@ 202 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 203 of Dirichlet boundaries for the global problem. 
204 205 Not collective 206 207 Input Parameters: 208 + pc - the preconditioning context 209 210 Output Parameters: 211 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 212 213 Level: intermediate 214 215 Notes: 216 217 .seealso: PCBDDC 218 @*/ 219 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 220 { 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 225 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 /* -------------------------------------------------------------------------- */ 229 EXTERN_C_BEGIN 230 #undef __FUNCT__ 231 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 232 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 233 { 234 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 235 236 PetscFunctionBegin; 237 *NeumannBoundaries = pcbddc->NeumannBoundaries; 238 PetscFunctionReturn(0); 239 } 240 EXTERN_C_END 241 #undef __FUNCT__ 242 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 243 /*@ 244 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 245 of Neumann boundaries for the global problem. 
246 247 Not collective 248 249 Input Parameters: 250 + pc - the preconditioning context 251 252 Output Parameters: 253 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 254 255 Level: intermediate 256 257 Notes: 258 259 .seealso: PCBDDC 260 @*/ 261 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 262 { 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 267 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 /* -------------------------------------------------------------------------- */ 271 EXTERN_C_BEGIN 272 #undef __FUNCT__ 273 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 274 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 275 { 276 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 277 PCBDDCGraph mat_graph=pcbddc->mat_graph; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 mat_graph->nvtxs=nvtxs; 282 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 284 if(copymode == PETSC_COPY_VALUES) { 285 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 286 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 288 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 289 } else if(copymode == PETSC_OWN_POINTER) { 290 mat_graph->xadj=xadj; 291 mat_graph->adjncy=adjncy; 292 } else { 293 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 294 } 295 PetscFunctionReturn(0); 296 } 297 EXTERN_C_END 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 300 /*@ 301 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 302 303 Not collective 304 305 Input Parameters: 306 + pc - the preconditioning context 307 - nvtxs - number of local vertices of the graph 308 - xadj, adjncy - the CSR graph 309 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 310 in the latter case, memory must be obtained with PetscMalloc. 311 312 Level: intermediate 313 314 Notes: 315 316 .seealso: PCBDDC 317 @*/ 318 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 319 { 320 PetscInt nrows,ncols; 321 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 322 PetscErrorCode ierr; 323 324 PetscFunctionBegin; 325 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 326 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 327 if(nvtxs != nrows) { 328 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 329 } else { 330 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 /* -------------------------------------------------------------------------- */ 335 EXTERN_C_BEGIN 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 338 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 339 { 340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 341 PetscInt i; 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 /* Destroy ISes if they were already set */ 346 for(i=0;i<pcbddc->n_ISForDofs;i++) { 347 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 348 } 349 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 350 /* allocate space then set */ 351 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 352 for(i=0;i<n_is;i++) { 
353 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 354 pcbddc->ISForDofs[i]=ISForDofs[i]; 355 } 356 pcbddc->n_ISForDofs=n_is; 357 PetscFunctionReturn(0); 358 } 359 EXTERN_C_END 360 #undef __FUNCT__ 361 #define __FUNCT__ "PCBDDCSetDofsSplitting" 362 /*@ 363 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 364 365 Not collective 366 367 Input Parameters: 368 + pc - the preconditioning context 369 - n - number of index sets defining the fields 370 - IS[] - array of IS describing the fields 371 372 Level: intermediate 373 374 Notes: 375 376 .seealso: PCBDDC 377 @*/ 378 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 379 { 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 384 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 /* -------------------------------------------------------------------------- */ 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCPreSolve_BDDC" 390 /* -------------------------------------------------------------------------- */ 391 /* 392 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 393 guess if a transformation of basis approach has been selected. 394 395 Input Parameter: 396 + pc - the preconditioner contex 397 398 Application Interface Routine: PCPreSolve() 399 400 Notes: 401 The interface routine PCPreSolve() is not usually called directly by 402 the user, but instead is called by KSPSolve(). 
403 */ 404 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 405 { 406 PetscErrorCode ierr; 407 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 408 PC_IS *pcis = (PC_IS*)(pc->data); 409 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 410 Mat temp_mat; 411 412 PetscFunctionBegin; 413 if(pcbddc->usechangeofbasis) { 414 /* swap pointers for local matrices */ 415 temp_mat = matis->A; 416 matis->A = pcbddc->local_mat; 417 pcbddc->local_mat = temp_mat; 418 /* store the original rhs */ 419 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 420 /* Get local rhs and apply transformation of basis */ 421 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 422 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 423 /* from original basis to modified basis */ 424 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 425 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 426 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 427 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 428 } 429 PetscFunctionReturn(0); 430 } 431 /* -------------------------------------------------------------------------- */ 432 #undef __FUNCT__ 433 #define __FUNCT__ "PCPostSolve_BDDC" 434 /* -------------------------------------------------------------------------- */ 435 /* 436 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 437 approach has been selected. Also, restores rhs to its original state. 438 439 Input Parameter: 440 + pc - the preconditioner contex 441 442 Application Interface Routine: PCPostSolve() 443 444 Notes: 445 The interface routine PCPostSolve() is not usually called directly by 446 the user, but instead is called by KSPSolve(). 
447 */ 448 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 449 { 450 PetscErrorCode ierr; 451 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 452 PC_IS *pcis = (PC_IS*)(pc->data); 453 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 454 Mat temp_mat; 455 456 PetscFunctionBegin; 457 if(pcbddc->usechangeofbasis) { 458 /* swap pointers for local matrices */ 459 temp_mat = matis->A; 460 matis->A = pcbddc->local_mat; 461 pcbddc->local_mat = temp_mat; 462 /* restore rhs to its original state */ 463 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 464 /* Get Local boundary and apply transformation of basis to solution vector */ 465 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 /* from modified basis to original basis */ 468 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 469 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 470 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 472 } 473 PetscFunctionReturn(0); 474 } 475 /* -------------------------------------------------------------------------- */ 476 #undef __FUNCT__ 477 #define __FUNCT__ "PCSetUp_BDDC" 478 /* -------------------------------------------------------------------------- */ 479 /* 480 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 481 by setting data structures and options. 482 483 Input Parameter: 484 + pc - the preconditioner context 485 486 Application Interface Routine: PCSetUp() 487 488 Notes: 489 The interface routine PCSetUp() is not usually called directly by 490 the user, but instead is called by PCApply() if necessary. 
491 */ 492 PetscErrorCode PCSetUp_BDDC(PC pc) 493 { 494 PetscErrorCode ierr; 495 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 496 PC_IS *pcis = (PC_IS*)(pc->data); 497 498 PetscFunctionBegin; 499 if (!pc->setupcalled) { 500 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 501 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 502 Also, we decide to directly build the (same) Dirichlet problem */ 503 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 504 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 505 /* Set up all the "iterative substructuring" common block */ 506 ierr = PCISSetUp(pc);CHKERRQ(ierr); 507 /* Get stdout for dbg */ 508 if(pcbddc->dbg_flag) { 509 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 510 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 511 } 512 /* TODO MOVE CODE FRAGMENT */ 513 PetscInt im_active=0; 514 if(pcis->n) im_active = 1; 515 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 516 /* Analyze local interface */ 517 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 518 /* Set up local constraint matrix */ 519 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 520 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 521 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 522 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 523 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 524 } 525 PetscFunctionReturn(0); 526 } 527 528 /* -------------------------------------------------------------------------- */ 529 /* 530 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 531 532 Input Parameters: 533 . pc - the preconditioner context 534 . 
r - input vector (global) 535 536 Output Parameter: 537 . z - output vector (global) 538 539 Application Interface Routine: PCApply() 540 */ 541 #undef __FUNCT__ 542 #define __FUNCT__ "PCApply_BDDC" 543 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 544 { 545 PC_IS *pcis = (PC_IS*)(pc->data); 546 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 547 PetscErrorCode ierr; 548 const PetscScalar one = 1.0; 549 const PetscScalar m_one = -1.0; 550 const PetscScalar zero = 0.0; 551 552 /* This code is similar to that provided in nn.c for PCNN 553 NN interface preconditioner changed to BDDC 554 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 555 556 PetscFunctionBegin; 557 /* First Dirichlet solve */ 558 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 559 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 560 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 561 /* 562 Assembling right hand side for BDDC operator 563 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 564 - the interface part of the global vector z 565 */ 566 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 567 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 568 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 569 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 570 ierr = VecCopy(r,z);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 572 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 573 574 /* Get Local boundary and apply partition of unity */ 575 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 576 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 577 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 578 579 /* Apply interface preconditioner 580 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 581 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 582 583 /* Apply partition of unity and sum boundary values */ 584 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 585 ierr = VecSet(z,zero);CHKERRQ(ierr); 586 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 587 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 588 589 /* Second Dirichlet solve and assembling of output */ 590 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 591 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 593 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 594 ierr = 
KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 595 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 596 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 597 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 598 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 599 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 600 601 PetscFunctionReturn(0); 602 603 } 604 /* -------------------------------------------------------------------------- */ 605 #undef __FUNCT__ 606 #define __FUNCT__ "PCDestroy_BDDC" 607 PetscErrorCode PCDestroy_BDDC(PC pc) 608 { 609 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 610 PetscErrorCode ierr; 611 612 PetscFunctionBegin; 613 /* free data created by PCIS */ 614 ierr = PCISDestroy(pc);CHKERRQ(ierr); 615 /* free BDDC data */ 616 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 617 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 618 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 619 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 620 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 621 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 622 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 623 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 624 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 625 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 626 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 627 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 629 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 631 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 632 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 633 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 634 ierr = 
VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 635 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 636 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 637 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 638 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 639 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 640 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 641 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 642 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 643 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 644 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 645 PetscInt i; 646 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 647 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 648 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 649 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 650 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 651 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 652 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 653 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 654 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 655 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 656 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 657 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 658 /* Free the private data structure that was hanging off the PC */ 659 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 660 PetscFunctionReturn(0); 661 } 662 663 /* -------------------------------------------------------------------------- */ 664 /*MC 665 PCBDDC - Balancing Domain Decomposition by Constraints. 666 667 Options Database Keys: 668 . -pcbddc ??? 
-

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

   Unlike more 'conventional' interface preconditioners, this iterates over ALL the
   degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
   on the subdomains).

   Options for the coarse grid preconditioner can be set with -
   Options for the Dirichlet subproblem can be set with -
   Options for the Neumann subproblem can be set with -
   (NOTE(review): the option prefixes above appear truncated in this copy of the file — confirm against the repository version)

   Contributed by Stefano Zampini

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
/* Constructor registered for PCBDDC: allocates the private PC_BDDC context,
   zero/defaults every slot, installs the PC operations table and composes the
   PCBDDCSetXXX/GetXXX functions on the PC object. */
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc;
  PCBDDCGraph    mat_graph;   /* used only for sizeof(*mat_graph) below */

  PetscFunctionBegin;
  /* Creates the private data structure for this preconditioner and attach it to the PC object. */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data  = (void*)pcbddc;

  /* create PCIS data structure (PCBDDC builds on the PCIS interface splitting) */
  ierr = PCISCreate(pc);CHKERRQ(ierr);

  /* BDDC specific: null out all objects/arrays, set the few scalar defaults */
  pcbddc->original_rhs              = 0;
  pcbddc->local_mat                 = 0;
  pcbddc->ChangeOfBasisMatrix       = 0;
  pcbddc->usechangeofbasis          = PETSC_TRUE;   /* default: change of basis on edges */
  pcbddc->usechangeonfaces          = PETSC_FALSE;  /* but not on faces */
  pcbddc->coarse_vec                = 0;
  pcbddc->coarse_rhs                = 0;
  pcbddc->coarse_ksp                = 0;
  pcbddc->coarse_phi_B              = 0;
  pcbddc->coarse_phi_D              = 0;
  pcbddc->vec1_P                    = 0;
  pcbddc->vec1_R                    = 0;
  pcbddc->vec2_R                    = 0;
  pcbddc->local_auxmat1             = 0;
  pcbddc->local_auxmat2             = 0;
  pcbddc->R_to_B                    = 0;
  pcbddc->R_to_D                    = 0;
  pcbddc->ksp_D                     = 0;
  pcbddc->ksp_R                     = 0;
  pcbddc->local_primal_indices      = 0;
  pcbddc->prec_type                 = PETSC_FALSE;
  pcbddc->NeumannBoundaries         = 0;
  pcbddc->ISForDofs                 = 0;
  pcbddc->ISForVertices             = 0;
  pcbddc->n_ISForFaces              = 0;
  pcbddc->n_ISForEdges              = 0;
  pcbddc->ConstraintMatrix          = 0;
  pcbddc->use_nnsp_true             = PETSC_FALSE;
  pcbddc->local_primal_sizes        = 0;
  pcbddc->local_primal_displacements = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values  = 0;
  pcbddc->coarse_loc_to_glob        = 0;
  pcbddc->dbg_flag                  = PETSC_FALSE;
  pcbddc->coarsening_ratio          = 8;

  /* allocate and initialize needed graph structure
     NOTE(review): only xadj/adjncy are explicitly zeroed here; other PCBDDCGraph
     fields rely on later setup — confirm against ManageLocalBoundaries */
  ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr);
  pcbddc->mat_graph->xadj   = 0;
  pcbddc->mat_graph->adjncy = 0;

  /* function pointers */
  pc->ops->apply               = PCApply_BDDC;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_BDDC;
  pc->ops->destroy             = PCDestroy_BDDC;
  pc->ops->setfromoptions      = PCSetFromOptions_BDDC;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;
  pc->ops->presolve            = PCPreSolve_BDDC;
  pc->ops->postsolve           = PCPostSolve_BDDC;

  /* composing function: expose type-specific setters/getters through PetscObjectQueryFunction */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
                    PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",
                    PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC",
                    PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC",
                    PCPreSolve_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC",
                    PCPostSolve_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
/* -------------------------------------------------------------------------- */
/* All static functions from now on                                           */
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph"
/* Fills pcbddc->mat_graph with the CSR adjacency of the local MATIS matrix,
   unless the user already supplied one via PCBDDCSetLocalAdjacencyGraph. */
static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       nvtxs,*xadj,*adjncy;
  Mat            mat_adj;
  PetscBool      symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE;
  PCBDDCGraph    mat_graph=pcbddc->mat_graph;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */
  if(!mat_graph->xadj) {
    ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
    ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
    }
    /* Get adjacency into BDDC workspace (PETSC_COPY_VALUES: graph outlives mat_adj) */
    ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
    ierr =
MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 806 if(!flg_row) { 807 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 808 } 809 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 810 } 811 PetscFunctionReturn(0); 812 } 813 /* -------------------------------------------------------------------------- */ 814 #undef __FUNCT__ 815 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 816 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 817 { 818 PetscErrorCode ierr; 819 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 820 PC_IS* pcis = (PC_IS*) (pc->data); 821 const PetscScalar zero = 0.0; 822 823 PetscFunctionBegin; 824 /* Application of PHI^T */ 825 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 826 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 827 828 /* Scatter data of coarse_rhs */ 829 if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 830 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 832 /* Local solution on R nodes */ 833 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 835 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 836 if(pcbddc->prec_type) { 837 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 838 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 839 } 840 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 841 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 842 ierr = 
VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 if(pcbddc->prec_type) { 845 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 } 848 849 /* Coarse solution */ 850 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 852 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 853 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 854 855 /* Sum contributions from two levels */ 856 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 857 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 858 PetscFunctionReturn(0); 859 } 860 /* -------------------------------------------------------------------------- */ 861 #undef __FUNCT__ 862 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 863 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 864 { 865 PetscErrorCode ierr; 866 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 867 868 PetscFunctionBegin; 869 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 870 if(pcbddc->local_auxmat1) { 871 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 872 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 873 } 874 PetscFunctionReturn(0); 875 } 876 /* 
-------------------------------------------------------------------------- */ 877 #undef __FUNCT__ 878 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 879 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 880 { 881 PetscErrorCode ierr; 882 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 883 884 PetscFunctionBegin; 885 switch(pcbddc->coarse_communications_type){ 886 case SCATTERS_BDDC: 887 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 888 break; 889 case GATHERS_BDDC: 890 break; 891 } 892 PetscFunctionReturn(0); 893 } 894 /* -------------------------------------------------------------------------- */ 895 #undef __FUNCT__ 896 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 897 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 898 { 899 PetscErrorCode ierr; 900 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 901 PetscScalar* array_to; 902 PetscScalar* array_from; 903 MPI_Comm comm=((PetscObject)pc)->comm; 904 PetscInt i; 905 906 PetscFunctionBegin; 907 908 switch(pcbddc->coarse_communications_type){ 909 case SCATTERS_BDDC: 910 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 911 break; 912 case GATHERS_BDDC: 913 if(vec_from) VecGetArray(vec_from,&array_from); 914 if(vec_to) VecGetArray(vec_to,&array_to); 915 switch(pcbddc->coarse_problem_type){ 916 case SEQUENTIAL_BDDC: 917 if(smode == SCATTER_FORWARD) { 918 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 919 if(vec_to) { 920 for(i=0;i<pcbddc->replicated_primal_size;i++) 921 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 922 } 923 } else { 924 if(vec_from) 925 
for(i=0;i<pcbddc->replicated_primal_size;i++) 926 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 927 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 928 } 929 break; 930 case REPLICATED_BDDC: 931 if(smode == SCATTER_FORWARD) { 932 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 933 for(i=0;i<pcbddc->replicated_primal_size;i++) 934 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 935 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 936 for(i=0;i<pcbddc->local_primal_size;i++) 937 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 938 } 939 break; 940 case MULTILEVEL_BDDC: 941 break; 942 case PARALLEL_BDDC: 943 break; 944 } 945 if(vec_from) VecRestoreArray(vec_from,&array_from); 946 if(vec_to) VecRestoreArray(vec_to,&array_to); 947 break; 948 } 949 PetscFunctionReturn(0); 950 } 951 /* -------------------------------------------------------------------------- */ 952 #ifdef BDDC_USE_POD 953 #if !defined(PETSC_MISSING_LAPACK_GESVD) 954 #define PETSC_MISSING_LAPACK_GESVD 1 955 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 956 #endif 957 #endif 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 961 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 962 { 963 PetscErrorCode ierr; 964 PC_IS* pcis = (PC_IS*)(pc->data); 965 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 967 PetscInt *nnz,*vertices,*is_indices; 968 PetscScalar *temp_quadrature_constraint; 969 PetscInt 
*temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 970 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 971 PetscInt n_constraints,n_vertices,size_of_constraint; 972 PetscScalar quad_value; 973 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 974 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 975 IS *used_IS; 976 const MatType impMatType=MATSEQAIJ; 977 PetscBLASInt Bs,Bt,lwork,lierr; 978 PetscReal tol=1.0e-8; 979 MatNullSpace nearnullsp; 980 const Vec *nearnullvecs; 981 Vec *localnearnullsp; 982 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 983 PetscReal *rwork,*singular_vals; 984 PetscBLASInt Bone=1,*ipiv; 985 Vec temp_vec; 986 Mat temp_mat; 987 KSP temp_ksp; 988 PetscInt s,start_constraint,dual_dofs; 989 PetscBool compute_submatrix,useksp=PETSC_FALSE; 990 PetscInt *aux_primal_permutation,*aux_primal_numbering; 991 PetscBool boolforface,*change_basis; 992 /* some ugly conditional declarations */ 993 #if defined(PETSC_MISSING_LAPACK_GESVD) 994 PetscScalar dot_result; 995 PetscScalar one=1.0,zero=0.0; 996 PetscInt ii; 997 #if defined(PETSC_USE_COMPLEX) 998 PetscScalar val1,val2; 999 #endif 1000 #else 1001 PetscBLASInt dummy_int; 1002 PetscScalar dummy_scalar; 1003 #endif 1004 1005 PetscFunctionBegin; 1006 /* check if near null space is attached to global mat */ 1007 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1008 if (nearnullsp) { 1009 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1010 } else { /* if near null space is not provided it uses constants */ 1011 nnsp_has_cnst = PETSC_TRUE; 1012 use_nnsp_true = PETSC_TRUE; 1013 } 1014 if(nnsp_has_cnst) { 1015 nnsp_addone = 1; 1016 } 1017 /* 1018 Evaluate maximum storage size needed by the procedure 1019 - temp_indices will contain start index of each constraint stored as follows 1020 - temp_indices_to_constraint 
[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1021 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1022 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1023 */ 1024 1025 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 1026 total_counts *= (nnsp_addone+nnsp_size); 1027 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1028 total_counts += n_vertices; 1029 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1030 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 1031 total_counts = 0; 1032 max_size_of_constraint = 0; 1033 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1034 if(i<pcbddc->n_ISForEdges){ 1035 used_IS = &pcbddc->ISForEdges[i]; 1036 } else { 1037 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1038 } 1039 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1040 total_counts += j; 1041 if(j>max_size_of_constraint) max_size_of_constraint=j; 1042 } 1043 total_counts *= (nnsp_addone+nnsp_size); 1044 total_counts += n_vertices; 1045 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1046 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1047 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1048 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1049 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1050 for(i=0;i<pcis->n;i++) { 1051 local_to_B[i]=-1; 1052 } 1053 for(i=0;i<pcis->n_B;i++) { 1054 local_to_B[is_indices[i]]=i; 1055 } 1056 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1057 
  /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */
  rwork = 0;
  work = 0;
  singular_vals = 0;
  temp_basis = 0;
  correlation_mat = 0;
  if(!pcbddc->use_nnsp_true) {
    PetscScalar temp_work;
#if defined(PETSC_MISSING_LAPACK_GESVD)
    /* POD */
    PetscInt max_n;
    max_n = nnsp_addone+nnsp_size;
    /* using some techniques borrowed from Proper Orthogonal Decomposition */
    ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
    ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
    ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    Bt = PetscBLASIntCast(max_n);
    lwork=-1;
#if !defined(PETSC_USE_COMPLEX)
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr);
#else
    LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr);
#endif
#else /* on missing GESVD */
    /* SVD: workspace query sized for the largest constraint block (max_n x min_n) */
    PetscInt max_n,min_n;
    max_n = max_size_of_constraint;
    min_n = nnsp_addone+nnsp_size;
    if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) {
      min_n = max_size_of_constraint;
      max_n = nnsp_addone+nnsp_size;
    }
    ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
#endif
    /* now we evaluate the optimal workspace using query with lwork=-1 */
    lwork=-1;
    Bs = PetscBLASIntCast(max_n);
    Bt = PetscBLASIntCast(min_n);
    dummy_int = Bs;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr);
#else
    LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals,
                 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr);
#endif
    if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
#endif
    /* Allocate optimal workspace (LAPACK returns the needed size in temp_work) */
    lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work));
    total_counts = (PetscInt)lwork;
    ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  }
  /* get local part of global near null space vectors */
  ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
  for(k=0;k<nnsp_size;k++) {
    ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  /* Now we can loop on constraining sets; total_counts now counts accepted constraints */
  total_counts=0;
  temp_indices[0]=0;
  /* vertices */
  PetscBool used_vertex;
  ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
  ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  if(nnsp_has_cnst) { /* consider all vertices */
    for(i=0;i<n_vertices;i++) {
      temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
      temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
      temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
      temp_indices[total_counts+1]=temp_indices[total_counts]+1;
      vertices[total_counts]=is_indices[i];
      change_basis[total_counts]=PETSC_FALSE;
      total_counts++;
    }
  } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
    for(i=0;i<n_vertices;i++) {
      used_vertex=PETSC_FALSE;
      k=0;
      while(!used_vertex && k<nnsp_size) {
        ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
        if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) {
          temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
          temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
          temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
          temp_indices[total_counts+1]=temp_indices[total_counts]+1;
          vertices[total_counts]=is_indices[i];
          change_basis[total_counts]=PETSC_FALSE;
          total_counts++;
          used_vertex=PETSC_TRUE;
        }
        ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
        k++;
      }
    }
  }
  ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
  n_vertices=total_counts;  /* from here on, n_vertices counts ACCEPTED vertex constraints */
  /* edges and faces */
  for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){
    if(i<pcbddc->n_ISForEdges){
      used_IS = &pcbddc->ISForEdges[i];
      boolforface = pcbddc->usechangeofbasis;
    } else {
      used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges];
      boolforface = pcbddc->usechangeonfaces;
    }
    temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
    temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
    ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
    ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    if(nnsp_has_cnst) {
      /* normalized constant constraint (average) on the connected component */
      temp_constraints++;
      quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint));
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
      }
      temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
      change_basis[total_counts]=boolforface;
      total_counts++;
    }
    for(k=0;k<nnsp_size;k++) {
      /* one candidate constraint per near null space vector */
      ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      for(j=0;j<size_of_constraint;j++) {
        temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
        temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
        temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]];
      }
      ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr);
      quad_value = 1.0;
      if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */
        Bs = PetscBLASIntCast(size_of_constraint);
        quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone);
      }
      if ( quad_value > 0.0 ) { /* keep indices and values */
        temp_constraints++;
        temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
        change_basis[total_counts]=boolforface;
        total_counts++;
      }
    }
    ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
    /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */
    if(!use_nnsp_true) {

      Bs = PetscBLASIntCast(size_of_constraint);
      Bt = PetscBLASIntCast(temp_constraints);

#if defined(PETSC_MISSING_LAPACK_GESVD)
      /* POD path: orthonormalize via the eigen-decomposition of the (small)
         Bt x Bt correlation matrix instead of a direct SVD */
      ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr);
      /* Store upper triangular part of correlation matrix */
      for(j=0;j<temp_constraints;j++) {
        for(k=0;k<j+1;k++) {
#if defined(PETSC_USE_COMPLEX)
          /* hand made complex dot product */
          dot_result = 0.0;
          for (ii=0; ii<size_of_constraint; ii++) {
            val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii];
            val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii];
            dot_result += val1*PetscConj(val2);
          }
#else
          dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone,
                                &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone);
#endif
          correlation_mat[j*temp_constraints+k]=dot_result;
        }
      }
#if !defined(PETSC_USE_COMPLEX)
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr);
#else
      LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr);
#endif
      if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr);
      /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */
      j=0;
      while( j < Bt && singular_vals[j] < tol) j++;
      total_counts=total_counts-j;  /* drop the j near-dependent constraints */
      if(j<temp_constraints) {
        for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); }
        BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs);
        /* copy POD basis into used quadrature memory (largest eigenvalues first) */
        for(k=0;k<Bt-j;k++) {
          for(ii=0;ii<size_of_constraint;ii++) {
            temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii];
          }
        }
      }

#else  /* on missing GESVD */

      PetscInt min_n = temp_constraints;
      if(min_n > size_of_constraint) min_n =
size_of_constraint; 1263 dummy_int = Bs; 1264 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1265 #if !defined(PETSC_USE_COMPLEX) 1266 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1267 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1268 #else 1269 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1270 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1271 #endif 1272 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1274 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1275 j=0; 1276 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1277 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1278 #endif 1279 } 1280 } 1281 1282 n_constraints=total_counts-n_vertices; 1283 local_primal_size = total_counts; 1284 /* set quantities in pcbddc data structure */ 1285 pcbddc->n_vertices = n_vertices; 1286 pcbddc->n_constraints = n_constraints; 1287 pcbddc->local_primal_size = local_primal_size; 1288 1289 /* Create constraint matrix */ 1290 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1291 /* so we need to set it up properly either with or without change of basis */ 1292 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1293 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1294 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1295 /* compute a local numbering of constraints : vertices first then constraints */ 1296 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1297 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1298 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1299 
ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 1300 total_counts=0; 1301 /* find vertices: subdomain corners plus dofs with basis changed */ 1302 for(i=0;i<local_primal_size;i++) { 1303 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1304 if(change_basis[i] || size_of_constraint == 1) { 1305 k=0; 1306 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 1307 k=k+1; 1308 } 1309 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 1310 array_vector[j] = 1.0; 1311 aux_primal_numbering[total_counts]=j; 1312 aux_primal_permutation[total_counts]=total_counts; 1313 total_counts++; 1314 } 1315 } 1316 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1317 /* permute indices in order to have a sorted set of vertices */ 1318 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 1319 /* nonzero structure */ 1320 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1321 for(i=0;i<total_counts;i++) { 1322 nnz[i]=1; 1323 } 1324 j=total_counts; 1325 for(i=n_vertices;i<local_primal_size;i++) { 1326 if(!change_basis[i]) { 1327 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1328 j++; 1329 } 1330 } 1331 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1332 ierr = PetscFree(nnz);CHKERRQ(ierr); 1333 /* set values in constraint matrix */ 1334 for(i=0;i<total_counts;i++) { 1335 j = aux_primal_permutation[i]; 1336 k = aux_primal_numbering[j]; 1337 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 1338 } 1339 for(i=n_vertices;i<local_primal_size;i++) { 1340 if(!change_basis[i]) { 1341 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1342 ierr = 
MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1343 total_counts++; 1344 } 1345 } 1346 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1347 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 1348 /* assembling */ 1349 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1350 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1351 1352 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 1353 if(pcbddc->usechangeofbasis) { 1354 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1355 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1356 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1357 /* work arrays */ 1358 /* we need to reuse these arrays, so we free them */ 1359 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1360 ierr = PetscFree(work);CHKERRQ(ierr); 1361 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1362 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1363 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1364 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 1365 for(i=0;i<pcis->n_B;i++) { 1366 nnz[i]=1; 1367 } 1368 /* Overestimated nonzeros per row */ 1369 k=1; 1370 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1371 if(change_basis[i]) { 1372 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1373 if(k < size_of_constraint) { 1374 k = size_of_constraint; 1375 } 1376 for(j=0;j<size_of_constraint;j++) { 1377 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1378 } 1379 } 1380 } 1381 ierr = 
MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1382 ierr = PetscFree(nnz);CHKERRQ(ierr); 1383 /* Temporary array to store indices */ 1384 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 1385 /* Set initial identity in the matrix */ 1386 for(i=0;i<pcis->n_B;i++) { 1387 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1388 } 1389 /* Now we loop on the constraints which need a change of basis */ 1390 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1391 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 1392 temp_constraints = 0; 1393 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 1394 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1395 if(change_basis[i]) { 1396 compute_submatrix = PETSC_FALSE; 1397 useksp = PETSC_FALSE; 1398 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 1399 temp_constraints++; 1400 if(temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 1401 compute_submatrix = PETSC_TRUE; 1402 } 1403 } 1404 if(compute_submatrix) { 1405 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 1406 useksp = PETSC_TRUE; 1407 } 1408 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1409 if(useksp) { /* experimental */ 1410 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 1411 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 1412 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 1413 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 1414 } 1415 /* First _size_of_constraint-temp_constraints_ columns */ 1416 dual_dofs = size_of_constraint-temp_constraints; 1417 start_constraint = i+1-temp_constraints; 1418 for(s=0;s<dual_dofs;s++) { 1419 is_indices[0] = s; 1420 for(j=0;j<temp_constraints;j++) { 1421 for(k=0;k<temp_constraints;k++) { 1422 
temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 1423 } 1424 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 1425 is_indices[j+1]=s+j+1; 1426 } 1427 Bt = temp_constraints; 1428 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1429 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 1430 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 1431 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1432 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 1433 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 1434 if(useksp) { 1435 /* temp mat with transposed rows and columns */ 1436 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 1437 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 1438 } 1439 } 1440 if(useksp) { 1441 /* last rows of temp_mat */ 1442 for(j=0;j<size_of_constraint;j++) { 1443 is_indices[j] = j; 1444 } 1445 for(s=0;s<temp_constraints;s++) { 1446 k = s + dual_dofs; 1447 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1448 } 1449 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1451 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 1452 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 1453 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1454 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 1455 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 1456 for(s=0;s<temp_constraints;s++) { 1457 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 1458 ierr = 
VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 1459 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 1460 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 1461 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 1462 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 1463 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1464 /* last columns of change of basis matrix associated to new primal dofs */ 1465 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 1466 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 1467 } 1468 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 1469 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 1470 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1471 } else { 1472 /* last columns of change of basis matrix associated to new primal dofs */ 1473 for(s=0;s<temp_constraints;s++) { 1474 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1475 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1476 } 1477 } 1478 /* prepare for the next cycle */ 1479 temp_constraints = 0; 1480 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 1481 } 1482 } 1483 } 1484 /* assembling */ 1485 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1486 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1487 ierr = PetscFree(ipiv);CHKERRQ(ierr); 1488 ierr = PetscFree(is_indices);CHKERRQ(ierr); 1489 } 1490 /* free workspace no longer needed */ 1491 ierr = PetscFree(rwork);CHKERRQ(ierr); 1492 ierr = PetscFree(work);CHKERRQ(ierr); 1493 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 
1494 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1495 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1496 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1497 ierr = PetscFree(change_basis);CHKERRQ(ierr); 1498 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1499 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1500 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1501 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1502 for(k=0;k<nnsp_size;k++) { 1503 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1504 } 1505 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1509 #undef PETSC_MISSING_LAPACK_GESVD 1510 #endif 1511 /* -------------------------------------------------------------------------- */ 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "PCBDDCCoarseSetUp" 1514 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1515 { 1516 PetscErrorCode ierr; 1517 1518 PC_IS* pcis = (PC_IS*)(pc->data); 1519 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1520 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1521 Mat change_mat_all; 1522 IS is_R_local; 1523 IS is_V_local; 1524 IS is_C_local; 1525 IS is_aux1; 1526 IS is_aux2; 1527 const VecType impVecType; 1528 const MatType impMatType; 1529 PetscInt n_R=0; 1530 PetscInt n_D=0; 1531 PetscInt n_B=0; 1532 PetscScalar zero=0.0; 1533 PetscScalar one=1.0; 1534 PetscScalar m_one=-1.0; 1535 PetscScalar* array; 1536 PetscScalar *coarse_submat_vals; 1537 PetscInt *idx_R_local; 1538 PetscInt *idx_V_B; 1539 PetscScalar *coarsefunctions_errors; 1540 PetscScalar *constraints_errors; 1541 /* auxiliary indices */ 1542 PetscInt i,j,k; 1543 /* for verbose output of bddc */ 1544 PetscViewer viewer=pcbddc->dbg_viewer; 1545 PetscBool dbg_flag=pcbddc->dbg_flag; 1546 /* for counting coarse dofs */ 1547 PetscInt n_vertices,n_constraints; 1548 PetscInt size_of_constraint; 1549 PetscInt *row_cmat_indices; 1550 PetscScalar *row_cmat_values; 1551 PetscInt 
*vertices,*nnz,*is_indices,*temp_indices; 1552 1553 PetscFunctionBegin; 1554 /* Set Non-overlapping dimensions */ 1555 n_B = pcis->n_B; n_D = pcis->n - n_B; 1556 /* Set types for local objects needed by BDDC precondtioner */ 1557 impMatType = MATSEQDENSE; 1558 impVecType = VECSEQ; 1559 /* get vertex indices from constraint matrix */ 1560 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1561 n_vertices=0; 1562 for(i=0;i<pcbddc->local_primal_size;i++) { 1563 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1564 if(size_of_constraint == 1) { 1565 vertices[n_vertices]=row_cmat_indices[0]; 1566 n_vertices++; 1567 } 1568 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1569 } 1570 /* Set number of constraints */ 1571 n_constraints = pcbddc->local_primal_size-n_vertices; 1572 1573 /* vertices in boundary numbering */ 1574 if(n_vertices) { 1575 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1576 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1577 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1578 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1579 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1581 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1582 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1583 for (i=0; i<n_vertices; i++) { 1584 j=0; 1585 while (array[j] != i ) {j++;} 1586 idx_V_B[i]=j; 1587 } 1588 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1589 } 1590 1591 /* transform local matrices if needed */ 1592 if(pcbddc->usechangeofbasis) { 1593 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1594 ierr = 
ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1595 for(i=0;i<n_D;i++) { 1596 nnz[is_indices[i]]=1; 1597 } 1598 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1599 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1600 k=1; 1601 for(i=0;i<n_B;i++) { 1602 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1603 nnz[is_indices[i]]=j; 1604 if( k < j) { 1605 k = j; 1606 } 1607 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1608 } 1609 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1610 /* assemble change of basis matrix on the whole set of local dofs */ 1611 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1612 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 1613 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 1614 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 1615 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 1616 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1617 for(i=0;i<n_D;i++) { 1618 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1619 } 1620 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1621 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1622 for(i=0;i<n_B;i++) { 1623 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1624 for(k=0;k<j;k++) { 1625 temp_indices[k]=is_indices[row_cmat_indices[k]]; 1626 } 1627 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 1628 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const 
PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1629 } 1630 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1631 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 1633 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1634 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1635 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 1636 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 1637 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 1638 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 1639 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 1640 ierr = PetscFree(nnz);CHKERRQ(ierr); 1641 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1642 } else { 1643 /* without change of basis, the local matrix is unchanged */ 1644 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 1645 pcbddc->local_mat = matis->A; 1646 } 1647 1648 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1649 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1650 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1651 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1652 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1653 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1654 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1655 if(dbg_flag) { 1656 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1657 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1658 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1659 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1661 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1662 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1663 } 1664 1665 /* Allocate needed vectors */ 1666 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 1667 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1668 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1669 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1670 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1671 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1672 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1673 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1674 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1675 1676 /* Creating some index sets needed */ 1677 /* For submatrices */ 1678 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 1679 if(n_vertices) { 1680 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 1681 } 1682 if(n_constraints) { 1683 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 1684 } 1685 1686 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1687 { 1688 PetscInt *aux_array1; 1689 PetscInt *aux_array2; 1690 1691 ierr = 
PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1692 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1693 1694 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1695 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1696 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1697 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1698 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1699 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1700 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1701 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1702 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 1703 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1704 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1705 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1706 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 1707 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1708 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1709 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1710 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1711 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1712 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1713 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1714 1715 if(pcbddc->prec_type || dbg_flag ) { 1716 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1717 ierr = 
VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1718 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 1719 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1720 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1721 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1722 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1723 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1724 } 1725 } 1726 1727 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1728 { 1729 Mat A_RR; 1730 PC pc_temp; 1731 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1732 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1733 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1734 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1735 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1736 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 1737 /* default */ 1738 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1739 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1740 /* Allow user's customization */ 1741 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1742 /* Set Up KSP for Dirichlet problem of BDDC */ 1743 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1744 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 1745 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1746 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1747 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1748 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1749 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1750 ierr = 
KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1751 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 1752 /* default */ 1753 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1754 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1755 /* Allow user's customization */ 1756 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1757 /* Set Up KSP for Neumann problem of BDDC */ 1758 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1759 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 1760 /* check Dirichlet and Neumann solvers */ 1761 if(pcbddc->dbg_flag) { 1762 Vec temp_vec; 1763 PetscScalar value; 1764 1765 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1766 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1767 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1768 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1769 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1770 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1771 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1772 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1775 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1776 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1777 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1778 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1779 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1780 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1781 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1782 ierr = 
VecDestroy(&temp_vec);CHKERRQ(ierr); 1783 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1784 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1785 } 1786 /* free Neumann problem's matrix */ 1787 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1788 } 1789 1790 /* Assemble all remaining stuff needed to apply BDDC */ 1791 { 1792 Mat A_RV,A_VR,A_VV; 1793 Mat M1,M2; 1794 Mat C_CR; 1795 Mat AUXMAT; 1796 Vec vec1_C; 1797 Vec vec2_C; 1798 Vec vec1_V; 1799 Vec vec2_V; 1800 PetscInt *nnz; 1801 PetscInt *auxindices; 1802 PetscInt index; 1803 PetscScalar* array2; 1804 MatFactorInfo matinfo; 1805 1806 /* Allocating some extra storage just to be safe */ 1807 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1808 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1809 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1810 1811 /* some work vectors on vertices and/or constraints */ 1812 if(n_vertices) { 1813 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1814 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1815 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1816 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1817 } 1818 if(n_constraints) { 1819 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1820 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 1821 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1822 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1823 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1824 } 1825 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1826 if(n_constraints) { 1827 /* some work vectors */ 1828 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1829 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1830 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 
1831 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1832 1833 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1834 for(i=0;i<n_constraints;i++) { 1835 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1836 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1837 /* Get row of constraint matrix in R numbering */ 1838 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1839 ierr = MatGetRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1840 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1841 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,n_vertices+i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1842 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1843 for(j=0;j<n_R;j++) { array2[j] = array[ idx_R_local[j] ]; } 1844 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1845 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1846 /* Solve for row of constraint matrix in R numbering */ 1847 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1848 /* Set values */ 1849 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1850 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1851 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1852 } 1853 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1854 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1855 1856 /* Create Constraint matrix on R nodes: C_{CR} */ 1857 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1858 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1859 1860 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} 
A_{RR}^{-1} C^T_{CR} )^{-1} */ 1861 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1862 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1863 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1864 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1865 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1866 1867 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1868 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1869 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1870 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1871 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1872 for(i=0;i<n_constraints;i++) { 1873 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1874 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1875 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1876 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1877 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1878 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1879 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1880 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1881 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1882 } 1883 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1884 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1885 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1886 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1887 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1888 1889 } 1890 1891 /* Get submatrices from subdomain matrix */ 1892 if(n_vertices){ 1893 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1894 ierr = 
MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1895 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1896 /* Assemble M2 = A_RR^{-1}A_RV */ 1897 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1898 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1899 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1900 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1901 for(i=0;i<n_vertices;i++) { 1902 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1903 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1904 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1905 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1906 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1907 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1908 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1909 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1910 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1911 } 1912 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1913 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1914 } 1915 1916 /* Matrix of coarse basis functions (local) */ 1917 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1918 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1919 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1920 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1921 if(pcbddc->prec_type || dbg_flag ) { 1922 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1923 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1924 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1925 ierr = 
MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1926 } 1927 1928 if(dbg_flag) { 1929 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1930 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1931 } 1932 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1933 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1934 1935 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1936 for(i=0;i<n_vertices;i++){ 1937 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1938 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1939 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1940 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1941 /* solution of saddle point problem */ 1942 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1943 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 1944 if(n_constraints) { 1945 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 1946 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1947 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 1948 } 1949 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 1950 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 1951 1952 /* Set values in coarse basis function and subdomain part of coarse_mat */ 1953 /* coarse basis functions */ 1954 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1955 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1956 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1958 ierr = 
MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1959 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1960 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 1961 if( pcbddc->prec_type || dbg_flag ) { 1962 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1963 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1964 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1965 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1966 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 1967 } 1968 /* subdomain contribution to coarse matrix */ 1969 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1970 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 1971 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1972 if(n_constraints) { 1973 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1974 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 1975 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1976 } 1977 1978 if( dbg_flag ) { 1979 /* assemble subdomain vector on nodes */ 1980 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 1981 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1982 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1983 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 1984 array[ vertices[i] ] = one; 1985 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 1986 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1987 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 1988 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 1989 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1990 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 1991 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 1992 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 1993 if(n_constraints) { 1994 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 1995 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 1996 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 1997 } 1998 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 1999 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 2000 /* check saddle point solution */ 2001 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2002 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2003 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 2004 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2005 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2006 array[i]=array[i]+m_one; /* shift by the identity matrix */ 2007 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2008 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 2009 } 2010 } 2011 2012 for(i=0;i<n_constraints;i++){ 2013 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 2014 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 2015 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 2016 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 2017 /* solution of saddle point problem */ 2018 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 2019 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 2020 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2021 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 2022 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2023 
/* coarse basis functions */ 2024 index=i+n_vertices; 2025 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2026 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2027 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2028 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2029 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2030 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2031 if( pcbddc->prec_type || dbg_flag ) { 2032 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2035 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2036 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2037 } 2038 /* subdomain contribution to coarse matrix */ 2039 if(n_vertices) { 2040 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2041 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 2042 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2043 } 2044 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2045 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 2046 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2047 2048 if( dbg_flag ) { 2049 /* assemble subdomain vector on nodes */ 2050 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2051 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2052 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2053 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 2054 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2055 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2056 /* assemble subdomain vector of lagrange multipliers */ 2057 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 2058 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2059 if( n_vertices) { 2060 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2061 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 2062 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2063 } 2064 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2065 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 2066 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2067 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2068 /* check saddle point solution CACCA*/ 2069 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2070 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2071 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 2072 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2073 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2074 array[index]=array[index]+m_one; /* shift by the identity matrix */ 2075 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2076 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 2077 } 2078 } 2079 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 if( pcbddc->prec_type || dbg_flag ) { 2082 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2084 } 2085 /* Checking coarse_sub_mat and coarse basis functios */ 2086 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2087 
if(dbg_flag) { 2088 2089 Mat coarse_sub_mat; 2090 Mat TM1,TM2,TM3,TM4; 2091 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 2092 const MatType checkmattype=MATSEQAIJ; 2093 PetscScalar value; 2094 2095 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2096 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2097 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2098 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2099 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2100 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2101 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2102 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 2103 2104 /*PetscViewer view_out; 2105 PetscMPIInt myrank; 2106 char filename[256]; 2107 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 2108 sprintf(filename,"coarsesubmat_%04d.m",myrank); 2109 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 2110 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2111 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 2112 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2113 2114 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2115 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 2116 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2117 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2118 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2119 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2120 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2121 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2122 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2123 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2124 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2125 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2126 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2127 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2128 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2129 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 2130 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 2131 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 2132 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 2133 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 2134 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 2135 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 2136 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 2137 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2138 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2139 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2140 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2141 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2142 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2143 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 2144 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2145 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2146 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2147 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2148 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2149 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 2150 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 2151 } 2152 2153 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 2154 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 2155 /* free memory */ 2156 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2157 ierr = PetscFree(auxindices);CHKERRQ(ierr); 2158 ierr = PetscFree(nnz);CHKERRQ(ierr); 2159 if(n_vertices) { 2160 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 2161 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 2162 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2163 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2164 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2165 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2166 } 2167 if(n_constraints) { 2168 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 2169 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 2170 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2171 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2172 } 2173 } 2174 /* free memory */ 2175 if(n_vertices) { 2176 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2177 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 2178 } 2179 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 2180 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /* -------------------------------------------------------------------------- */ 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 2188 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2189 { 2190 2191 2192 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2193 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2194 PC_IS *pcis = (PC_IS*)pc->data; 2195 MPI_Comm prec_comm = 
((PetscObject)pc)->comm; 2196 MPI_Comm coarse_comm; 2197 2198 /* common to all choiches */ 2199 PetscScalar *temp_coarse_mat_vals; 2200 PetscScalar *ins_coarse_mat_vals; 2201 PetscInt *ins_local_primal_indices; 2202 PetscMPIInt *localsizes2,*localdispl2; 2203 PetscMPIInt size_prec_comm; 2204 PetscMPIInt rank_prec_comm; 2205 PetscMPIInt active_rank=MPI_PROC_NULL; 2206 PetscMPIInt master_proc=0; 2207 PetscInt ins_local_primal_size; 2208 /* specific to MULTILEVEL_BDDC */ 2209 PetscMPIInt *ranks_recv; 2210 PetscMPIInt count_recv=0; 2211 PetscMPIInt rank_coarse_proc_send_to; 2212 PetscMPIInt coarse_color = MPI_UNDEFINED; 2213 ISLocalToGlobalMapping coarse_ISLG; 2214 /* some other variables */ 2215 PetscErrorCode ierr; 2216 const MatType coarse_mat_type; 2217 const PCType coarse_pc_type; 2218 const KSPType coarse_ksp_type; 2219 PC pc_temp; 2220 PetscInt i,j,k,bs; 2221 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2222 /* verbose output viewer */ 2223 PetscViewer viewer=pcbddc->dbg_viewer; 2224 PetscBool dbg_flag=pcbddc->dbg_flag; 2225 2226 PetscFunctionBegin; 2227 2228 ins_local_primal_indices = 0; 2229 ins_coarse_mat_vals = 0; 2230 localsizes2 = 0; 2231 localdispl2 = 0; 2232 temp_coarse_mat_vals = 0; 2233 coarse_ISLG = 0; 2234 2235 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2236 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2237 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2238 2239 /* Assign global numbering to coarse dofs */ 2240 { 2241 PetscScalar one=1.,zero=0.; 2242 PetscScalar *array; 2243 PetscMPIInt *auxlocal_primal; 2244 PetscMPIInt *auxglobal_primal; 2245 PetscMPIInt *all_auxglobal_primal; 2246 PetscMPIInt *all_auxglobal_primal_dummy; 2247 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2248 PetscInt *row_cmat_indices; 2249 PetscInt size_of_constraint; 2250 PetscScalar coarsesum; 2251 2252 /* Construct needed data structures for message passing */ 2253 ierr = 
PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 2254 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2255 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2256 /* Gather local_primal_size information for all processes */ 2257 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2258 pcbddc->replicated_primal_size = 0; 2259 for (i=0; i<size_prec_comm; i++) { 2260 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2261 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2262 } 2263 if(rank_prec_comm == 0) { 2264 /* allocate some auxiliary space */ 2265 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 2266 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 2267 } 2268 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 2269 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 2270 2271 /* First let's count coarse dofs. 
2272 This code fragment assumes that the number of local constraints per connected component 2273 is not greater than the number of nodes defined for the connected component 2274 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2275 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 2276 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2277 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2278 for(i=0;i<pcbddc->local_primal_size;i++) { 2279 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2280 for (j=0; j<size_of_constraint; j++) { 2281 k = row_cmat_indices[j]; 2282 if( array[k] == zero ) { 2283 array[k] = one; 2284 auxlocal_primal[i] = k; 2285 break; 2286 } 2287 } 2288 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2289 } 2290 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2291 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2292 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2293 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2294 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2295 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2296 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2297 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 2298 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2299 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2300 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2301 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2302 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2303 pcbddc->coarse_size = (PetscInt) coarsesum; 2304 2305 /* Now assign them a global numbering */ 2306 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 2307 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 2308 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 2309 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2310 2311 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 2312 /* It implements a function similar to PetscSortRemoveDupsInt */ 2313 if(rank_prec_comm==0) { 2314 /* dummy argument since PetscSortMPIInt doesn't exist! */ 2315 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 2316 k=1; 2317 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 2318 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 2319 if(j != all_auxglobal_primal[i] ) { 2320 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 2321 k++; 2322 j=all_auxglobal_primal[i]; 2323 } 2324 } 2325 } else { 2326 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 2327 } 2328 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. 
*/ 2329 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2330 2331 /* Now get global coarse numbering of local primal nodes */ 2332 for(i=0;i<pcbddc->local_primal_size;i++) { 2333 k=0; 2334 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 2335 pcbddc->local_primal_indices[i]=k; 2336 } 2337 if(dbg_flag) { 2338 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2339 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 2340 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2341 } 2342 /* free allocated memory */ 2343 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2344 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 2345 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 2346 if(rank_prec_comm == 0) { 2347 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 2348 } 2349 } 2350 2351 /* adapt coarse problem type */ 2352 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 2353 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2354 2355 switch(pcbddc->coarse_problem_type){ 2356 2357 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2358 { 2359 /* we need additional variables */ 2360 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2361 MetisInt *metis_coarse_subdivision; 2362 MetisInt options[METIS_NOPTIONS]; 2363 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2364 PetscMPIInt procs_jumps_coarse_comm; 2365 PetscMPIInt *coarse_subdivision; 2366 PetscMPIInt *total_count_recv; 2367 PetscMPIInt *total_ranks_recv; 2368 PetscMPIInt *displacements_recv; 2369 PetscMPIInt *my_faces_connectivity; 2370 PetscMPIInt *petsc_faces_adjncy; 2371 MetisInt *faces_adjncy; 2372 MetisInt *faces_xadj; 2373 PetscMPIInt *number_of_faces; 2374 PetscMPIInt *faces_displacements; 2375 PetscInt *array_int; 2376 PetscMPIInt my_faces=0; 2377 
PetscMPIInt total_faces=0; 2378 PetscInt ranks_stretching_ratio; 2379 2380 /* define some quantities */ 2381 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2382 coarse_mat_type = MATIS; 2383 coarse_pc_type = PCBDDC; 2384 coarse_ksp_type = KSPCHEBYSHEV; 2385 2386 /* details of coarse decomposition */ 2387 n_subdomains = pcbddc->active_procs; 2388 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2389 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 2390 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2391 2392 /*printf("Coarse algorithm details: \n"); 2393 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 2394 2395 /* build CSR graph of subdomains' connectivity through faces */ 2396 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2397 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2398 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2399 for(j=0;j<pcis->n_shared[i];j++){ 2400 array_int[ pcis->shared[i][j] ]+=1; 2401 } 2402 } 2403 for(i=1;i<pcis->n_neigh;i++){ 2404 for(j=0;j<pcis->n_shared[i];j++){ 2405 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2406 my_faces++; 2407 break; 2408 } 2409 } 2410 } 2411 2412 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2413 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2414 my_faces=0; 2415 for(i=1;i<pcis->n_neigh;i++){ 2416 for(j=0;j<pcis->n_shared[i];j++){ 2417 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2418 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2419 my_faces++; 2420 break; 2421 } 2422 } 2423 } 2424 if(rank_prec_comm == master_proc) { 2425 ierr = PetscMalloc 
(total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2426 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2427 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2428 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2429 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2430 } 2431 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2432 if(rank_prec_comm == master_proc) { 2433 faces_xadj[0]=0; 2434 faces_displacements[0]=0; 2435 j=0; 2436 for(i=1;i<size_prec_comm+1;i++) { 2437 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2438 if(number_of_faces[i-1]) { 2439 j++; 2440 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2441 } 2442 } 2443 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 2444 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 2445 } 2446 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2447 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2448 ierr = PetscFree(array_int);CHKERRQ(ierr); 2449 if(rank_prec_comm == master_proc) { 2450 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2451 /*printf("This is the face connectivity (actual ranks)\n"); 2452 for(i=0;i<n_subdomains;i++){ 2453 printf("proc %d is connected with \n",i); 2454 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2455 printf("%d ",faces_adjncy[j]); 2456 printf("\n"); 2457 }*/ 2458 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2459 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2460 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2461 } 2462 2463 if( rank_prec_comm == master_proc ) { 2464 2465 PetscInt 
heuristic_for_metis=3; 2466 2467 ncon=1; 2468 faces_nvtxs=n_subdomains; 2469 /* partition graoh induced by face connectivity */ 2470 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2471 ierr = METIS_SetDefaultOptions(options); 2472 /* we need a contiguous partition of the coarse mesh */ 2473 options[METIS_OPTION_CONTIG]=1; 2474 options[METIS_OPTION_DBGLVL]=1; 2475 options[METIS_OPTION_NITER]=30; 2476 if(n_subdomains>n_parts*heuristic_for_metis) { 2477 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2478 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2479 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2480 } else { 2481 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2482 } 2483 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2484 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2485 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2486 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2487 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2488 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2489 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2490 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2491 } 2492 2493 /* Create new communicator for coarse problem splitting the old one */ 2494 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2495 coarse_color=0; /* for communicator splitting */ 2496 active_rank=rank_prec_comm; /* for 
insertion of matrix values */ 2497 } 2498 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2499 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2500 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2501 2502 if( coarse_color == 0 ) { 2503 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2504 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2505 /*printf("Details of coarse comm\n"); 2506 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2507 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 2508 } else { 2509 rank_coarse_comm = MPI_PROC_NULL; 2510 } 2511 2512 /* master proc take care of arranging and distributing coarse informations */ 2513 if(rank_coarse_comm == master_proc) { 2514 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2515 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2516 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 2517 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2518 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2519 /* some initializations */ 2520 displacements_recv[0]=0; 2521 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 2522 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2523 for(j=0;j<size_coarse_comm;j++) 2524 for(i=0;i<size_prec_comm;i++) 2525 if(coarse_subdivision[i]==j) 2526 total_count_recv[j]++; 2527 /* displacements needed for scatterv of total_ranks_recv */ 2528 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2529 /* Now fill 
properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2530 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2531 for(j=0;j<size_coarse_comm;j++) { 2532 for(i=0;i<size_prec_comm;i++) { 2533 if(coarse_subdivision[i]==j) { 2534 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2535 total_count_recv[j]+=1; 2536 } 2537 } 2538 } 2539 /*for(j=0;j<size_coarse_comm;j++) { 2540 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2541 for(i=0;i<total_count_recv[j];i++) { 2542 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2543 } 2544 printf("\n"); 2545 }*/ 2546 2547 /* identify new decomposition in terms of ranks in the old communicator */ 2548 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2549 /*printf("coarse_subdivision in old end new ranks\n"); 2550 for(i=0;i<size_prec_comm;i++) 2551 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2552 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2553 } else { 2554 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2555 } 2556 printf("\n");*/ 2557 } 2558 2559 /* Scatter new decomposition for send details */ 2560 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2561 /* Scatter receiving details to members of coarse decomposition */ 2562 if( coarse_color == 0) { 2563 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2564 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2565 ierr = 
MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2566 } 2567 2568 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2569 if(coarse_color == 0) { 2570 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2571 for(i=0;i<count_recv;i++) 2572 printf("%d ",ranks_recv[i]); 2573 printf("\n"); 2574 }*/ 2575 2576 if(rank_prec_comm == master_proc) { 2577 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2578 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2579 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 2580 free(coarse_subdivision); 2581 free(total_count_recv); 2582 free(total_ranks_recv); 2583 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2584 } 2585 break; 2586 } 2587 2588 case(REPLICATED_BDDC): 2589 2590 pcbddc->coarse_communications_type = GATHERS_BDDC; 2591 coarse_mat_type = MATSEQAIJ; 2592 coarse_pc_type = PCLU; 2593 coarse_ksp_type = KSPPREONLY; 2594 coarse_comm = PETSC_COMM_SELF; 2595 active_rank = rank_prec_comm; 2596 break; 2597 2598 case(PARALLEL_BDDC): 2599 2600 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2601 coarse_mat_type = MATMPIAIJ; 2602 coarse_pc_type = PCREDUNDANT; 2603 coarse_ksp_type = KSPPREONLY; 2604 coarse_comm = prec_comm; 2605 active_rank = rank_prec_comm; 2606 break; 2607 2608 case(SEQUENTIAL_BDDC): 2609 pcbddc->coarse_communications_type = GATHERS_BDDC; 2610 coarse_mat_type = MATSEQAIJ; 2611 coarse_pc_type = PCLU; 2612 coarse_ksp_type = KSPPREONLY; 2613 coarse_comm = PETSC_COMM_SELF; 2614 active_rank = master_proc; 2615 break; 2616 } 2617 2618 switch(pcbddc->coarse_communications_type){ 2619 2620 case(SCATTERS_BDDC): 2621 { 2622 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2623 2624 PetscMPIInt send_size; 2625 PetscInt *aux_ins_indices; 2626 PetscInt ii,jj; 2627 MPI_Request *requests; 2628 2629 /* allocate auxiliary space */ 2630 ierr = 
PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2631 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2632 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2633 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2634 /* allocate stuffs for message massing */ 2635 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2636 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2637 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2638 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2639 /* fill up quantities */ 2640 j=0; 2641 for(i=0;i<count_recv;i++){ 2642 ii = ranks_recv[i]; 2643 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2644 localdispl2[i]=j; 2645 j+=localsizes2[i]; 2646 jj = pcbddc->local_primal_displacements[ii]; 2647 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 2648 } 2649 /*printf("aux_ins_indices 1\n"); 2650 for(i=0;i<pcbddc->coarse_size;i++) 2651 printf("%d ",aux_ins_indices[i]); 2652 printf("\n");*/ 2653 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2654 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2655 /* evaluate how many values I will insert in coarse mat */ 2656 ins_local_primal_size=0; 2657 for(i=0;i<pcbddc->coarse_size;i++) 2658 if(aux_ins_indices[i]) 2659 ins_local_primal_size++; 2660 /* evaluate indices I will insert in coarse mat */ 2661 ierr = PetscMalloc ( 
ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2662 j=0; 2663 for(i=0;i<pcbddc->coarse_size;i++) 2664 if(aux_ins_indices[i]) 2665 ins_local_primal_indices[j++]=i; 2666 /* use aux_ins_indices to realize a global to local mapping */ 2667 j=0; 2668 for(i=0;i<pcbddc->coarse_size;i++){ 2669 if(aux_ins_indices[i]==0){ 2670 aux_ins_indices[i]=-1; 2671 } else { 2672 aux_ins_indices[i]=j; 2673 j++; 2674 } 2675 } 2676 2677 /*printf("New details localsizes2 localdispl2\n"); 2678 for(i=0;i<count_recv;i++) 2679 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2680 printf("\n"); 2681 printf("aux_ins_indices 2\n"); 2682 for(i=0;i<pcbddc->coarse_size;i++) 2683 printf("%d ",aux_ins_indices[i]); 2684 printf("\n"); 2685 printf("ins_local_primal_indices\n"); 2686 for(i=0;i<ins_local_primal_size;i++) 2687 printf("%d ",ins_local_primal_indices[i]); 2688 printf("\n"); 2689 printf("coarse_submat_vals\n"); 2690 for(i=0;i<pcbddc->local_primal_size;i++) 2691 for(j=0;j<pcbddc->local_primal_size;j++) 2692 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2693 printf("\n");*/ 2694 2695 /* processes partecipating in coarse problem receive matrix data from their friends */ 2696 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2697 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2698 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2699 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2700 } 2701 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2702 2703 /*if(coarse_color == 0) { 2704 printf("temp_coarse_mat_vals\n"); 2705 for(k=0;k<count_recv;k++){ 2706 printf("---- %d ----\n",ranks_recv[k]); 2707 
for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2708 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2709 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2710 printf("\n"); 2711 } 2712 }*/ 2713 /* calculate data to insert in coarse mat */ 2714 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2715 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2716 2717 PetscMPIInt rr,kk,lps,lpd; 2718 PetscInt row_ind,col_ind; 2719 for(k=0;k<count_recv;k++){ 2720 rr = ranks_recv[k]; 2721 kk = localdispl2[k]; 2722 lps = pcbddc->local_primal_sizes[rr]; 2723 lpd = pcbddc->local_primal_displacements[rr]; 2724 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 2725 for(j=0;j<lps;j++){ 2726 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2727 for(i=0;i<lps;i++){ 2728 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2729 /*printf("%d %d\n",row_ind,col_ind);*/ 2730 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2731 } 2732 } 2733 } 2734 ierr = PetscFree(requests);CHKERRQ(ierr); 2735 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2736 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2737 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2738 2739 /* create local to global mapping needed by coarse MATIS */ 2740 { 2741 IS coarse_IS; 2742 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2743 coarse_comm = prec_comm; 2744 active_rank=rank_prec_comm; 2745 ierr = 
ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2746 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2747 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2748 } 2749 } 2750 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2751 /* arrays for values insertion */ 2752 ins_local_primal_size = pcbddc->local_primal_size; 2753 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2754 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2755 for(j=0;j<ins_local_primal_size;j++){ 2756 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2757 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2758 } 2759 } 2760 break; 2761 2762 } 2763 2764 case(GATHERS_BDDC): 2765 { 2766 2767 PetscMPIInt mysize,mysize2; 2768 2769 if(rank_prec_comm==active_rank) { 2770 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2771 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2772 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2773 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2774 /* arrays for values insertion */ 2775 ins_local_primal_size = pcbddc->coarse_size; 2776 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2777 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2778 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2779 localdispl2[0]=0; 2780 for(i=1;i<size_prec_comm;i++) 
localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2781 j=0; 2782 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2783 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2784 } 2785 2786 mysize=pcbddc->local_primal_size; 2787 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2788 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2789 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2790 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2791 } else { 2792 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2793 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2794 } 2795 2796 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2797 if(rank_prec_comm==active_rank) { 2798 PetscInt offset,offset2,row_ind,col_ind; 2799 for(j=0;j<ins_local_primal_size;j++){ 2800 ins_local_primal_indices[j]=j; 2801 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2802 } 2803 for(k=0;k<size_prec_comm;k++){ 2804 offset=pcbddc->local_primal_displacements[k]; 2805 offset2=localdispl2[k]; 2806 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2807 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2808 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2809 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2810 
ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2811 } 2812 } 2813 } 2814 } 2815 break; 2816 }/* switch on coarse problem and communications associated with finished */ 2817 } 2818 2819 /* Now create and fill up coarse matrix */ 2820 if( rank_prec_comm == active_rank ) { 2821 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2822 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2823 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2824 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2825 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2826 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2827 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2828 } else { 2829 Mat matis_coarse_local_mat; 2830 /* remind bs */ 2831 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2832 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2833 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2834 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2835 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2836 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2837 } 2838 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2839 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2840 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 
ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2842 2843 /* PetscViewer view_out; 2844 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 2845 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2846 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 2847 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2848 2849 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2850 /* Preconditioner for coarse problem */ 2851 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2852 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2853 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2854 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2855 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2856 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2857 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2858 /* Allow user's customization */ 2859 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2860 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2861 /* Set Up PC for coarse problem BDDC */ 2862 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2863 if(dbg_flag) { 2864 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2865 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2866 } 2867 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2868 } 2869 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2870 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2871 if(dbg_flag) { 2872 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2873 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2874 } 2875 } 2876 } 2877 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2878 IS local_IS,global_IS; 2879 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2880 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2881 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2882 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2883 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2884 } 2885 2886 2887 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2888 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2889 PetscScalar m_one=-1.0; 2890 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2891 const KSPType check_ksp_type=KSPGMRES; 2892 2893 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2894 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2895 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2896 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2897 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2898 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2899 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2900 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2901 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2902 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2903 if(dbg_flag) { 2904 kappa_2=lambda_max/lambda_min; 2905 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2906 ierr = 
VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2907 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2908 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2909 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2910 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2911 } 2912 /* restore coarse ksp to default values */ 2913 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2914 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2915 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2916 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2917 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2918 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2919 } 2920 2921 /* free data structures no longer needed */ 2922 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2923 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2924 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2925 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2926 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2927 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2928 2929 PetscFunctionReturn(0); 2930 } 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2934 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2935 { 2936 2937 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2938 PC_IS *pcis = (PC_IS*)pc->data; 2939 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2940 
PCBDDCGraph mat_graph=pcbddc->mat_graph; 2941 PetscInt *queue_in_global_numbering,*is_indices; 2942 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 2943 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 2944 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 2945 PetscBool same_set; 2946 MPI_Comm interface_comm=((PetscObject)pc)->comm; 2947 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 2948 const PetscInt *neumann_nodes; 2949 const PetscInt *dirichlet_nodes; 2950 IS used_IS,*custom_ISForDofs; 2951 PetscScalar *array; 2952 PetscScalar *array2; 2953 PetscViewer viewer=pcbddc->dbg_viewer; 2954 2955 PetscFunctionBegin; 2956 /* Setup local adjacency graph */ 2957 mat_graph->nvtxs=pcis->n; 2958 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 2959 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 2960 i = mat_graph->nvtxs; 2961 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 2962 ierr = PetscMalloc3(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched,i,PetscInt,&queue_in_global_numbering);CHKERRQ(ierr); 2963 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2964 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2965 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2966 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 2967 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2968 2969 /* Setting dofs splitting in mat_graph->which_dof 2970 Get information about dofs' splitting if provided by the user 2971 Otherwise it assumes a constant block size */ 2972 vertex_size=0; 2973 if(!pcbddc->n_ISForDofs) { 2974 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2975 ierr = 
PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2976 for(i=0;i<bs;i++) { 2977 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2978 } 2979 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2980 vertex_size=1; 2981 /* remove my references to IS objects */ 2982 for(i=0;i<bs;i++) { 2983 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2984 } 2985 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2986 } 2987 for(i=0;i<pcbddc->n_ISForDofs;i++) { 2988 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 2989 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2990 for(j=0;j<k;j++) { 2991 mat_graph->which_dof[is_indices[j]]=i; 2992 } 2993 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 2994 } 2995 /* use mat block size as vertex size if it has not yet set */ 2996 if(!vertex_size) { 2997 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2998 } 2999 3000 /* count number of neigh per node */ 3001 total_counts=0; 3002 for(i=1;i<pcis->n_neigh;i++){ 3003 s=pcis->n_shared[i]; 3004 total_counts+=s; 3005 for(j=0;j<s;j++){ 3006 mat_graph->count[pcis->shared[i][j]] += 1; 3007 } 3008 } 3009 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 3010 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 3011 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3012 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3013 if(used_IS) { 3014 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 3015 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3016 for(i=0;i<neumann_bsize;i++){ 3017 iindex = neumann_nodes[i]; 3018 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 3019 mat_graph->count[iindex]+=1; 3020 total_counts++; 3021 array[iindex]=array[iindex]+1.0; 3022 } else if(array[iindex]>0.0) { 3023 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 3024 } 3025 } 3026 } 3027 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3028 /* allocate space for storing the set of neighbours for each node */ 3029 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 3030 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 3031 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 3032 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3033 for(i=1;i<pcis->n_neigh;i++){ 3034 s=pcis->n_shared[i]; 3035 for(j=0;j<s;j++) { 3036 k=pcis->shared[i][j]; 3037 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 3038 mat_graph->count[k]+=1; 3039 } 3040 } 3041 /* Check consistency of Neumann nodes */ 3042 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3043 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3044 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3045 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3046 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3047 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3048 /* set -1 fake neighbour to mimic Neumann boundary */ 3049 if(used_IS) { 3050 for(i=0;i<neumann_bsize;i++){ 3051 iindex = neumann_nodes[i]; 3052 if(mat_graph->count[iindex] > NEUMANNCNT){ 3053 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 3054 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing 
subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 3055 } 3056 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 3057 mat_graph->count[iindex]+=1; 3058 } 3059 } 3060 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3061 } 3062 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3063 /* sort set of sharing subdomains */ 3064 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 3065 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 3066 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 3067 nodes_touched=0; 3068 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 3069 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 3070 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3071 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3072 if(used_IS) { 3073 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3074 if(dirichlet_bsize && matis->pure_neumann) { 3075 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 3076 } 3077 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3078 for(i=0;i<dirichlet_bsize;i++){ 3079 iindex=dirichlet_nodes[i]; 3080 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 3081 if(array[iindex]>0.0) { 3082 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! 
Wrong node %d\n",iindex); 3083 } 3084 mat_graph->touched[iindex]=PETSC_TRUE; 3085 mat_graph->where[iindex]=0; 3086 nodes_touched++; 3087 array2[iindex]=array2[iindex]+1.0; 3088 } 3089 } 3090 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3091 } 3092 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3093 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3094 /* Check consistency of Dirichlet nodes */ 3095 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3096 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3097 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3098 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3099 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3102 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3103 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3104 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3106 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3107 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3108 if(used_IS) { 3109 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3110 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3111 for(i=0;i<dirichlet_bsize;i++){ 3112 iindex=dirichlet_nodes[i]; 3113 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 3114 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC 
must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 3115 } 3116 } 3117 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3118 } 3119 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3120 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3121 3122 for(i=0;i<mat_graph->nvtxs;i++){ 3123 if(!mat_graph->count[i]){ /* interior nodes */ 3124 mat_graph->touched[i]=PETSC_TRUE; 3125 mat_graph->where[i]=0; 3126 nodes_touched++; 3127 } 3128 } 3129 mat_graph->ncmps = 0; 3130 i=0; 3131 while(nodes_touched<mat_graph->nvtxs) { 3132 /* find first untouched node in local ordering */ 3133 while(mat_graph->touched[i]) i++; 3134 mat_graph->touched[i]=PETSC_TRUE; 3135 mat_graph->where[i]=where_values; 3136 nodes_touched++; 3137 /* now find all other nodes having the same set of sharing subdomains */ 3138 for(j=i+1;j<mat_graph->nvtxs;j++){ 3139 /* check for same number of sharing subdomains and dof number */ 3140 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 3141 /* check for same set of sharing subdomains */ 3142 same_set=PETSC_TRUE; 3143 for(k=0;k<mat_graph->count[j];k++){ 3144 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 3145 same_set=PETSC_FALSE; 3146 } 3147 } 3148 /* I found a friend of mine */ 3149 if(same_set) { 3150 mat_graph->where[j]=where_values; 3151 mat_graph->touched[j]=PETSC_TRUE; 3152 nodes_touched++; 3153 } 3154 } 3155 } 3156 where_values++; 3157 } 3158 where_values--; if(where_values<0) where_values=0; 3159 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3160 /* Find connected components defined on the shared interface */ 3161 if(where_values) { 3162 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3163 /* For consistency among 
neughbouring procs, I need to sort (by global ordering) each connected component */ 3164 for(i=0;i<mat_graph->ncmps;i++) { 3165 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3166 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3167 } 3168 } 3169 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 3170 for(i=0;i<where_values;i++) { 3171 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 3172 if(mat_graph->where_ncmps[i]>1) { 3173 adapt_interface=1; 3174 break; 3175 } 3176 } 3177 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 3178 if(pcbddc->dbg_flag && adapt_interface_reduced) { 3179 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 3180 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3181 } 3182 if(where_values && adapt_interface_reduced) { 3183 3184 PetscInt sum_requests=0,my_rank; 3185 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 3186 PetscInt temp_buffer_size,ins_val,global_where_counter; 3187 PetscInt *cum_recv_counts; 3188 PetscInt *where_to_nodes_indices; 3189 PetscInt *petsc_buffer; 3190 PetscMPIInt *recv_buffer; 3191 PetscMPIInt *recv_buffer_where; 3192 PetscMPIInt *send_buffer; 3193 PetscMPIInt size_of_send; 3194 PetscInt *sizes_of_sends; 3195 MPI_Request *send_requests; 3196 MPI_Request *recv_requests; 3197 PetscInt *where_cc_adapt; 3198 PetscInt **temp_buffer; 3199 PetscInt *nodes_to_temp_buffer_indices; 3200 PetscInt *add_to_where; 3201 3202 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 3203 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 3204 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 3205 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 3206 /* first count how many neighbours per connected component I will receive from */ 3207 cum_recv_counts[0]=0; 3208 for(i=1;i<where_values+1;i++){ 3209 j=0; 3210 while(mat_graph->where[j] != i) j++; 3211 where_to_nodes_indices[i-1]=j; 3212 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 3213 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 3214 } 3215 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 3216 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 3217 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 3218 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 3219 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 3220 for(i=0;i<cum_recv_counts[where_values];i++) { 3221 send_requests[i]=MPI_REQUEST_NULL; 3222 recv_requests[i]=MPI_REQUEST_NULL; 3223 } 3224 /* exchange with my neighbours the number of my connected components on the shared interface */ 3225 for(i=0;i<where_values;i++){ 3226 j=where_to_nodes_indices[i]; 3227 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3228 for(;k<mat_graph->count[j];k++){ 3229 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3230 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3231 sum_requests++; 3232 } 3233 } 3234 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3235 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3236 /* determine the connected component I need to adapt */ 3237 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 3238 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3239 for(i=0;i<where_values;i++){ 3240 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 3241 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 3242 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 3243 where_cc_adapt[i]=PETSC_TRUE; 3244 break; 3245 } 3246 } 3247 } 3248 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 3249 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 3250 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 3251 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3252 sum_requests=0; 3253 start_of_send=0; 3254 start_of_recv=cum_recv_counts[where_values]; 3255 for(i=0;i<where_values;i++) { 3256 if(where_cc_adapt[i]) { 3257 size_of_send=0; 3258 for(j=i;j<mat_graph->ncmps;j++) { 3259 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 
1 to where_values included */ 3260 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3261 size_of_send+=1; 3262 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 3263 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 3264 } 3265 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3266 } 3267 } 3268 j = where_to_nodes_indices[i]; 3269 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 3270 sizes_of_sends[i]=size_of_send; 3271 for(;k<mat_graph->count[j];k++){ 3272 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3273 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3274 sum_requests++; 3275 } 3276 start_of_send+=size_of_send; 3277 } 3278 } 3279 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3280 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3281 buffer_size=0; 3282 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 3283 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 3284 /* now exchange the data */ 3285 start_of_recv=0; 3286 start_of_send=0; 3287 sum_requests=0; 3288 for(i=0;i<where_values;i++) { 3289 if(where_cc_adapt[i]) { 3290 size_of_send = sizes_of_sends[i]; 3291 j = where_to_nodes_indices[i]; 3292 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3293 for(;k<mat_graph->count[j];k++){ 3294 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3295 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 3296 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3297 start_of_recv+=size_of_recv; 3298 sum_requests++; 3299 } 3300 start_of_send+=size_of_send; 3301 } 3302 } 3303 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3304 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3305 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 3306 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 3307 for(j=0;j<buffer_size;) { 3308 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 3309 k=petsc_buffer[j]+1; 3310 j+=k; 3311 } 3312 sum_requests=cum_recv_counts[where_values]; 3313 start_of_recv=0; 3314 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3315 global_where_counter=0; 3316 for(i=0;i<where_values;i++){ 3317 if(where_cc_adapt[i]){ 3318 temp_buffer_size=0; 3319 /* find nodes on the shared interface we need to adapt */ 3320 for(j=0;j<mat_graph->nvtxs;j++){ 3321 if(mat_graph->where[j]==i+1) { 3322 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 3323 temp_buffer_size++; 3324 } else { 3325 nodes_to_temp_buffer_indices[j]=-1; 3326 } 3327 } 3328 /* allocate some temporary space */ 3329 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 3330 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 3331 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 3332 for(j=1;j<temp_buffer_size;j++){ 3333 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 3334 } 3335 /* analyze contributions from neighbouring subdomains for i-th conn comp 3336 temp buffer structure: 3337 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 3338 3 neighs procs with structured connected components: 3339 neigh 0: [0 1 4], [2 3]; (2 connected components) 3340 neigh 1: [0 1], [2 3 4]; (2 connected components) 3341 neigh 2: [0 4], [1], [2 3]; (3 connected components) 3342 tempbuffer (row-oriented) should be filled as: 3343 [ 0, 0, 0; 3344 0, 0, 1; 3345 1, 1, 2; 3346 1, 1, 2; 3347 0, 1, 0; ]; 3348 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
3349 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 3350 */ 3351 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 3352 ins_val=0; 3353 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 3354 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 3355 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 3356 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 3357 } 3358 buffer_size+=k; 3359 ins_val++; 3360 } 3361 start_of_recv+=size_of_recv; 3362 sum_requests++; 3363 } 3364 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 3365 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 3366 for(j=0;j<temp_buffer_size;j++){ 3367 if(!add_to_where[j]){ /* found a new cc */ 3368 global_where_counter++; 3369 add_to_where[j]=global_where_counter; 3370 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 3371 same_set=PETSC_TRUE; 3372 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 3373 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 3374 same_set=PETSC_FALSE; 3375 break; 3376 } 3377 } 3378 if(same_set) add_to_where[k]=global_where_counter; 3379 } 3380 } 3381 } 3382 /* insert new data in where array */ 3383 temp_buffer_size=0; 3384 for(j=0;j<mat_graph->nvtxs;j++){ 3385 if(mat_graph->where[j]==i+1) { 3386 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 3387 temp_buffer_size++; 3388 } 3389 } 3390 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 3391 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 3392 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 3393 } 3394 } 3395 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3396 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 3397 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 3398 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 3399 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 3400 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 3401 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 3402 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3403 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 3404 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 3405 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 3406 if(global_where_counter) { 3407 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 3408 global_where_counter=0; 3409 for(i=0;i<mat_graph->nvtxs;i++){ 3410 if(mat_graph->where[i] && !mat_graph->touched[i]) { 3411 global_where_counter++; 3412 for(j=i+1;j<mat_graph->nvtxs;j++){ 3413 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 3414 mat_graph->where[j]=global_where_counter; 3415 mat_graph->touched[j]=PETSC_TRUE; 3416 } 3417 } 3418 mat_graph->where[i]=global_where_counter; 3419 mat_graph->touched[i]=PETSC_TRUE; 3420 } 3421 } 3422 where_values=global_where_counter; 3423 } 3424 if(global_where_counter) { 3425 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3426 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3427 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3428 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3429 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3430 for(i=0;i<mat_graph->ncmps;i++) { 3431 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3432 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 
3433 } 3434 } 3435 } /* Finished adapting interface */ 3436 PetscInt nfc=0; 3437 PetscInt nec=0; 3438 PetscInt nvc=0; 3439 PetscBool twodim_flag=PETSC_FALSE; 3440 for (i=0; i<mat_graph->ncmps; i++) { 3441 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3442 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 3443 nfc++; 3444 } else { /* note that nec will be zero in 2d */ 3445 nec++; 3446 } 3447 } else { 3448 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3449 } 3450 } 3451 3452 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 3453 nec = nfc; 3454 nfc = 0; 3455 twodim_flag = PETSC_TRUE; 3456 } 3457 /* allocate IS arrays for faces, edges. Vertices need a single index set. 3458 Reusing space allocated in mat_graph->where for creating IS objects */ 3459 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 3460 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 3461 use_faces=PETSC_TRUE; 3462 } 3463 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 3464 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 3465 use_edges=PETSC_TRUE; 3466 } 3467 nfc=0; 3468 nec=0; 3469 for (i=0; i<mat_graph->ncmps; i++) { 3470 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3471 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3472 mat_graph->where[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3473 } 3474 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3475 if(twodim_flag) { 3476 if(use_edges) { 3477 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3478 nec++; 3479 } 3480 } else { 3481 if(use_faces) { 3482 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 3483 nfc++; 3484 } 3485 } 3486 } else { 3487 if(use_edges) { 3488 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,j,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3489 nec++; 3490 } 3491 } 3492 } 3493 } 3494 pcbddc->n_ISForFaces=nfc; 3495 pcbddc->n_ISForEdges=nec; 3496 nvc=0; 3497 if( !pcbddc->constraints_flag ) { 3498 for (i=0; i<mat_graph->ncmps; i++) { 3499 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3500 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3501 mat_graph->where[nvc]=mat_graph->queue[j]; 3502 nvc++; 3503 } 3504 } 3505 } 3506 } 3507 /* sort vertex set (by local ordering) */ 3508 ierr = PetscSortInt(nvc,mat_graph->where);CHKERRQ(ierr); 3509 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,mat_graph->where,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3510 3511 if(pcbddc->dbg_flag) { 3512 3513 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3514 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3515 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3516 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3517 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3518 for(i=0;i<mat_graph->nvtxs;i++) { 3519 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3520 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3521 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3522 } 3523 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3524 }*/ 3525 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 3526 for(i=0;i<mat_graph->ncmps;i++) { 3527 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3528 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3529 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 3530 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 3531 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 3532 } 3533 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 3534 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3535 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 3536 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 3537 } 3538 } 3539 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3540 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3541 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3542 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3543 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3544 } 3545 3546 /* Free graph structure */ 3547 if(mat_graph->nvtxs){ 3548 ierr = PetscFree4(mat_graph->where,mat_graph->count,mat_graph->cptr,mat_graph->queue);CHKERRQ(ierr); 3549 ierr = PetscFree3(mat_graph->which_dof,mat_graph->touched,queue_in_global_numbering);CHKERRQ(ierr); 3550 ierr = 
PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3551 } 3552 3553 PetscFunctionReturn(0); 3554 3555 } 3556 3557 /* -------------------------------------------------------------------------- */ 3558 3559 /* The following code has been adapted from function IsConnectedSubdomain contained 3560 in source file contig.c of METIS library (version 5.0.1) 3561 It finds connected components of each partition labeled from 1 to n_dist */ 3562 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "PCBDDCFindConnectedComponents" 3565 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 3566 { 3567 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 3568 PetscInt *xadj, *adjncy, *where, *queue; 3569 PetscInt *cptr; 3570 PetscBool *touched; 3571 3572 PetscFunctionBegin; 3573 3574 nvtxs = graph->nvtxs; 3575 xadj = graph->xadj; 3576 adjncy = graph->adjncy; 3577 where = graph->where; 3578 touched = graph->touched; 3579 queue = graph->queue; 3580 cptr = graph->cptr; 3581 3582 for (i=0; i<nvtxs; i++) 3583 touched[i] = PETSC_FALSE; 3584 3585 cum_queue=0; 3586 ncmps=0; 3587 3588 for(n=0; n<n_dist; n++) { 3589 pid = n+1; /* partition labeled by 0 is discarded */ 3590 nleft = 0; 3591 for (i=0; i<nvtxs; i++) { 3592 if (where[i] == pid) 3593 nleft++; 3594 } 3595 for (i=0; i<nvtxs; i++) { 3596 if (where[i] == pid) 3597 break; 3598 } 3599 touched[i] = PETSC_TRUE; 3600 queue[cum_queue] = i; 3601 first = 0; last = 1; 3602 cptr[ncmps] = cum_queue; /* This actually points to queue */ 3603 ncmps_pid = 0; 3604 while (first != nleft) { 3605 if (first == last) { /* Find another starting vertex */ 3606 cptr[++ncmps] = first+cum_queue; 3607 ncmps_pid++; 3608 for (i=0; i<nvtxs; i++) { 3609 if (where[i] == pid && !touched[i]) 3610 break; 3611 } 3612 queue[cum_queue+last] = i; 3613 last++; 3614 touched[i] = PETSC_TRUE; 3615 } 3616 i = queue[cum_queue+first]; 3617 first++; 3618 for (j=xadj[i]; j<xadj[i+1]; j++) { 3619 k = adjncy[j]; 3620 if (where[k] == 
pid && !touched[k]) { 3621 queue[cum_queue+last] = k; 3622 last++; 3623 touched[k] = PETSC_TRUE; 3624 } 3625 } 3626 } 3627 cptr[++ncmps] = first+cum_queue; 3628 ncmps_pid++; 3629 cum_queue=cptr[ncmps]; 3630 graph->where_ncmps[n] = ncmps_pid; 3631 } 3632 graph->ncmps = ncmps; 3633 3634 PetscFunctionReturn(0); 3635 } 3636 3637