1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 8 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 9 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 10 code refactoring: 11 - pick up better names for static functions 12 change options structure: 13 - insert BDDC into MG framework? 14 provide other ops? Ask to developers 15 remove all unused printf 16 man pages 17 */ 18 19 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 20 Implementation of BDDC preconditioner based on: 21 C. Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 22 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 23 24 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 25 #include <petscblaslapack.h> 26 /* -------------------------------------------------------------------------- */ 27 #undef __FUNCT__ 28 #define __FUNCT__ "PCSetFromOptions_BDDC" 29 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 30 { 31 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 32 PetscErrorCode ierr; 33 34 PetscFunctionBegin; 35 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 36 /* Verbose debugging of main data structures */ 37 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 38 /* Some customization for default primal space */ 39 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 40 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 41 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Coarse solver context */ 44 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /*order of choiches depends on ENUM defined in bddc.h */ 45 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 46 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 47 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 48 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 49 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 50 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 51 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 52 ierr = PetscOptionsInt("-pc_bddc_max_levels","Set maximum number of levels for multilevel","none",pcbddc->max_levels,&pcbddc->max_levels,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsTail();CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 /* -------------------------------------------------------------------------- */ 57 EXTERN_C_BEGIN 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 60 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 64 PetscFunctionBegin; 65 pcbddc->coarse_problem_type = CPT; 66 PetscFunctionReturn(0); 67 } 68 EXTERN_C_END 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 71 /*@ 72 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 73 74 Not collective 75 76 Input Parameters: 77 + pc - the preconditioning context 78 - CoarseProblemType - pick a better name and explain what this is 79 80 Level: intermediate 81 82 Notes: 83 Not collective but all procs must call with same arguments. 84 85 .seealso: PCBDDC 86 @*/ 87 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 88 { 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 93 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 /* -------------------------------------------------------------------------- */ 97 EXTERN_C_BEGIN 98 #undef __FUNCT__ 99 #define __FUNCT__ "PCBDDCSetCoarseningRatio_BDDC" 100 static PetscErrorCode PCBDDCSetCoarseningRatio_BDDC(PC pc,PetscInt k) 101 { 102 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 103 104 PetscFunctionBegin; 105 pcbddc->coarsening_ratio=k; 106 PetscFunctionReturn(0); 107 } 108 EXTERN_C_END 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCBDDCSetCoarseningRatio" 111 /*@ 112 PCBDDCSetCoarseningRatio - Set coarsening ratio used in multilevel coarsening 113 114 Logically collective on PC 115 116 Input Parameters: 117 + pc - the preconditioning context 118 - k - coarsening ratio 119 120 Approximatively k subdomains at the finer level will be aggregated into a single subdomain at the coarser level. 121 122 Level: intermediate 123 124 Notes: 125 126 .seealso: PCBDDC 127 @*/ 128 PetscErrorCode PCBDDCSetCoarseningRatio(PC pc,PetscInt k) 129 { 130 PetscErrorCode ierr; 131 132 PetscFunctionBegin; 133 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 134 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseningRatio_C",(PC,PetscInt),(pc,k));CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 /* -------------------------------------------------------------------------- */ 138 EXTERN_C_BEGIN 139 #undef __FUNCT__ 140 #define __FUNCT__ "PCBDDCSetMaxLevels_BDDC" 141 static PetscErrorCode PCBDDCSetMaxLevels_BDDC(PC pc,PetscInt max_levels) 142 { 143 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 144 145 PetscFunctionBegin; 146 pcbddc->max_levels=max_levels; 147 PetscFunctionReturn(0); 148 } 149 EXTERN_C_END 150 #undef __FUNCT__ 151 #define __FUNCT__ "PCBDDCSetMaxLevels" 152 /*@ 153 PCBDDCSetMaxLevels - Sets the maximum number of levels within the multilevel approach. 154 155 Logically collective on PC 156 157 Input Parameters: 158 + pc - the preconditioning context 159 - max_levels - the maximum number of levels 160 161 Default value is 1, i.e. coarse problem will be solved inexactly with one application 162 of PCBDDC preconditioner if the multilevel approach is requested. 163 164 Level: intermediate 165 166 Notes: 167 168 .seealso: PCBDDC 169 @*/ 170 PetscErrorCode PCBDDCSetMaxLevels(PC pc,PetscInt max_levels) 171 { 172 PetscErrorCode ierr; 173 174 PetscFunctionBegin; 175 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 176 ierr = PetscTryMethod(pc,"PCBDDCSetMaxLevels_C",(PC,PetscInt),(pc,max_levels));CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 /* -------------------------------------------------------------------------- */ 180 EXTERN_C_BEGIN 181 #undef __FUNCT__ 182 #define __FUNCT__ "PCBDDCSetNullSpace_BDDC" 183 static PetscErrorCode PCBDDCSetNullSpace_BDDC(PC pc,MatNullSpace NullSpace) 184 { 185 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = PetscObjectReference((PetscObject)NullSpace);CHKERRQ(ierr); 190 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 191 pcbddc->NullSpace=NullSpace; 192 PetscFunctionReturn(0); 193 } 194 EXTERN_C_END 195 #undef __FUNCT__ 196 #define __FUNCT__ "PCBDDCSetNullSpace" 197 /*@ 198 PCBDDCSetNullSpace - Set NullSpace of global operator of BDDC preconditioned mat. 199 200 Logically collective on PC and MatNullSpace 201 202 Input Parameters: 203 + pc - the preconditioning context 204 - NullSpace - Null space of the linear operator to be preconditioned. 205 206 Level: intermediate 207 208 Notes: 209 210 .seealso: PCBDDC 211 @*/ 212 PetscErrorCode PCBDDCSetNullSpace(PC pc,MatNullSpace NullSpace) 213 { 214 PetscErrorCode ierr; 215 216 PetscFunctionBegin; 217 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 218 ierr = PetscTryMethod(pc,"PCBDDCSetNullSpace_C",(PC,MatNullSpace),(pc,NullSpace));CHKERRQ(ierr); 219 PetscFunctionReturn(0); 220 } 221 /* -------------------------------------------------------------------------- */ 222 EXTERN_C_BEGIN 223 #undef __FUNCT__ 224 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 225 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 226 { 227 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 228 PetscErrorCode ierr; 229 230 PetscFunctionBegin; 231 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 232 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 233 pcbddc->DirichletBoundaries=DirichletBoundaries; 234 PetscFunctionReturn(0); 235 } 236 EXTERN_C_END 237 #undef __FUNCT__ 238 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 239 /*@ 240 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 241 of Dirichlet boundaries for the global problem. 242 243 Not collective 244 245 Input Parameters: 246 + pc - the preconditioning context 247 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 248 249 Level: intermediate 250 251 Notes: 252 253 .seealso: PCBDDC 254 @*/ 255 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 256 { 257 PetscErrorCode ierr; 258 259 PetscFunctionBegin; 260 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 261 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 262 PetscFunctionReturn(0); 263 } 264 /* -------------------------------------------------------------------------- */ 265 EXTERN_C_BEGIN 266 #undef __FUNCT__ 267 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 268 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 269 { 270 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 271 PetscErrorCode ierr; 272 273 PetscFunctionBegin; 274 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 275 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 276 pcbddc->NeumannBoundaries=NeumannBoundaries; 277 PetscFunctionReturn(0); 278 } 279 EXTERN_C_END 280 #undef __FUNCT__ 281 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 282 /*@ 283 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 284 of Neumann boundaries for the global problem. 285 286 Not collective 287 288 Input Parameters: 289 + pc - the preconditioning context 290 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 291 292 Level: intermediate 293 294 Notes: 295 296 .seealso: PCBDDC 297 @*/ 298 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 299 { 300 PetscErrorCode ierr; 301 302 PetscFunctionBegin; 303 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 304 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 /* -------------------------------------------------------------------------- */ 308 EXTERN_C_BEGIN 309 #undef __FUNCT__ 310 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 311 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 312 { 313 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 314 315 PetscFunctionBegin; 316 *DirichletBoundaries = pcbddc->DirichletBoundaries; 317 PetscFunctionReturn(0); 318 } 319 EXTERN_C_END 320 #undef __FUNCT__ 321 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 322 /*@ 323 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 324 of Dirichlet boundaries for the global problem. 325 326 Not collective 327 328 Input Parameters: 329 + pc - the preconditioning context 330 331 Output Parameters: 332 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 333 334 Level: intermediate 335 336 Notes: 337 338 .seealso: PCBDDC 339 @*/ 340 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 341 { 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 346 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 /* -------------------------------------------------------------------------- */ 350 EXTERN_C_BEGIN 351 #undef __FUNCT__ 352 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 353 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 354 { 355 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 356 357 PetscFunctionBegin; 358 *NeumannBoundaries = pcbddc->NeumannBoundaries; 359 PetscFunctionReturn(0); 360 } 361 EXTERN_C_END 362 #undef __FUNCT__ 363 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 364 /*@ 365 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 366 of Neumann boundaries for the global problem. 367 368 Not collective 369 370 Input Parameters: 371 + pc - the preconditioning context 372 373 Output Parameters: 374 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 375 376 Level: intermediate 377 378 Notes: 379 380 .seealso: PCBDDC 381 @*/ 382 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 383 { 384 PetscErrorCode ierr; 385 386 PetscFunctionBegin; 387 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 388 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 389 PetscFunctionReturn(0); 390 } 391 /* -------------------------------------------------------------------------- */ 392 EXTERN_C_BEGIN 393 #undef __FUNCT__ 394 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 395 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 396 { 397 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 398 PCBDDCGraph mat_graph=pcbddc->mat_graph; 399 PetscErrorCode ierr; 400 401 PetscFunctionBegin; 402 mat_graph->nvtxs=nvtxs; 403 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 404 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 405 if (copymode == PETSC_COPY_VALUES) { 406 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 407 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 408 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 409 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 410 } else if (copymode == PETSC_OWN_POINTER) { 411 mat_graph->xadj = (PetscInt*)xadj; 412 mat_graph->adjncy = (PetscInt*)adjncy; 413 } else { 414 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 415 } 416 PetscFunctionReturn(0); 417 } 418 EXTERN_C_END 419 #undef __FUNCT__ 420 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 421 /*@ 422 PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 423 424 Not collective 425 426 Input Parameters: 427 + pc - the preconditioning context 428 - nvtxs - number of local vertices of the graph 429 - xadj, adjncy - the CSR graph 430 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 431 in the latter case, memory must be obtained with PetscMalloc. 432 433 Level: intermediate 434 435 Notes: 436 437 .seealso: PCBDDC 438 @*/ 439 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,const PetscInt xadj[],const PetscInt adjncy[], PetscCopyMode copymode) 440 { 441 PetscInt nrows,ncols; 442 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 443 PetscErrorCode ierr; 444 445 PetscFunctionBegin; 446 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 447 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 448 if (nvtxs != nrows) { 449 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 450 } else { 451 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,const PetscInt[],const PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 452 } 453 PetscFunctionReturn(0); 454 } 455 /* -------------------------------------------------------------------------- */ 456 EXTERN_C_BEGIN 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 459 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PetscInt i; 463 PetscErrorCode ierr; 464 465 PetscFunctionBegin; 466 /* Destroy ISes if they were already set */ 467 for (i=0;i<pcbddc->n_ISForDofs;i++) { 468 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 469 } 470 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 471 /* allocate space then set */ 472 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 473 for (i=0;i<n_is;i++) { 474 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 475 pcbddc->ISForDofs[i]=ISForDofs[i]; 476 } 477 pcbddc->n_ISForDofs=n_is; 478 PetscFunctionReturn(0); 479 } 480 EXTERN_C_END 481 #undef __FUNCT__ 482 #define __FUNCT__ "PCBDDCSetDofsSplitting" 483 /*@ 484 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 485 486 Not collective 487 488 Input Parameters: 489 + pc - the preconditioning context 490 - n - number of index sets defining the fields 491 - IS[] - array of IS describing the fields 492 493 Level: intermediate 494 495 Notes: 496 497 .seealso: PCBDDC 498 @*/ 499 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 500 { 501 PetscErrorCode ierr; 502 503 PetscFunctionBegin; 504 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 505 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 /* -------------------------------------------------------------------------- */ 509 #undef __FUNCT__ 510 #define __FUNCT__ "PCPreSolve_BDDC" 511 /* -------------------------------------------------------------------------- */ 512 /* 513 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 514 guess if a transformation of basis approach has been selected. 515 516 Input Parameter: 517 + pc - the preconditioner contex 518 519 Application Interface Routine: PCPreSolve() 520 521 Notes: 522 The interface routine PCPreSolve() is not usually called directly by 523 the user, but instead is called by KSPSolve(). 524 */ 525 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 526 { 527 PetscErrorCode ierr; 528 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 529 PC_IS *pcis = (PC_IS*)(pc->data); 530 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 531 Mat temp_mat; 532 IS dirIS; 533 PetscInt dirsize,i,*is_indices; 534 PetscScalar *array_x,*array_diagonal; 535 Vec used_vec; 536 PetscBool guess_nonzero; 537 538 PetscFunctionBegin; 539 if (x) { 540 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 541 used_vec = x; 542 } else { 543 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 544 used_vec = pcbddc->temp_solution; 545 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 546 } 547 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 548 if (ksp) { 549 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 550 if ( !guess_nonzero ) { 551 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 552 } 553 } 554 /* store the original rhs */ 555 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 556 557 /* Take into account zeroed rows -> change rhs and store solution removed */ 558 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 559 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 560 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 561 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 562 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 563 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 564 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 565 if (dirIS) { 566 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 567 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 568 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 569 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 570 for (i=0;i<dirsize;i++) { 571 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 572 } 573 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 574 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 575 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 576 } 577 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 578 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 579 580 /* remove the computed solution from the rhs */ 581 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 582 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 583 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 584 585 /* store partially computed solution and set initial guess */ 586 if (x) { 587 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 588 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 589 if (pcbddc->use_exact_dirichlet) { 590 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 591 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 593 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 594 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 595 if (ksp) { 596 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 597 } 598 } 599 } 600 601 /* rhs change of basis */ 602 if (pcbddc->usechangeofbasis) { 603 /* swap pointers for local matrices */ 604 temp_mat = matis->A; 605 matis->A = pcbddc->local_mat; 606 pcbddc->local_mat = temp_mat; 607 /* Get local rhs and apply transformation of basis */ 608 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 609 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 610 /* from original basis to modified basis */ 611 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 612 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 613 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 614 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 615 if (ksp && pcbddc->NullSpace) { 616 ierr = MatNullSpaceRemove(pcbddc->NullSpace,used_vec,PETSC_NULL);CHKERRQ(ierr); 617 ierr = MatNullSpaceRemove(pcbddc->NullSpace,rhs,PETSC_NULL);CHKERRQ(ierr); 618 } 619 } 620 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 /* -------------------------------------------------------------------------- */ 624 #undef __FUNCT__ 625 #define __FUNCT__ "PCPostSolve_BDDC" 626 /* -------------------------------------------------------------------------- */ 627 /* 628 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 629 approach has been selected. Also, restores rhs to its original state. 630 631 Input Parameter: 632 + pc - the preconditioner contex 633 634 Application Interface Routine: PCPostSolve() 635 636 Notes: 637 The interface routine PCPostSolve() is not usually called directly by 638 the user, but instead is called by KSPSolve(). 639 */ 640 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 641 { 642 PetscErrorCode ierr; 643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 644 PC_IS *pcis = (PC_IS*)(pc->data); 645 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 646 Mat temp_mat; 647 648 PetscFunctionBegin; 649 if (pcbddc->usechangeofbasis) { 650 /* swap pointers for local matrices */ 651 temp_mat = matis->A; 652 matis->A = pcbddc->local_mat; 653 pcbddc->local_mat = temp_mat; 654 /* restore rhs to its original state */ 655 if (rhs) { 656 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 657 } 658 /* Get Local boundary and apply transformation of basis to solution vector */ 659 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 660 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 /* from modified basis to original basis */ 662 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 663 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 664 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 665 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 666 } 667 /* add solution removed in presolve */ 668 if (x) { 669 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 670 } 671 PetscFunctionReturn(0); 672 } 673 /* -------------------------------------------------------------------------- */ 674 #undef __FUNCT__ 675 #define __FUNCT__ "PCSetUp_BDDC" 676 /* -------------------------------------------------------------------------- */ 677 /* 678 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 679 by setting data structures and options. 680 681 Input Parameter: 682 + pc - the preconditioner context 683 684 Application Interface Routine: PCSetUp() 685 686 Notes: 687 The interface routine PCSetUp() is not usually called directly by 688 the user, but instead is called by PCApply() if necessary. 689 */ 690 PetscErrorCode PCSetUp_BDDC(PC pc) 691 { 692 PetscErrorCode ierr; 693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 694 695 PetscFunctionBegin; 696 if (!pc->setupcalled) { 697 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 698 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 699 Also, we decide to directly build the (same) Dirichlet problem */ 700 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 701 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 702 /* Set up all the "iterative substructuring" common block */ 703 704 ierr = PCISSetUp(pc);CHKERRQ(ierr); 705 /* Get stdout for dbg */ 706 if (pcbddc->dbg_flag) { 707 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 708 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 709 } 710 /* Analyze local interface */ 711 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 712 /* Set up local constraint matrix */ 713 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 714 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 715 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 716 } 717 PetscFunctionReturn(0); 718 } 719 720 /* -------------------------------------------------------------------------- */ 721 /* 722 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 723 724 Input Parameters: 725 . pc - the preconditioner context 726 . r - input vector (global) 727 728 Output Parameter: 729 . z - output vector (global) 730 731 Application Interface Routine: PCApply() 732 */ 733 #undef __FUNCT__ 734 #define __FUNCT__ "PCApply_BDDC" 735 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 736 { 737 PC_IS *pcis = (PC_IS*)(pc->data); 738 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 739 PetscErrorCode ierr; 740 const PetscScalar one = 1.0; 741 const PetscScalar m_one = -1.0; 742 const PetscScalar zero = 0.0; 743 744 /* This code is similar to that provided in nn.c for PCNN 745 NN interface preconditioner changed to BDDC 746 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 747 748 PetscFunctionBegin; 749 if (!pcbddc->use_exact_dirichlet) { 750 /* First Dirichlet solve */ 751 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 752 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 754 /* 755 Assembling right hand side for BDDC operator 756 - vec1_D for the Dirichlet part (if needed, i.e. prec_flag=PETSC_TRUE) 757 - the interface part of the global vector z 758 */ 759 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 760 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 761 if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 762 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 763 ierr = VecCopy(r,z);CHKERRQ(ierr); 764 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 765 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 766 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 767 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 768 } else { 769 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecSet(pcis->vec1_D,zero);CHKERRQ(ierr); 772 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 773 } 774 775 /* Apply partition of unity */ 776 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 777 778 /* Apply interface preconditioner 779 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 780 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 781 782 /* Apply partition of unity and sum boundary values */ 783 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 784 ierr = VecSet(z,zero);CHKERRQ(ierr); 785 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 786 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 787 788 /* Second Dirichlet solve and assembling of output */ 789 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 792 if (pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 793 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 794 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 795 if (pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 796 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 797 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 798 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 801 } 802 /* -------------------------------------------------------------------------- */ 803 #undef __FUNCT__ 804 #define __FUNCT__ "PCDestroy_BDDC" 805 PetscErrorCode PCDestroy_BDDC(PC pc) 806 { 807 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 808 PetscInt i; 809 PetscErrorCode ierr; 810 811 PetscFunctionBegin; 812 /* free data created by PCIS */ 813 ierr = PCISDestroy(pc);CHKERRQ(ierr); 814 /* free BDDC data */ 815 ierr = MatNullSpaceDestroy(&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 816 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 817 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 818 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 819 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 820 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 821 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 822 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 823 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 824 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 825 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 826 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 827 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 828 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 829 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 830 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 831 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 832 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 833 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 834 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 835 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 836 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 837 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 838 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 839 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 840 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 841 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 842 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 843 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 844 ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 845 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 846 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 847 for (i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 848 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 849 for (i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 850 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 851 for (i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 852 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 853 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 854 /* Free graph structure */ 855 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 856 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 857 if (pcbddc->mat_graph->nvtxs) { 858 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 859 } 860 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 861 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 862 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 863 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 864 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 865 /* remove functions */ 866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseningRatio_C","",PETSC_NULL);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetMaxLevels_C","",PETSC_NULL);CHKERRQ(ierr); 868 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNullSpace_C","",PETSC_NULL);CHKERRQ(ierr); 869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 870 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 871 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 872 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 873 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 874 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 875 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 876 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 877 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 878 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 879 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 881 /* Free the private data structure that was hanging off the PC */ 882 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 /* -------------------------------------------------------------------------- */ 886 EXTERN_C_BEGIN 887 #undef __FUNCT__ 888 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 889 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 890 { 891 FETIDPMat_ctx *mat_ctx; 892 PC_IS* pcis; 893 PC_BDDC* pcbddc; 894 PetscErrorCode ierr; 895 896 PetscFunctionBegin; 897 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 898 pcis = (PC_IS*)mat_ctx->pc->data; 899 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 900 901 /* change of basis for physical rhs if needed 902 It also changes the rhs in case of dirichlet boundaries */ 903 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 904 /* store vectors for computation of fetidp final solution */ 905 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 /* scale rhs since it should be unassembled */ 910 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 911 if (!pcbddc->prec_type) { 912 /* compute partially subassembled Schur complement right-hand side */ 913 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 914 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 915 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 916 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 917 ierr = VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 918 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 919 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 922 } 923 /* BDDC rhs */ 924 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 925 if (pcbddc->prec_type) { 926 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 927 } 928 /* apply BDDC */ 929 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 930 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 931 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 932 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 933 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 934 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 /* restore original rhs */ 936 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } 939 EXTERN_C_END 940 #undef __FUNCT__ 941 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 942 /*@ 943 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 944 945 Collective 946 947 Input Parameters: 948 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 949 + standard_rhs - the rhs of your linear system 950 951 Output Parameters: 952 + fetidp_flux_rhs - the rhs of the FETIDP linear system 953 954 Level: developer 955 956 Notes: 957 958 .seealso: PCBDDC 959 @*/ 960 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 961 { 962 FETIDPMat_ctx *mat_ctx; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 967 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 /* -------------------------------------------------------------------------- */ 971 EXTERN_C_BEGIN 972 #undef __FUNCT__ 973 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 974 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 975 { 976 FETIDPMat_ctx *mat_ctx; 977 PC_IS* pcis; 978 PC_BDDC* pcbddc; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 983 pcis = (PC_IS*)mat_ctx->pc->data; 984 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 985 986 /* apply B_delta^T */ 987 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 988 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 989 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 990 /* compute rhs for BDDC application */ 991 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 992 if (pcbddc->prec_type) { 993 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 994 } 995 /* apply BDDC */ 996 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 997 /* put values into standard global vector */ 998 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1000 if (!pcbddc->prec_type) { 1001 /* compute values into the interior if solved for the partially subassembled Schur complement */ 1002 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 1003 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 1004 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 1005 } 1006 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1007 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 /* final change of basis if needed 1009 Is also sums the dirichlet part removed during RHS assembling */ 1010 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 1011 PetscFunctionReturn(0); 1012 1013 } 1014 EXTERN_C_END 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 1017 /*@ 1018 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 1019 1020 Collective 1021 1022 Input Parameters: 1023 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 1024 + fetidp_flux_sol - the solution of the FETIDP linear system 1025 1026 Output Parameters: 1027 + standard_sol - the solution on the global domain 1028 1029 Level: developer 1030 1031 Notes: 1032 1033 .seealso: PCBDDC 1034 @*/ 1035 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 1036 { 1037 FETIDPMat_ctx *mat_ctx; 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 1042 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 1043 PetscFunctionReturn(0); 1044 } 1045 /* -------------------------------------------------------------------------- */ 1046 EXTERN_C_BEGIN 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 1049 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1050 { 1051 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 1052 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 1053 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 1054 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 1055 1056 FETIDPMat_ctx *fetidpmat_ctx; 1057 Mat newmat; 1058 FETIDPPC_ctx *fetidppc_ctx; 1059 PC newpc; 1060 MPI_Comm comm = ((PetscObject)pc)->comm; 1061 PetscErrorCode ierr; 1062 1063 PetscFunctionBegin; 1064 /* FETIDP linear matrix */ 1065 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 1066 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 1067 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 1068 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 1069 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void (*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr); 1070 ierr = MatSetUp(newmat);CHKERRQ(ierr); 1071 /* FETIDP preconditioner */ 1072 ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr); 1073 ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr); 1074 ierr = PCCreate(comm,&newpc);CHKERRQ(ierr); 1075 ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr); 1076 ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr); 1077 ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr); 1078 ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr); 1079 ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1080 ierr = PCSetUp(newpc);CHKERRQ(ierr); 1081 /* return pointers for objects created */ 1082 *fetidp_mat=newmat; 1083 *fetidp_pc=newpc; 1084 1085 PetscFunctionReturn(0); 1086 } 1087 EXTERN_C_END 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "PCBDDCCreateFETIDPOperators" 1090 /*@ 1091 PCBDDCCreateFETIDPOperators - Create operators for FETIDP. 1092 1093 Collective 1094 1095 Input Parameters: 1096 + pc - the BDDC preconditioning context (setup must be already called) 1097 1098 Level: developer 1099 1100 Notes: 1101 1102 .seealso: PCBDDC 1103 @*/ 1104 PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 1105 { 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1110 if (pc->setupcalled) { 1111 ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr); 1112 } else { 1113 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__); 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 /* -------------------------------------------------------------------------- */ 1118 /*MC 1119 PCBDDC - Balancing Domain Decomposition by Constraints. 1120 1121 Options Database Keys: 1122 . -pcbddc ??? - 1123 1124 Level: intermediate 1125 1126 Notes: The matrix used with this preconditioner must be of type MATIS 1127 1128 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 1129 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 1130 on the subdomains). 1131 1132 Options for the coarse grid preconditioner can be set with - 1133 Options for the Dirichlet subproblem can be set with - 1134 Options for the Neumann subproblem can be set with - 1135 1136 Contributed by Stefano Zampini 1137 1138 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 1139 M*/ 1140 EXTERN_C_BEGIN 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "PCCreate_BDDC" 1143 PetscErrorCode PCCreate_BDDC(PC pc) 1144 { 1145 PetscErrorCode ierr; 1146 PC_BDDC *pcbddc; 1147 PCBDDCGraph mat_graph; 1148 1149 PetscFunctionBegin; 1150 /* Creates the private data structure for this preconditioner and attach it to the PC object. */ 1151 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 1152 pc->data = (void*)pcbddc; 1153 1154 /* create PCIS data structure */ 1155 ierr = PCISCreate(pc);CHKERRQ(ierr); 1156 1157 /* BDDC specific */ 1158 pcbddc->CoarseNullSpace = 0; 1159 pcbddc->NullSpace = 0; 1160 pcbddc->temp_solution = 0; 1161 pcbddc->original_rhs = 0; 1162 pcbddc->local_mat = 0; 1163 pcbddc->ChangeOfBasisMatrix = 0; 1164 pcbddc->usechangeofbasis = PETSC_TRUE; 1165 pcbddc->usechangeonfaces = PETSC_FALSE; 1166 pcbddc->coarse_vec = 0; 1167 pcbddc->coarse_rhs = 0; 1168 pcbddc->coarse_ksp = 0; 1169 pcbddc->coarse_phi_B = 0; 1170 pcbddc->coarse_phi_D = 0; 1171 pcbddc->vec1_P = 0; 1172 pcbddc->vec1_R = 0; 1173 pcbddc->vec2_R = 0; 1174 pcbddc->local_auxmat1 = 0; 1175 pcbddc->local_auxmat2 = 0; 1176 pcbddc->R_to_B = 0; 1177 pcbddc->R_to_D = 0; 1178 pcbddc->ksp_D = 0; 1179 pcbddc->ksp_R = 0; 1180 pcbddc->local_primal_indices = 0; 1181 pcbddc->prec_type = PETSC_FALSE; 1182 pcbddc->NeumannBoundaries = 0; 1183 pcbddc->ISForDofs = 0; 1184 pcbddc->ISForVertices = 0; 1185 pcbddc->n_ISForFaces = 0; 1186 pcbddc->n_ISForEdges = 0; 1187 pcbddc->ConstraintMatrix = 0; 1188 pcbddc->use_nnsp_true = PETSC_FALSE; 1189 pcbddc->local_primal_sizes = 0; 1190 pcbddc->local_primal_displacements = 0; 1191 pcbddc->replicated_local_primal_indices = 0; 1192 pcbddc->replicated_local_primal_values = 0; 1193 pcbddc->coarse_loc_to_glob = 0; 1194 pcbddc->dbg_flag = PETSC_FALSE; 1195 pcbddc->coarsening_ratio = 8; 1196 pcbddc->use_exact_dirichlet = PETSC_TRUE; 1197 pcbddc->current_level = 0; 1198 pcbddc->max_levels = 1; 1199 1200 /* allocate and initialize needed graph structure */ 1201 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 1202 pcbddc->mat_graph->xadj = 0; 1203 pcbddc->mat_graph->adjncy = 0; 1204 1205 /* function pointers */ 1206 pc->ops->apply = PCApply_BDDC; 1207 pc->ops->applytranspose = 0; 1208 pc->ops->setup = PCSetUp_BDDC; 1209 pc->ops->destroy = PCDestroy_BDDC; 1210 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 1211 pc->ops->view = 0; 1212 pc->ops->applyrichardson = 0; 1213 pc->ops->applysymmetricleft = 0; 1214 pc->ops->applysymmetricright = 0; 1215 pc->ops->presolve = PCPreSolve_BDDC; 1216 pc->ops->postsolve = PCPostSolve_BDDC; 1217 1218 /* composing function */ 1219 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseningRatio_C","PCBDDCSetCoarseningRatio_BDDC", 1220 PCBDDCSetCoarseningRatio_BDDC);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetMaxLevels_C","PCBDDCSetMaxLevels_BDDC", 1222 PCBDDCSetMaxLevels_BDDC);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNullSpace_C","PCBDDCSetNullSpace_BDDC", 1224 PCBDDCSetNullSpace_BDDC);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 1226 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 1228 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 1230 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 1232 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 1234 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 1236 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 1238 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 1240 PCPreSolve_BDDC);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 1242 PCPostSolve_BDDC);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC", 1244 PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC", 1246 PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC", 1248 PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr); 1249 PetscFunctionReturn(0); 1250 } 1251 EXTERN_C_END 1252 1253 /* -------------------------------------------------------------------------- */ 1254 /* All static functions from now on */ 1255 /* -------------------------------------------------------------------------- */ 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "PCBDDCSetUseExactDirichlet" 1258 static PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use) 1259 { 1260 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1261 1262 PetscFunctionBegin; 1263 pcbddc->use_exact_dirichlet=use; 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "PCBDDCSetLevel" 1269 static PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level) 1270 { 1271 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1272 1273 PetscFunctionBegin; 1274 pcbddc->current_level=level; 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "PCBDDCAdaptNullSpace" 1280 static PetscErrorCode PCBDDCAdaptNullSpace(PC pc) 1281 { 1282 PC_IS* pcis = (PC_IS*) (pc->data); 1283 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1284 KSP inv_change; 1285 PC pc_change; 1286 const Vec *nsp_vecs; 1287 Vec *new_nsp_vecs; 1288 PetscInt i,nsp_size,new_nsp_size,start_new; 1289 PetscBool nsp_has_cnst; 1290 MatNullSpace new_nsp; 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 1295 ierr = KSPCreate(PETSC_COMM_SELF,&inv_change);CHKERRQ(ierr); 1296 ierr = KSPSetOperators(inv_change,pcbddc->ChangeOfBasisMatrix,pcbddc->ChangeOfBasisMatrix,SAME_PRECONDITIONER);CHKERRQ(ierr); 1297 ierr = KSPSetType(inv_change,KSPPREONLY);CHKERRQ(ierr); 1298 ierr = KSPGetPC(inv_change,&pc_change);CHKERRQ(ierr); 1299 ierr = PCSetType(pc_change,PCLU);CHKERRQ(ierr); 1300 ierr = KSPSetUp(inv_change);CHKERRQ(ierr); 1301 new_nsp_size = nsp_size; 1302 if (nsp_has_cnst) { new_nsp_size++; } 1303 ierr = PetscMalloc(new_nsp_size*sizeof(Vec),&new_nsp_vecs);CHKERRQ(ierr); 1304 for (i=0;i<new_nsp_size;i++) { ierr = VecDuplicate(pcis->vec1_global,&new_nsp_vecs[i]);CHKERRQ(ierr); } 1305 start_new = 0; 1306 if (nsp_has_cnst) { 1307 start_new = 1; 1308 ierr = VecSet(new_nsp_vecs[0],1.0);CHKERRQ(ierr); 1309 ierr = VecSet(pcis->vec1_B,1.0);CHKERRQ(ierr); 1310 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1311 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1312 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[0],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1313 } 1314 for (i=0;i<nsp_size;i++) { 1315 ierr = VecCopy(nsp_vecs[i],new_nsp_vecs[i+start_new]);CHKERRQ(ierr); 1316 ierr = VecScatterBegin(pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317 ierr = VecScatterEnd (pcis->global_to_B,nsp_vecs[i],pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = KSPSolve(inv_change,pcis->vec1_B,pcis->vec1_B); 1319 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1320 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,new_nsp_vecs[i+start_new],INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1321 } 1322 ierr = VecNormalize(new_nsp_vecs[0],PETSC_NULL);CHKERRQ(ierr); 1323 /* TODO : Orthonormalize vecs when new_nsp_size > 0! */ 1324 1325 /*PetscBool nsp_t=PETSC_FALSE; 1326 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1327 printf("Original Null Space test: %d\n",nsp_t); 1328 Mat temp_mat; 1329 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1330 temp_mat = matis->A; 1331 matis->A = pcbddc->local_mat; 1332 pcbddc->local_mat = temp_mat; 1333 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1334 printf("Original Null Space, mat changed test: %d\n",nsp_t); 1335 { 1336 PetscReal test_norm; 1337 for (i=0;i<new_nsp_size;i++) { 1338 ierr = MatMult(pc->pmat,new_nsp_vecs[i],pcis->vec1_global);CHKERRQ(ierr); 1339 ierr = VecNorm(pcis->vec1_global,NORM_2,&test_norm);CHKERRQ(ierr); 1340 if (test_norm > 1.e-12) { 1341 printf("------------ERROR VEC %d------------------\n",i); 1342 ierr = VecView(pcis->vec1_global,PETSC_VIEWER_STDOUT_WORLD); 1343 printf("------------------------------------------\n"); 1344 } 1345 } 1346 }*/ 1347 1348 ierr = KSPDestroy(&inv_change);CHKERRQ(ierr); 1349 ierr = MatNullSpaceCreate(((PetscObject)pc)->comm,PETSC_FALSE,new_nsp_size,new_nsp_vecs,&new_nsp);CHKERRQ(ierr); 1350 ierr = PCBDDCSetNullSpace(pc,new_nsp);CHKERRQ(ierr); 1351 ierr = MatNullSpaceDestroy(&new_nsp);CHKERRQ(ierr); 1352 /* 1353 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1354 printf("New Null Space, mat changed: %d\n",nsp_t); 1355 temp_mat = matis->A; 1356 matis->A = pcbddc->local_mat; 1357 pcbddc->local_mat = temp_mat; 1358 ierr = MatNullSpaceTest(pcbddc->NullSpace,pc->pmat,&nsp_t);CHKERRQ(ierr); 1359 printf("New Null Space, mat original: %d\n",nsp_t);*/ 1360 1361 for (i=0;i<new_nsp_size;i++) { ierr = VecDestroy(&new_nsp_vecs[i]);CHKERRQ(ierr); } 1362 ierr = PetscFree(new_nsp_vecs);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "PCBDDCCreateFETIDPMatContext" 1368 static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx) 1369 { 1370 FETIDPMat_ctx *newctx; 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1375 newctx->lambda_local = 0; 1376 newctx->temp_solution_B = 0; 1377 newctx->temp_solution_D = 0; 1378 newctx->B_delta = 0; 1379 newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */ 1380 newctx->l2g_lambda = 0; 1381 /* increase the reference count for BDDC preconditioner */ 1382 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1383 newctx->pc = pc; 1384 *fetidpmat_ctx = newctx; 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCCreateFETIDPPCContext" 1390 static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx) 1391 { 1392 FETIDPPC_ctx *newctx; 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr); 1397 newctx->lambda_local = 0; 1398 newctx->B_Ddelta = 0; 1399 newctx->l2g_lambda = 0; 1400 /* increase the reference count for BDDC preconditioner */ 1401 ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr); 1402 newctx->pc = pc; 1403 *fetidppc_ctx = newctx; 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "PCBDDCDestroyFETIDPMat" 1409 static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A) 1410 { 1411 FETIDPMat_ctx *mat_ctx; 1412 PetscErrorCode ierr; 1413 1414 PetscFunctionBegin; 1415 ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr); 1416 ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr); 1417 ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr); 1418 ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr); 1419 ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr); 1420 ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr); 1421 ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr); 1422 ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1423 ierr = PetscFree(mat_ctx);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "PCBDDCDestroyFETIDPPC" 1429 static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc) 1430 { 1431 FETIDPPC_ctx *pc_ctx; 1432 PetscErrorCode ierr; 1433 1434 PetscFunctionBegin; 1435 ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr); 1436 ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr); 1437 ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr); 1438 ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr); 1439 ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */ 1440 ierr = PetscFree(pc_ctx);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "PCBDDCSetupFETIDPMatContext" 1446 static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx ) 1447 { 1448 PetscErrorCode ierr; 1449 PC_IS *pcis=(PC_IS*)fetidpmat_ctx->pc->data; 1450 PC_BDDC *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data; 1451 PCBDDCGraph mat_graph=pcbddc->mat_graph; 1452 Mat_IS *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data; 1453 MPI_Comm comm = ((PetscObject)(fetidpmat_ctx->pc))->comm; 1454 1455 Mat ScalingMat; 1456 Vec lambda_global; 1457 IS IS_l2g_lambda; 1458 1459 PetscBool skip_node,fully_redundant; 1460 PetscInt i,j,k,s,n_boundary_dofs,n_global_lambda,n_vertices,partial_sum; 1461 PetscInt n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values; 1462 PetscMPIInt rank,nprocs; 1463 PetscScalar scalar_value; 1464 1465 PetscInt *vertex_indices,*temp_indices; 1466 PetscInt *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering; 1467 PetscInt *aux_sums,*cols_B_delta,*l2g_indices; 1468 PetscScalar *array,*scaling_factors,*vals_B_delta; 1469 PetscInt *aux_local_numbering_2,*dof_sizes,*dof_displs; 1470 PetscInt first_index,old_index; 1471 PetscBool first_found = PETSC_FALSE; 1472 1473 /* For communication of scaling factors */ 1474 PetscInt *ptrs_buffer,neigh_position; 1475 PetscScalar **all_factors,*send_buffer,*recv_buffer; 1476 MPI_Request *send_reqs,*recv_reqs; 1477 1478 /* tests */ 1479 Vec test_vec; 1480 PetscBool test_fetidp; 1481 PetscViewer viewer; 1482 1483 PetscFunctionBegin; 1484 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1485 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1486 1487 /* Default type of lagrange multipliers is non-redundant */ 1488 fully_redundant = PETSC_FALSE; 1489 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1490 1491 /* Evaluate local and global number of lagrange multipliers */ 1492 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1493 n_local_lambda = 0; 1494 partial_sum = 0; 1495 n_boundary_dofs = 0; 1496 s = 0; 1497 n_vertices = 0; 1498 /* Get Vertices used to define the BDDC */ 1499 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1500 for (i=0;i<pcbddc->local_primal_size;i++) { 1501 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1502 if (j == 1) { 1503 vertex_indices[n_vertices]=temp_indices[0]; 1504 n_vertices++; 1505 } 1506 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1507 } 1508 dual_size = pcis->n_B-n_vertices; 1509 ierr = PetscSortInt(n_vertices,vertex_indices);CHKERRQ(ierr); 1510 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1511 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1512 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1513 1514 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1515 for (i=0;i<pcis->n;i++){ 1516 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1517 k = 0; 1518 if (j > 0) { 1519 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1520 } 1521 j = j - k ; 1522 if ( j > 0 ) { n_boundary_dofs++; } 1523 1524 skip_node = PETSC_FALSE; 1525 if ( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1526 skip_node = PETSC_TRUE; 1527 s++; 1528 } 1529 if (j < 1) {skip_node = PETSC_TRUE;} 1530 if ( !skip_node ) { 1531 if (fully_redundant) { 1532 /* fully redundant set of lagrange multipliers */ 1533 n_lambda_for_dof = (j*(j+1))/2; 1534 } else { 1535 n_lambda_for_dof = j; 1536 } 1537 n_local_lambda += j; 1538 /* needed to evaluate global number of lagrange multipliers */ 1539 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1540 /* store some data needed */ 1541 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1542 aux_local_numbering_1[partial_sum] = i; 1543 aux_local_numbering_2[partial_sum] = n_lambda_for_dof; 1544 partial_sum++; 1545 } 1546 } 1547 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1548 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1549 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1550 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1551 1552 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1553 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1554 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1555 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1556 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1557 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1558 1559 /* compute global ordering of lagrange multipliers and associate l2g map */ 1560 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1561 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1562 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1563 for (i=0;i<dual_size;i++) { 1564 array[aux_local_numbering_1[i]] = aux_local_numbering_2[i]; 1565 } 1566 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1567 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1568 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1569 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1570 if (pcbddc->dbg_flag && (PetscInt)scalar_value != fetidpmat_ctx->n_lambda) { 1571 SETERRQ3(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Error in %s: global number of multipliers mismatch! (%d!=%d)\n",__FUNCT__,(PetscInt)scalar_value,fetidpmat_ctx->n_lambda); 1572 } 1573 1574 /* Fill pcis->vec1_global with cumulative function for global numbering */ 1575 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1576 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 1577 k = 0; 1578 first_index = -1; 1579 for (i=0;i<s;i++) { 1580 if (!first_found && array[i] > 0.0) { 1581 first_found = PETSC_TRUE; 1582 first_index = i; 1583 } 1584 k += (PetscInt)array[i]; 1585 } 1586 j = ( !rank ? nprocs : 0); 1587 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1588 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1589 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1590 if (!rank) { 1591 dof_displs[0]=0; 1592 for (i=1;i<nprocs;i++) { 1593 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1594 } 1595 } 1596 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1597 if (first_found) { 1598 array[first_index] += k; 1599 old_index = first_index; 1600 for (i=first_index+1;i<s;i++) { 1601 if (array[i] > 0.0) { 1602 array[i] += array[old_index]; 1603 old_index = i; 1604 } 1605 } 1606 } 1607 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 1608 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1609 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1612 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1613 for (i=0;i<dual_size;i++) { 1614 aux_global_numbering[i] = (PetscInt)array[aux_local_numbering_1[i]]-aux_local_numbering_2[i]; 1615 } 1616 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1617 ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr); 1618 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 1619 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 1620 1621 /* init data for scaling factors exchange */ 1622 partial_sum = 0; 1623 j = 0; 1624 ierr = PetscMalloc(pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1625 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1626 ierr = PetscMalloc((pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1627 ierr = PetscMalloc(pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1628 ptrs_buffer[0]=0; 1629 for (i=1;i<pcis->n_neigh;i++) { 1630 partial_sum += pcis->n_shared[i]; 1631 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1632 } 1633 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1634 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1635 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1636 for (i=0;i<pcis->n-1;i++) { 1637 j = mat_graph->count[i]; 1638 if (j>0) { 1639 k = (mat_graph->neighbours_set[i][0] == -1 ? 1 : 0); 1640 j = j - k; 1641 } 1642 all_factors[i+1]=all_factors[i]+j; 1643 } 1644 /* scatter B scaling to N vec */ 1645 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1646 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1647 /* communications */ 1648 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1649 for (i=1;i<pcis->n_neigh;i++) { 1650 for (j=0;j<pcis->n_shared[i];j++) { 1651 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1652 } 1653 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1654 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1655 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1656 } 1657 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1658 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1659 /* put values in correct places */ 1660 for (i=1;i<pcis->n_neigh;i++) { 1661 for (j=0;j<pcis->n_shared[i];j++) { 1662 k = pcis->shared[i][j]; 1663 neigh_position = 0; 1664 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1665 s = (mat_graph->neighbours_set[k][0] == -1 ? 1 : 0); 1666 neigh_position = neigh_position - s; 1667 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1668 } 1669 } 1670 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1671 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1672 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1673 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1674 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1675 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1676 1677 /* Compute B and B_delta (local actions) */ 1678 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1679 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1680 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1681 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1682 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1683 n_global_lambda=0; 1684 partial_sum=0; 1685 for (i=0;i<dual_size;i++) { 1686 n_global_lambda = aux_global_numbering[i]; 1687 j = mat_graph->count[aux_local_numbering_1[i]]; 1688 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 1 : 0); 1689 j = j - k; 1690 aux_sums[0]=0; 1691 for (s=1;s<j;s++) { 1692 aux_sums[s]=aux_sums[s-1]+j-s+1; 1693 } 1694 array = all_factors[aux_local_numbering_1[i]]; 1695 n_neg_values = 0; 1696 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1697 n_pos_values = j - n_neg_values; 1698 if (fully_redundant) { 1699 for (s=0;s<n_neg_values;s++) { 1700 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1701 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1702 vals_B_delta [partial_sum+s]=-1.0; 1703 scaling_factors[partial_sum+s]=array[s]; 1704 } 1705 for (s=0;s<n_pos_values;s++) { 1706 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1707 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1708 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1709 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1710 } 1711 partial_sum += j; 1712 } else { 1713 /* l2g_indices and default cols and vals of B_delta */ 1714 for (s=0;s<j;s++) { 1715 l2g_indices [partial_sum+s]=n_global_lambda+s; 1716 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1717 vals_B_delta [partial_sum+s]=0.0; 1718 } 1719 /* B_delta */ 1720 if ( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1721 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1722 } 1723 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1724 vals_B_delta [partial_sum+n_neg_values]=1.0; 1725 } 1726 /* scaling as in Klawonn-Widlund 1999*/ 1727 for (s=0;s<n_neg_values;s++) { 1728 scalar_value = 0.0; 1729 for (k=0;k<s+1;k++) { 1730 scalar_value += array[k]; 1731 } 1732 scaling_factors[partial_sum+s] = -scalar_value; 1733 } 1734 for (s=0;s<n_pos_values;s++) { 1735 scalar_value = 0.0; 1736 for (k=s+n_neg_values;k<j;k++) { 1737 scalar_value += array[k]; 1738 } 1739 scaling_factors[partial_sum+s+n_neg_values] = scalar_value; 1740 } 1741 partial_sum += j; 1742 } 1743 } 1744 ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr); 1745 ierr = PetscFree(aux_sums);CHKERRQ(ierr); 1746 ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr); 1747 ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr); 1748 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1749 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1750 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1751 1752 /* Local to global mapping of fetidpmat */ 1753 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1754 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1755 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1756 ierr = VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1757 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1758 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1759 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1760 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1761 ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr); 1762 1763 /* Create local part of B_delta */ 1764 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1765 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1766 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1767 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1768 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1769 for (i=0;i<n_local_lambda;i++) { 1770 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1771 } 1772 ierr = PetscFree(vals_B_delta);CHKERRQ(ierr); 1773 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1774 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1775 1776 if (fully_redundant) { 1777 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1778 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1779 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1780 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1781 for (i=0;i<n_local_lambda;i++) { 1782 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1783 } 1784 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1785 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1786 ierr = MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1787 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1788 } else { 1789 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1790 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1791 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1792 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1793 for (i=0;i<n_local_lambda;i++) { 1794 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1795 } 1796 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1798 } 1799 ierr = PetscFree(scaling_factors);CHKERRQ(ierr); 1800 ierr = PetscFree(cols_B_delta);CHKERRQ(ierr); 1801 1802 /* Create some vectors needed by fetidp */ 1803 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1804 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1805 1806 test_fetidp = PETSC_FALSE; 1807 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1808 1809 if (test_fetidp) { 1810 1811 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1812 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1816 if (fully_redundant) { 1817 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1818 } else { 1819 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1820 } 1821 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1822 1823 /******************************************************************/ 1824 /* TEST A/B: Test numbering of global lambda dofs */ 1825 /******************************************************************/ 1826 1827 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1828 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1829 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1830 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1831 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1832 scalar_value = -1.0; 1833 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1834 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1835 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1836 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1837 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1838 if (fully_redundant) { 1839 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1840 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1841 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1842 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1843 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1844 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1845 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1846 } 1847 1848 /******************************************************************/ 1849 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1850 /* This is the meaning of the B matrix */ 1851 /******************************************************************/ 1852 1853 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1854 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1855 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1856 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1857 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1860 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1861 /* Action of B_delta */ 1862 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1863 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1864 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1868 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1869 1870 /******************************************************************/ 1871 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1872 /* E_D = R_D^TR */ 1873 /* P_D = B_{D,delta}^T B_{delta} */ 1874 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1875 /******************************************************************/ 1876 1877 /* compute a random vector in \widetilde{W} */ 1878 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1879 scalar_value = 0.0; /* set zero at vertices */ 1880 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1881 for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1882 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1883 /* store w for final comparison */ 1884 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1885 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1886 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1887 1888 /* Jump operator P_D : results stored in pcis->vec1_B */ 1889 1890 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1891 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1892 /* Action of B_delta */ 1893 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1894 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1895 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1896 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1897 /* Action of B_Ddelta^T */ 1898 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1899 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1900 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1901 1902 /* Average operator E_D : results stored in pcis->vec2_B */ 1903 1904 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1905 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1906 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1907 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1908 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1909 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1910 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1911 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1912 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1914 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1915 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1916 1917 /* test E_D=I-P_D */ 1918 scalar_value = 1.0; 1919 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1920 scalar_value = -1.0; 1921 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1922 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1923 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1925 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1926 1927 /******************************************************************/ 1928 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1929 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 1930 /******************************************************************/ 1931 1932 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1933 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1934 scalar_value = 0.0; /* set zero at vertices */ 1935 for (i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1936 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1937 1938 /* Jump operator P_D : results stored in pcis->vec1_B */ 1939 1940 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1941 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1942 /* Action of B_delta */ 1943 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1944 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1945 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1946 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1947 /* Action of B_Ddelta^T */ 1948 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1949 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1950 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1951 /* diagonal scaling */ 1952 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1953 /* sum on the interface */ 1954 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1955 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1956 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1957 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1958 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1959 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1960 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1961 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1962 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1963 1964 if (!fully_redundant) { 1965 /******************************************************************/ 1966 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1967 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1968 /******************************************************************/ 1969 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1970 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1971 /* Action of B_Ddelta^T */ 1972 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1973 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1974 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1975 /* Action of B_delta */ 1976 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1977 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1978 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1979 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1980 scalar_value = -1.0; 1981 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1982 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1983 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1984 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1985 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1986 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1987 } 1988 } 1989 /* final cleanup */ 1990 ierr = PetscFree(vertex_indices);CHKERRQ(ierr); 1991 ierr = VecDestroy(&lambda_global);CHKERRQ(ierr); 1992 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "PCBDDCSetupFETIDPPCContext" 1998 static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx) 1999 { 2000 FETIDPMat_ctx *mat_ctx; 2001 PetscErrorCode ierr; 2002 2003 PetscFunctionBegin; 2004 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2005 /* get references from objects created when setting up feti mat context */ 2006 ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr); 2007 fetidppc_ctx->lambda_local = mat_ctx->lambda_local; 2008 ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr); 2009 fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta; 2010 ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr); 2011 fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "FETIDPMatMult" 2017 static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y) 2018 { 2019 FETIDPMat_ctx *mat_ctx; 2020 PC_IS *pcis; 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr); 2025 pcis = (PC_IS*)mat_ctx->pc->data; 2026 /* Application of B_delta^T */ 2027 ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2028 ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2029 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 2030 /* Application of \widetilde{S}^-1 */ 2031 ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr); 2032 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 2033 /* Application of B_delta */ 2034 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 2035 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2036 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2037 ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2038 PetscFunctionReturn(0); 2039 } 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "FETIDPPCApply" 2043 static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y) 2044 { 2045 FETIDPPC_ctx *pc_ctx; 2046 PC_IS *pcis; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = PCShellGetContext(fetipc,(void**)&pc_ctx); 2051 pcis = (PC_IS*)pc_ctx->pc->data; 2052 /* Application of B_Ddelta^T */ 2053 ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2054 ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2055 ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr); 2056 ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr); 2057 /* Application of S */ 2058 ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2059 /* Application of B_Ddelta */ 2060 ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr); 2061 ierr = VecSet(y,0.0);CHKERRQ(ierr); 2062 ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 2069 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 2070 { 2071 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2072 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2073 PetscInt nvtxs; 2074 const PetscInt *xadj,*adjncy; 2075 Mat mat_adj; 2076 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 2077 PCBDDCGraph mat_graph=pcbddc->mat_graph; 2078 PetscErrorCode ierr; 2079 2080 PetscFunctionBegin; 2081 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 2082 if (!mat_graph->xadj) { 2083 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2084 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2085 if (!flg_row) { 2086 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2087 } 2088 /* Get adjacency into BDDC workspace */ 2089 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2090 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2091 if (!flg_row) { 2092 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2093 } 2094 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 /* -------------------------------------------------------------------------- */ 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2101 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 2102 { 2103 PetscErrorCode ierr; 2104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2105 PC_IS* pcis = (PC_IS*) (pc->data); 2106 const PetscScalar zero = 0.0; 2107 2108 PetscFunctionBegin; 2109 /* Application of PHI^T */ 2110 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2111 if (pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2112 2113 /* Scatter data of coarse_rhs */ 2114 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 2115 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 2117 /* Local solution on R nodes */ 2118 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2119 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2120 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2121 if (pcbddc->prec_type) { 2122 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2123 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2124 } 2125 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 2126 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2127 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2128 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2129 if (pcbddc->prec_type) { 2130 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2131 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 } 2133 2134 /* Coarse solution */ 2135 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 if (pcbddc->coarse_rhs) { 2137 if (pcbddc->CoarseNullSpace) { 2138 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2139 } 2140 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2141 if (pcbddc->CoarseNullSpace) { 2142 ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,PETSC_NULL);CHKERRQ(ierr); 2143 } 2144 } 2145 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2146 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2147 2148 /* Sum contributions from two levels */ 2149 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2150 if (pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2151 PetscFunctionReturn(0); 2152 } 2153 /* -------------------------------------------------------------------------- */ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 2156 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 2157 { 2158 PetscErrorCode ierr; 2159 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2160 2161 PetscFunctionBegin; 2162 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2163 if (pcbddc->local_auxmat1) { 2164 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 2165 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2166 } 2167 PetscFunctionReturn(0); 2168 } 2169 /* -------------------------------------------------------------------------- */ 2170 #undef __FUNCT__ 2171 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2172 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2173 { 2174 PetscErrorCode ierr; 2175 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2176 2177 PetscFunctionBegin; 2178 switch(pcbddc->coarse_communications_type){ 2179 case SCATTERS_BDDC: 2180 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2181 break; 2182 case GATHERS_BDDC: 2183 break; 2184 } 2185 PetscFunctionReturn(0); 2186 } 2187 /* -------------------------------------------------------------------------- */ 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2190 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 2191 { 2192 PetscErrorCode ierr; 2193 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2194 PetscScalar* array_to; 2195 PetscScalar* array_from; 2196 MPI_Comm comm=((PetscObject)pc)->comm; 2197 PetscInt i; 2198 2199 PetscFunctionBegin; 2200 2201 switch(pcbddc->coarse_communications_type){ 2202 case SCATTERS_BDDC: 2203 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 2204 break; 2205 case GATHERS_BDDC: 2206 if (vec_from) VecGetArray(vec_from,&array_from); 2207 if (vec_to) VecGetArray(vec_to,&array_to); 2208 switch(pcbddc->coarse_problem_type){ 2209 case SEQUENTIAL_BDDC: 2210 if (smode == SCATTER_FORWARD) { 2211 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2212 if (vec_to) { 2213 if (imode == ADD_VALUES) { 2214 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2215 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2216 } 2217 } else { 2218 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2219 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2220 } 2221 } 2222 } 2223 } else { 2224 if (vec_from) { 2225 if (imode == ADD_VALUES) { 2226 printf("Scatter mode %d, insert mode %d for case %d not implemented!\n",smode,imode,pcbddc->coarse_problem_type); 2227 } 2228 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2229 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 2230 } 2231 } 2232 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 2233 } 2234 break; 2235 case REPLICATED_BDDC: 2236 if (smode == SCATTER_FORWARD) { 2237 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 2238 if (imode == ADD_VALUES) { 2239 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2240 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 2241 } 2242 } else { 2243 for (i=0;i<pcbddc->replicated_primal_size;i++) { 2244 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i]; 2245 } 2246 } 2247 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 2248 if (imode == ADD_VALUES) { 2249 for (i=0;i<pcbddc->local_primal_size;i++) { 2250 array_to[i]+=array_from[pcbddc->local_primal_indices[i]]; 2251 } 2252 } else { 2253 for (i=0;i<pcbddc->local_primal_size;i++) { 2254 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 2255 } 2256 } 2257 } 2258 break; 2259 case MULTILEVEL_BDDC: 2260 break; 2261 case PARALLEL_BDDC: 2262 break; 2263 } 2264 if (vec_from) VecRestoreArray(vec_from,&array_from); 2265 if (vec_to) VecRestoreArray(vec_to,&array_to); 2266 break; 2267 } 2268 PetscFunctionReturn(0); 2269 } 2270 /* -------------------------------------------------------------------------- */ 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 2273 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 2274 { 2275 PetscErrorCode ierr; 2276 PC_IS* pcis = (PC_IS*)(pc->data); 2277 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2278 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2279 PetscInt *nnz,*is_indices; 2280 PetscScalar *temp_quadrature_constraint; 2281 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 2282 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 2283 PetscInt n_constraints,n_vertices,size_of_constraint; 2284 PetscScalar quad_value; 2285 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 2286 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 2287 IS *used_IS; 2288 MatType impMatType=MATSEQAIJ; 2289 PetscBLASInt Bs,Bt,lwork,lierr; 2290 PetscReal tol=1.0e-8; 2291 MatNullSpace nearnullsp; 2292 const Vec *nearnullvecs; 2293 Vec *localnearnullsp; 2294 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2295 PetscReal *rwork,*singular_vals; 2296 PetscBLASInt Bone=1,*ipiv; 2297 Vec temp_vec; 2298 Mat temp_mat; 2299 KSP temp_ksp; 2300 PC temp_pc; 2301 PetscInt s,start_constraint,dual_dofs; 2302 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2303 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2304 PetscBool boolforface,*change_basis; 2305 /* some ugly conditional declarations */ 2306 #if defined(PETSC_MISSING_LAPACK_GESVD) 2307 PetscScalar dot_result; 2308 PetscScalar one=1.0,zero=0.0; 2309 PetscInt ii; 2310 PetscScalar *singular_vectors; 2311 PetscBLASInt *iwork,*ifail; 2312 PetscReal dummy_real,abs_tol; 2313 PetscBLASInt eigs_found; 2314 #if defined(PETSC_USE_COMPLEX) 2315 PetscScalar val1,val2; 2316 #endif 2317 #endif 2318 PetscBLASInt dummy_int; 2319 PetscScalar dummy_scalar; 2320 2321 PetscFunctionBegin; 2322 /* check if near null space is attached to global mat */ 2323 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2324 if (nearnullsp) { 2325 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2326 } else { /* if near null space is not provided it uses constants */ 2327 nnsp_has_cnst = PETSC_TRUE; 2328 use_nnsp_true = PETSC_TRUE; 2329 } 2330 if (nnsp_has_cnst) { 2331 nnsp_addone = 1; 2332 } 2333 /* 2334 Evaluate maximum storage size needed by the procedure 2335 - temp_indices will contain start index of each constraint stored as follows 2336 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2337 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2338 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2339 */ 2340 2341 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2342 total_counts *= (nnsp_addone+nnsp_size); 2343 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2344 total_counts += n_vertices; 2345 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2346 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2347 total_counts = 0; 2348 max_size_of_constraint = 0; 2349 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2350 if (i<pcbddc->n_ISForEdges){ 2351 used_IS = &pcbddc->ISForEdges[i]; 2352 } else { 2353 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2354 } 2355 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2356 total_counts += j; 2357 if (j>max_size_of_constraint) max_size_of_constraint=j; 2358 } 2359 total_counts *= (nnsp_addone+nnsp_size); 2360 total_counts += n_vertices; 2361 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2362 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2363 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2364 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2365 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2366 for (i=0;i<pcis->n;i++) { 2367 local_to_B[i]=-1; 2368 } 2369 for (i=0;i<pcis->n_B;i++) { 2370 local_to_B[is_indices[i]]=i; 2371 } 2372 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2373 2374 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2375 rwork = 0; 2376 work = 0; 2377 singular_vals = 0; 2378 temp_basis = 0; 2379 correlation_mat = 0; 2380 if (!pcbddc->use_nnsp_true) { 2381 PetscScalar temp_work; 2382 #if defined(PETSC_MISSING_LAPACK_GESVD) 2383 /* POD */ 2384 PetscInt max_n; 2385 max_n = nnsp_addone+nnsp_size; 2386 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2387 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2388 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2389 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2390 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2391 #if defined(PETSC_USE_COMPLEX) 2392 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2393 #endif 2394 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2395 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2396 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2397 Bt = PetscBLASIntCast(max_n); 2398 lwork=-1; 2399 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2400 #if !defined(PETSC_USE_COMPLEX) 2401 abs_tol=1.e-8; 2402 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2403 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2404 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2405 #else 2406 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2407 /* LAPACK call is missing here! TODO */ 2408 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2409 #endif 2410 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2411 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2412 #else /* on missing GESVD */ 2413 /* SVD */ 2414 PetscInt max_n,min_n; 2415 max_n = max_size_of_constraint; 2416 min_n = nnsp_addone+nnsp_size; 2417 if (max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2418 min_n = max_size_of_constraint; 2419 max_n = nnsp_addone+nnsp_size; 2420 } 2421 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2422 #if defined(PETSC_USE_COMPLEX) 2423 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2424 #endif 2425 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2426 lwork=-1; 2427 Bs = PetscBLASIntCast(max_n); 2428 Bt = PetscBLASIntCast(min_n); 2429 dummy_int = Bs; 2430 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2431 #if !defined(PETSC_USE_COMPLEX) 2432 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2433 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2434 #else 2435 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2436 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2437 #endif 2438 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2439 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2440 #endif 2441 /* Allocate optimal workspace */ 2442 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2443 total_counts = (PetscInt)lwork; 2444 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2445 } 2446 /* get local part of global near null space vectors */ 2447 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2448 for (k=0;k<nnsp_size;k++) { 2449 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2450 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2451 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2452 } 2453 /* Now we can loop on constraining sets */ 2454 total_counts=0; 2455 temp_indices[0]=0; 2456 /* vertices */ 2457 PetscBool used_vertex; 2458 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2459 if (nnsp_has_cnst) { /* consider all vertices */ 2460 for (i=0;i<n_vertices;i++) { 2461 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2462 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2463 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2464 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2465 change_basis[total_counts]=PETSC_FALSE; 2466 total_counts++; 2467 } 2468 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2469 for (i=0;i<n_vertices;i++) { 2470 used_vertex=PETSC_FALSE; 2471 k=0; 2472 while(!used_vertex && k<nnsp_size) { 2473 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2474 if (PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2475 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2476 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2477 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2478 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2479 change_basis[total_counts]=PETSC_FALSE; 2480 total_counts++; 2481 used_vertex=PETSC_TRUE; 2482 } 2483 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2484 k++; 2485 } 2486 } 2487 } 2488 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2489 n_vertices=total_counts; 2490 /* edges and faces */ 2491 for (i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2492 if (i<pcbddc->n_ISForEdges){ 2493 used_IS = &pcbddc->ISForEdges[i]; 2494 boolforface = pcbddc->usechangeofbasis; 2495 } else { 2496 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2497 boolforface = pcbddc->usechangeonfaces; 2498 } 2499 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2500 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2501 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2502 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2503 if (nnsp_has_cnst) { 2504 temp_constraints++; 2505 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2506 for (j=0;j<size_of_constraint;j++) { 2507 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2508 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2509 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2510 } 2511 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2512 change_basis[total_counts]=boolforface; 2513 total_counts++; 2514 } 2515 for (k=0;k<nnsp_size;k++) { 2516 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2517 for (j=0;j<size_of_constraint;j++) { 2518 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2519 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2520 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2521 } 2522 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2523 quad_value = 1.0; 2524 if ( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 2525 Bs = PetscBLASIntCast(size_of_constraint); 2526 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2527 } 2528 if ( quad_value > 0.0 ) { /* keep indices and values */ 2529 temp_constraints++; 2530 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2531 change_basis[total_counts]=boolforface; 2532 total_counts++; 2533 } 2534 } 2535 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2536 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2537 if (!use_nnsp_true) { 2538 2539 Bs = PetscBLASIntCast(size_of_constraint); 2540 Bt = PetscBLASIntCast(temp_constraints); 2541 2542 #if defined(PETSC_MISSING_LAPACK_GESVD) 2543 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2544 /* Store upper triangular part of correlation matrix */ 2545 for (j=0;j<temp_constraints;j++) { 2546 for (k=0;k<j+1;k++) { 2547 #if defined(PETSC_USE_COMPLEX) 2548 /* hand made complex dot product -> replace */ 2549 dot_result = 0.0; 2550 for (ii=0; ii<size_of_constraint; ii++) { 2551 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2552 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2553 dot_result += val1*PetscConj(val2); 2554 } 2555 #else 2556 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2557 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2558 #endif 2559 correlation_mat[j*temp_constraints+k]=dot_result; 2560 } 2561 } 2562 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2563 #if !defined(PETSC_USE_COMPLEX) 2564 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2565 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2566 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2567 #else 2568 /* LAPACK call is missing here! TODO */ 2569 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2570 #endif 2571 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2572 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2573 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2574 j=0; 2575 while( j < Bt && singular_vals[j] < tol) j++; 2576 total_counts=total_counts-j; 2577 if (j<temp_constraints) { 2578 for (k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2579 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2580 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2582 /* copy POD basis into used quadrature memory */ 2583 for (k=0;k<Bt-j;k++) { 2584 for (ii=0;ii<size_of_constraint;ii++) { 2585 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2586 } 2587 } 2588 } 2589 2590 #else /* on missing GESVD */ 2591 PetscInt min_n = temp_constraints; 2592 if (min_n > size_of_constraint) min_n = size_of_constraint; 2593 dummy_int = Bs; 2594 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2595 #if !defined(PETSC_USE_COMPLEX) 2596 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2597 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2598 #else 2599 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2600 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2601 #endif 2602 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 2603 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2604 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2605 j=0; 2606 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2607 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2608 #endif 2609 } 2610 } 2611 2612 n_constraints=total_counts-n_vertices; 2613 local_primal_size = total_counts; 2614 /* set quantities in pcbddc data structure */ 2615 pcbddc->n_vertices = n_vertices; 2616 pcbddc->n_constraints = n_constraints; 2617 pcbddc->local_primal_size = local_primal_size; 2618 2619 /* Create constraint matrix */ 2620 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2621 /* so we need to set it up properly either with or without change of basis */ 2622 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2623 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2624 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2625 /* compute a local numbering of constraints : vertices first then constraints */ 2626 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2627 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2628 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2629 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2630 total_counts=0; 2631 /* find vertices: subdomain corners plus dofs with basis changed */ 2632 for (i=0;i<local_primal_size;i++) { 2633 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2634 if (change_basis[i] || size_of_constraint == 1) { 2635 k=0; 2636 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2637 k=k+1; 2638 } 2639 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2640 array_vector[j] = 1.0; 2641 aux_primal_numbering[total_counts]=j; 2642 aux_primal_permutation[total_counts]=total_counts; 2643 total_counts++; 2644 } 2645 } 2646 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2647 /* permute indices in order to have a sorted set of vertices */ 2648 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2649 /* nonzero structure */ 2650 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2651 for (i=0;i<total_counts;i++) { 2652 nnz[i]=1; 2653 } 2654 j=total_counts; 2655 for (i=n_vertices;i<local_primal_size;i++) { 2656 if (!change_basis[i]) { 2657 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2658 j++; 2659 } 2660 } 2661 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2662 ierr = PetscFree(nnz);CHKERRQ(ierr); 2663 /* set values in constraint matrix */ 2664 for (i=0;i<total_counts;i++) { 2665 j = aux_primal_permutation[i]; 2666 k = aux_primal_numbering[j]; 2667 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2668 } 2669 for (i=n_vertices;i<local_primal_size;i++) { 2670 if (!change_basis[i]) { 2671 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2672 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2673 total_counts++; 2674 } 2675 } 2676 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2677 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2678 /* assembling */ 2679 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2681 2682 /* Create matrix for change of basis. We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2683 if (pcbddc->usechangeofbasis) { 2684 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2685 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2686 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2687 /* work arrays */ 2688 /* we need to reuse these arrays, so we free them */ 2689 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2690 ierr = PetscFree(work);CHKERRQ(ierr); 2691 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2692 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2693 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2694 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2695 for (i=0;i<pcis->n_B;i++) { 2696 nnz[i]=1; 2697 } 2698 /* Overestimated nonzeros per row */ 2699 k=1; 2700 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2701 if (change_basis[i]) { 2702 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2703 if (k < size_of_constraint) { 2704 k = size_of_constraint; 2705 } 2706 for (j=0;j<size_of_constraint;j++) { 2707 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2708 } 2709 } 2710 } 2711 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2712 ierr = PetscFree(nnz);CHKERRQ(ierr); 2713 /* Temporary array to store indices */ 2714 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2715 /* Set initial identity in the matrix */ 2716 for (i=0;i<pcis->n_B;i++) { 2717 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2718 } 2719 /* Now we loop on the constraints which need a change of basis */ 2720 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2721 /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (6.2.1) */ 2722 temp_constraints = 0; 2723 if (pcbddc->n_vertices < local_primal_size) { 2724 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2725 } 2726 for (i=pcbddc->n_vertices;i<local_primal_size;i++) { 2727 if (change_basis[i]) { 2728 compute_submatrix = PETSC_FALSE; 2729 useksp = PETSC_FALSE; 2730 if (temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2731 temp_constraints++; 2732 if (i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2733 compute_submatrix = PETSC_TRUE; 2734 } 2735 } 2736 if (compute_submatrix) { 2737 if (temp_constraints > 1 || pcbddc->use_nnsp_true) { 2738 useksp = PETSC_TRUE; 2739 } 2740 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2741 if (useksp) { /* experimental */ 2742 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2743 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2744 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2745 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2746 } 2747 /* First _size_of_constraint-temp_constraints_ columns */ 2748 dual_dofs = size_of_constraint-temp_constraints; 2749 start_constraint = i+1-temp_constraints; 2750 for (s=0;s<dual_dofs;s++) { 2751 is_indices[0] = s; 2752 for (j=0;j<temp_constraints;j++) { 2753 for (k=0;k<temp_constraints;k++) { 2754 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2755 } 2756 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2757 is_indices[j+1]=s+j+1; 2758 } 2759 Bt = temp_constraints; 2760 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2761 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2762 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2763 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2764 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2765 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2766 if (useksp) { 2767 /* temp mat with transposed rows and columns */ 2768 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2769 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2770 } 2771 } 2772 if (useksp) { 2773 /* last rows of temp_mat */ 2774 for (j=0;j<size_of_constraint;j++) { 2775 is_indices[j] = j; 2776 } 2777 for (s=0;s<temp_constraints;s++) { 2778 k = s + dual_dofs; 2779 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2780 } 2781 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2784 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2785 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2786 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2787 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2788 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2789 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2790 for (s=0;s<temp_constraints;s++) { 2791 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2792 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2793 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2794 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2795 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2796 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2797 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2798 /* last columns of change of basis matrix associated to new primal dofs */ 2799 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2800 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2801 } 2802 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2803 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2804 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2805 } else { 2806 /* last columns of change of basis matrix associated to new primal dofs */ 2807 for (s=0;s<temp_constraints;s++) { 2808 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2809 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2810 } 2811 } 2812 /* prepare for the next cycle */ 2813 temp_constraints = 0; 2814 if (i != local_primal_size -1 ) { 2815 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2816 } 2817 } 2818 } 2819 } 2820 /* assembling */ 2821 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2822 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2823 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2824 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2825 } 2826 /* free workspace no longer needed */ 2827 ierr = PetscFree(rwork);CHKERRQ(ierr); 2828 ierr = PetscFree(work);CHKERRQ(ierr); 2829 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2830 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2831 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2832 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2833 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2834 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2835 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2836 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 2837 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2838 #if defined(PETSC_MISSING_LAPACK_GESVD) 2839 ierr = PetscFree(iwork);CHKERRQ(ierr); 2840 ierr = PetscFree(ifail);CHKERRQ(ierr); 2841 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2842 #endif 2843 for (k=0;k<nnsp_size;k++) { 2844 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2845 } 2846 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 /* -------------------------------------------------------------------------- */ 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "PCBDDCCoarseSetUp" 2852 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2853 { 2854 PetscErrorCode ierr; 2855 2856 PC_IS* pcis = (PC_IS*)(pc->data); 2857 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2858 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2859 Mat change_mat_all; 2860 IS is_R_local; 2861 IS is_V_local; 2862 IS is_C_local; 2863 IS is_aux1; 2864 IS is_aux2; 2865 VecType impVecType; 2866 MatType impMatType; 2867 PetscInt n_R=0; 2868 PetscInt n_D=0; 2869 PetscInt n_B=0; 2870 PetscScalar zero=0.0; 2871 PetscScalar one=1.0; 2872 PetscScalar m_one=-1.0; 2873 PetscScalar* array; 2874 PetscScalar *coarse_submat_vals; 2875 PetscInt *idx_R_local; 2876 PetscInt *idx_V_B; 2877 PetscScalar *coarsefunctions_errors; 2878 PetscScalar *constraints_errors; 2879 /* auxiliary indices */ 2880 PetscInt i,j,k; 2881 /* for verbose output of bddc */ 2882 PetscViewer viewer=pcbddc->dbg_viewer; 2883 PetscBool dbg_flag=pcbddc->dbg_flag; 2884 /* for counting coarse dofs */ 2885 PetscInt n_vertices,n_constraints; 2886 PetscInt size_of_constraint; 2887 PetscInt *row_cmat_indices; 2888 PetscScalar *row_cmat_values; 2889 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2890 2891 PetscFunctionBegin; 2892 /* Set Non-overlapping dimensions */ 2893 n_B = pcis->n_B; n_D = pcis->n - n_B; 2894 /* Set types for local objects needed by BDDC precondtioner */ 2895 impMatType = MATSEQDENSE; 2896 impVecType = VECSEQ; 2897 /* get vertex indices from constraint matrix */ 2898 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2899 n_vertices=0; 2900 for (i=0;i<pcbddc->local_primal_size;i++) { 2901 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2902 if (size_of_constraint == 1) { 2903 vertices[n_vertices]=row_cmat_indices[0]; 2904 n_vertices++; 2905 } 2906 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2907 } 2908 /* Set number of constraints */ 2909 n_constraints = pcbddc->local_primal_size-n_vertices; 2910 2911 /* vertices in boundary numbering */ 2912 if (n_vertices) { 2913 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2914 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2915 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2916 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2917 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2918 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2919 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2920 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2921 for (i=0; i<n_vertices; i++) { 2922 j=0; 2923 while (array[j] != i ) {j++;} 2924 idx_V_B[i]=j; 2925 } 2926 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2927 } 2928 2929 /* transform local matrices if needed */ 2930 if (pcbddc->usechangeofbasis) { 2931 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2932 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2933 for (i=0;i<n_D;i++) { 2934 nnz[is_indices[i]]=1; 2935 } 2936 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2937 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2938 k=1; 2939 for (i=0;i<n_B;i++) { 2940 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2941 nnz[is_indices[i]]=j; 2942 if ( k < j) { 2943 k = j; 2944 } 2945 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2946 } 2947 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2948 /* assemble change of basis matrix on the whole set of local dofs */ 2949 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2950 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2951 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2952 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2953 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2954 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2955 for (i=0;i<n_D;i++) { 2956 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2957 } 2958 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2959 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2960 for (i=0;i<n_B;i++) { 2961 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2962 for (k=0;k<j;k++) { 2963 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2964 } 2965 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2966 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2967 } 2968 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2969 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2970 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2971 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2972 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2973 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2974 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2975 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2976 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2977 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2978 ierr = PetscFree(nnz);CHKERRQ(ierr); 2979 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2980 } else { 2981 /* without change of basis, the local matrix is unchanged */ 2982 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2983 pcbddc->local_mat = matis->A; 2984 } 2985 /* Change global null space passed in by the user if change of basis has been performed */ 2986 if (pcbddc->NullSpace && pcbddc->usechangeofbasis) { 2987 ierr = PCBDDCAdaptNullSpace(pc);CHKERRQ(ierr); 2988 } 2989 2990 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2991 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2992 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2993 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2994 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2995 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2996 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2997 if (dbg_flag) { 2998 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2999 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3000 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3001 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3002 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 3003 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 3004 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3005 } 3006 3007 /* Allocate needed vectors */ 3008 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 3009 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 3010 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 3011 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 3012 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 3013 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3014 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3015 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 3016 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 3017 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3018 3019 /* Creating some index sets needed */ 3020 /* For submatrices */ 3021 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 3022 if (n_vertices) { 3023 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 3024 } 3025 if (n_constraints) { 3026 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 3027 } 3028 3029 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3030 { 3031 PetscInt *aux_array1; 3032 PetscInt *aux_array2; 3033 PetscInt *idx_I_local; 3034 3035 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3036 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 3037 3038 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3039 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3040 for (i=0; i<n_D; i++) { array[idx_I_local[i]] = 0; } 3041 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idx_I_local);CHKERRQ(ierr); 3042 for (i=0, j=0; i<n_R; i++) { if ( array[idx_R_local[i]] == one ) { aux_array1[j] = i; j++; } } 3043 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3044 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3045 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3046 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3047 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3048 for (i=0, j=0; i<n_B; i++) { if ( array[i] == one ) { aux_array2[j] = i; j++; } } 3049 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3050 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 3051 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3052 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3053 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 3054 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3055 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3056 3057 if (pcbddc->prec_type || dbg_flag ) { 3058 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 3059 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3060 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == zero) { aux_array1[j] = i; j++; } } 3061 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3062 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 3063 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3064 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 3065 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3066 } 3067 } 3068 3069 /* Creating PC contexts for local Dirichlet and Neumann problems */ 3070 { 3071 Mat A_RR; 3072 PC pc_temp; 3073 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 3074 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3075 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3076 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 3077 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3078 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 3079 /* default */ 3080 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3081 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3082 /* Allow user's customization */ 3083 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3084 /* umfpack interface has a bug when matrix dimension is zero */ 3085 if (!n_D) { 3086 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3087 } 3088 /* Set Up KSP for Dirichlet problem of BDDC */ 3089 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 3090 /* set ksp_D into pcis data */ 3091 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 3092 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 3093 pcis->ksp_D = pcbddc->ksp_D; 3094 /* Matrix for Neumann problem is A_RR -> we need to create it */ 3095 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 3096 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 3097 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 3098 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 3099 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 3100 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 3101 /* default */ 3102 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 3103 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3104 /* Allow user's customization */ 3105 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 3106 /* umfpack interface has a bug when matrix dimension is zero */ 3107 if (!pcis->n) { 3108 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3109 } 3110 /* Set Up KSP for Neumann problem of BDDC */ 3111 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 3112 /* check Dirichlet and Neumann solvers */ 3113 { 3114 Vec temp_vec; 3115 PetscReal value; 3116 PetscMPIInt use_exact,use_exact_reduced; 3117 3118 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 3119 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 3120 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 3121 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 3122 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 3123 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3124 use_exact = 1; 3125 if (PetscAbsReal(value) > 1.e-4) { 3126 use_exact = 0; 3127 } 3128 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 3129 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 3130 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3131 if (dbg_flag) { 3132 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3133 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3134 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 3135 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3136 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 3137 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 3138 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3139 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 3140 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 3141 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 3142 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 3143 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 3144 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3145 } 3146 } 3147 /* free Neumann problem's matrix */ 3148 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 3149 } 3150 3151 /* Assemble all remaining stuff needed to apply BDDC */ 3152 { 3153 Mat A_RV,A_VR,A_VV; 3154 Mat M1; 3155 Mat C_CR; 3156 Mat AUXMAT; 3157 Vec vec1_C; 3158 Vec vec2_C; 3159 Vec vec1_V; 3160 Vec vec2_V; 3161 PetscInt *nnz; 3162 PetscInt *auxindices; 3163 PetscInt index; 3164 PetscScalar* array2; 3165 MatFactorInfo matinfo; 3166 3167 /* Allocating some extra storage just to be safe */ 3168 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 3169 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 3170 for (i=0;i<pcis->n;i++) {auxindices[i]=i;} 3171 3172 /* some work vectors on vertices and/or constraints */ 3173 if (n_vertices) { 3174 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 3175 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 3176 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 3177 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 3178 } 3179 if (n_constraints) { 3180 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 3181 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 3182 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 3183 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 3184 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 3185 } 3186 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3187 if (n_constraints) { 3188 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3189 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 3190 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 3191 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 3192 3193 /* Create Constraint matrix on R nodes: C_{CR} */ 3194 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3195 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 3196 3197 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 3198 for (i=0;i<n_constraints;i++) { 3199 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 3200 /* Get row of constraint matrix in R numbering */ 3201 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3202 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3203 for (j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 3204 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 3205 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 3206 /* Solve for row of constraint matrix in R numbering */ 3207 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3208 /* Set values */ 3209 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3210 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3211 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 3212 } 3213 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3214 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 3216 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3217 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 3218 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 3219 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 3220 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 3221 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3222 3223 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 3224 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 3225 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 3226 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 3227 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 3228 for (i=0;i<n_constraints;i++) { 3229 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 3230 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 3231 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 3232 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 3233 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 3234 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 3235 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 3236 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3237 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 3238 } 3239 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3240 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3241 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3242 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 3243 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3244 3245 } 3246 3247 /* Get submatrices from subdomain matrix */ 3248 if (n_vertices){ 3249 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3250 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3251 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3252 } 3253 3254 /* Matrix of coarse basis functions (local) */ 3255 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3256 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 3257 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 3258 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 3259 if (pcbddc->prec_type || dbg_flag ) { 3260 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3261 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 3262 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 3263 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 3264 } 3265 3266 if (dbg_flag) { 3267 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 3268 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 3269 } 3270 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3271 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 3272 3273 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3274 for (i=0;i<n_vertices;i++){ 3275 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 3276 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 3277 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 3278 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 3279 /* solution of saddle point problem */ 3280 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 3281 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3282 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 3283 if (n_constraints) { 3284 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 3285 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 3286 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3287 } 3288 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3289 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3290 3291 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3292 /* coarse basis functions */ 3293 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3294 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3295 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3296 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3297 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3298 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3299 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3300 if ( pcbddc->prec_type || dbg_flag ) { 3301 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3302 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3303 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3304 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3305 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3306 } 3307 /* subdomain contribution to coarse matrix */ 3308 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3309 for (j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3310 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3311 if (n_constraints) { 3312 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3313 for (j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3314 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3315 } 3316 3317 if ( dbg_flag ) { 3318 /* assemble subdomain vector on nodes */ 3319 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3320 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3321 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3322 for (j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3323 array[ vertices[i] ] = one; 3324 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3325 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3326 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 3327 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3328 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3329 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3330 for (j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3331 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3332 if (n_constraints) { 3333 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3334 for (j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3335 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3336 } 3337 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3338 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3339 /* check saddle point solution */ 3340 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3341 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3342 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3343 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3344 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3345 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3346 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3347 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3348 } 3349 } 3350 3351 for (i=0;i<n_constraints;i++){ 3352 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3353 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3354 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3355 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3356 /* solution of saddle point problem */ 3357 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3358 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3359 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3360 if (n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3361 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3362 /* coarse basis functions */ 3363 index=i+n_vertices; 3364 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3365 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3366 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3367 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3368 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3369 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3370 if ( pcbddc->prec_type || dbg_flag ) { 3371 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3372 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3373 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3374 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3375 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3376 } 3377 /* subdomain contribution to coarse matrix */ 3378 if (n_vertices) { 3379 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3380 for (j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3381 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3382 } 3383 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3384 for (j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3385 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3386 3387 if ( dbg_flag ) { 3388 /* assemble subdomain vector on nodes */ 3389 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3390 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3391 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3392 for (j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3393 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3394 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3395 /* assemble subdomain vector of lagrange multipliers */ 3396 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3397 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3398 if ( n_vertices) { 3399 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3400 for (j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3401 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3402 } 3403 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3404 for (j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3405 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3406 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3407 /* check saddle point solution */ 3408 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3409 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3410 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3411 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3412 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3413 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3414 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3415 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3416 } 3417 } 3418 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3420 if ( pcbddc->prec_type || dbg_flag ) { 3421 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3422 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3423 } 3424 /* Checking coarse_sub_mat and coarse basis functios */ 3425 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3426 if (dbg_flag) { 3427 Mat coarse_sub_mat; 3428 Mat TM1,TM2,TM3,TM4; 3429 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3430 MatType checkmattype=MATSEQAIJ; 3431 PetscScalar value; 3432 3433 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3434 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3435 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3436 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3437 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3438 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3439 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3440 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3441 3442 /*PetscViewer view_out; 3443 PetscMPIInt myrank; 3444 char filename[256]; 3445 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3446 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3447 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3448 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3449 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3450 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3451 3452 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3453 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3454 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3455 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3456 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3457 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3458 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3460 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3461 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3462 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3463 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3464 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3465 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3466 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3467 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3472 for (i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3473 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3474 for (i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3475 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3476 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3477 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3478 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3479 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3480 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3481 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3482 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3483 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3484 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3485 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3486 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3487 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 3488 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 3489 } 3490 /* free memory */ 3491 if (n_vertices) { 3492 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 3493 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 3494 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3495 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3496 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3497 } 3498 if (n_constraints) { 3499 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 3500 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 3501 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3502 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3503 } 3504 ierr = PetscFree(auxindices);CHKERRQ(ierr); 3505 ierr = PetscFree(nnz);CHKERRQ(ierr); 3506 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 3507 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 3508 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3509 } 3510 /* free memory */ 3511 if (n_vertices) { 3512 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3513 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 3514 } 3515 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 3516 3517 PetscFunctionReturn(0); 3518 } 3519 3520 /* -------------------------------------------------------------------------- */ 3521 3522 #undef __FUNCT__ 3523 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 3524 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 3525 { 3526 3527 3528 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3529 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3530 PC_IS *pcis = (PC_IS*)pc->data; 3531 MPI_Comm prec_comm = ((PetscObject)pc)->comm; 3532 MPI_Comm coarse_comm; 3533 3534 /* common to all choiches */ 3535 PetscScalar *temp_coarse_mat_vals; 3536 PetscScalar *ins_coarse_mat_vals; 3537 PetscInt *ins_local_primal_indices; 3538 PetscMPIInt *localsizes2,*localdispl2; 3539 PetscMPIInt size_prec_comm; 3540 PetscMPIInt rank_prec_comm; 3541 PetscMPIInt active_rank=MPI_PROC_NULL; 3542 PetscMPIInt master_proc=0; 3543 PetscInt ins_local_primal_size; 3544 /* specific to MULTILEVEL_BDDC */ 3545 PetscMPIInt *ranks_recv; 3546 PetscMPIInt count_recv=0; 3547 PetscMPIInt rank_coarse_proc_send_to; 3548 PetscMPIInt coarse_color = MPI_UNDEFINED; 3549 ISLocalToGlobalMapping coarse_ISLG; 3550 /* some other variables */ 3551 PetscErrorCode ierr; 3552 MatType coarse_mat_type; 3553 PCType coarse_pc_type; 3554 KSPType coarse_ksp_type; 3555 PC pc_temp; 3556 PetscInt i,j,k; 3557 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 3558 /* verbose output viewer */ 3559 PetscViewer viewer=pcbddc->dbg_viewer; 3560 PetscBool dbg_flag=pcbddc->dbg_flag; 3561 3562 PetscInt offset,offset2; 3563 PetscMPIInt im_active,active_procs; 3564 PetscInt *dnz,*onz; 3565 3566 PetscBool setsym,issym=PETSC_FALSE; 3567 3568 PetscFunctionBegin; 3569 ins_local_primal_indices = 0; 3570 ins_coarse_mat_vals = 0; 3571 localsizes2 = 0; 3572 localdispl2 = 0; 3573 temp_coarse_mat_vals = 0; 3574 coarse_ISLG = 0; 3575 3576 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 3577 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 3578 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3579 3580 /* Assign global numbering to coarse dofs */ 3581 { 3582 PetscInt *auxlocal_primal; 3583 PetscInt *row_cmat_indices; 3584 PetscInt *aux_ordering; 3585 PetscInt *row_cmat_global_indices; 3586 PetscInt *dof_sizes,*dof_displs; 3587 PetscInt size_of_constraint; 3588 PetscBool *array_bool; 3589 PetscBool first_found; 3590 PetscInt first_index,old_index,s; 3591 PetscMPIInt mpi_local_primal_size; 3592 PetscScalar coarsesum,*array; 3593 3594 mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 3595 3596 /* Construct needed data structures for message passing */ 3597 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 3598 j = 0; 3599 if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3600 j = size_prec_comm; 3601 } 3602 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 3603 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 3604 /* Gather local_primal_size information for all processes */ 3605 if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3606 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 3607 } else { 3608 ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3609 } 3610 pcbddc->replicated_primal_size = 0; 3611 for (i=0; i<j; i++) { 3612 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 3613 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 3614 } 3615 3616 /* First let's count coarse dofs. 3617 This code fragment assumes that the number of local constraints per connected component 3618 is not greater than the number of nodes defined for the connected component 3619 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3620 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3621 j = 0; 3622 for (i=0;i<pcbddc->local_primal_size;i++) { 3623 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3624 if ( j < size_of_constraint ) { 3625 j = size_of_constraint; 3626 } 3627 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3628 } 3629 ierr = PetscMalloc(j*sizeof(PetscInt),&aux_ordering);CHKERRQ(ierr); 3630 ierr = PetscMalloc(j*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 3631 ierr = PetscMalloc(pcis->n*sizeof(PetscBool),&array_bool);CHKERRQ(ierr); 3632 for (i=0;i<pcis->n;i++) { 3633 array_bool[i] = PETSC_FALSE; 3634 } 3635 for (i=0;i<pcbddc->local_primal_size;i++) { 3636 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3637 for (j=0; j<size_of_constraint; j++) { 3638 aux_ordering[j] = j; 3639 } 3640 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3641 ierr = PetscSortIntWithPermutation(size_of_constraint,row_cmat_global_indices,aux_ordering);CHKERRQ(ierr); 3642 for (j=0; j<size_of_constraint; j++) { 3643 k = row_cmat_indices[aux_ordering[j]]; 3644 if ( !array_bool[k] ) { 3645 array_bool[k] = PETSC_TRUE; 3646 auxlocal_primal[i] = k; 3647 break; 3648 } 3649 } 3650 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 3651 } 3652 ierr = PetscFree(aux_ordering);CHKERRQ(ierr); 3653 ierr = PetscFree(array_bool);CHKERRQ(ierr); 3654 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3655 3656 /* Compute number of coarse dofs */ 3657 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3658 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3659 for (i=0;i<pcbddc->local_primal_size;i++) { 3660 array[auxlocal_primal[i]]=1.0; 3661 } 3662 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3663 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3664 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3665 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3666 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3667 pcbddc->coarse_size = (PetscInt)coarsesum; 3668 3669 /* Fill pcis->vec1_global with cumulative function for global numbering */ 3670 ierr = VecGetArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3671 ierr = VecGetLocalSize(pcis->vec1_global,&s);CHKERRQ(ierr); 3672 k = 0; 3673 first_index = -1; 3674 first_found = PETSC_FALSE; 3675 for (i=0;i<s;i++) { 3676 if (!first_found && array[i] > 0.0) { 3677 first_found = PETSC_TRUE; 3678 first_index = i; 3679 } 3680 k += (PetscInt)array[i]; 3681 } 3682 j = ( !rank_prec_comm ? size_prec_comm : 0); 3683 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 3684 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 3685 ierr = MPI_Gather(&k,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3686 if (!rank_prec_comm) { 3687 dof_displs[0]=0; 3688 for (i=1;i<size_prec_comm;i++) { 3689 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3690 } 3691 } 3692 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&k,1,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 3693 if (first_found) { 3694 array[first_index] += k; 3695 old_index = first_index; 3696 for (i=first_index+1;i<s;i++) { 3697 if (array[i] > 0.0) { 3698 array[i] += array[old_index]; 3699 old_index = i; 3700 } 3701 } 3702 } 3703 ierr = VecRestoreArray(pcis->vec1_global,&array);CHKERRQ(ierr); 3704 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3705 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3706 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3707 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3708 for (i=0;i<pcbddc->local_primal_size;i++) { 3709 pcbddc->local_primal_indices[i] = (PetscInt)array[auxlocal_primal[i]]-1; 3710 } 3711 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3712 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3713 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3714 3715 if (dbg_flag) { 3716 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3717 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3718 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr); 3719 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3720 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3721 for (i=0;i<pcbddc->local_primal_size;i++) { 3722 array[auxlocal_primal[i]]=1.0; 3723 } 3724 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3725 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3726 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3727 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3728 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3729 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3730 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3731 for (i=0;i<pcis->n;i++) { 3732 if (array[i] == 1.0) { 3733 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr); 3734 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr); 3735 } 3736 } 3737 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3738 for (i=0;i<pcis->n;i++) { 3739 if( array[i] > 0.0) { 3740 array[i] = 1.0/array[i]; 3741 } 3742 } 3743 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3744 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3745 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3746 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3747 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3748 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr); 3749 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3750 } 3751 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3752 } 3753 3754 if (dbg_flag) { 3755 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 3756 /*ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3758 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3759 for (i=0;i<pcbddc->local_primal_size;i++) { 3760 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]); 3761 }*/ 3762 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3763 } 3764 3765 im_active = 0; 3766 if (pcis->n) { im_active = 1; } 3767 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr); 3768 3769 /* adapt coarse problem type */ 3770 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 3771 if (pcbddc->current_level < pcbddc->max_levels) { 3772 if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) { 3773 if (dbg_flag) { 3774 ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3775 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3776 } 3777 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3778 } 3779 } else { 3780 if (dbg_flag) { 3781 ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3782 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3783 } 3784 pcbddc->coarse_problem_type = PARALLEL_BDDC; 3785 } 3786 } 3787 3788 switch(pcbddc->coarse_problem_type){ 3789 3790 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 3791 { 3792 /* we need additional variables */ 3793 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 3794 MetisInt *metis_coarse_subdivision; 3795 MetisInt options[METIS_NOPTIONS]; 3796 PetscMPIInt size_coarse_comm,rank_coarse_comm; 3797 PetscMPIInt procs_jumps_coarse_comm; 3798 PetscMPIInt *coarse_subdivision; 3799 PetscMPIInt *total_count_recv; 3800 PetscMPIInt *total_ranks_recv; 3801 PetscMPIInt *displacements_recv; 3802 PetscMPIInt *my_faces_connectivity; 3803 PetscMPIInt *petsc_faces_adjncy; 3804 MetisInt *faces_adjncy; 3805 MetisInt *faces_xadj; 3806 PetscMPIInt *number_of_faces; 3807 PetscMPIInt *faces_displacements; 3808 PetscInt *array_int; 3809 PetscMPIInt my_faces=0; 3810 PetscMPIInt total_faces=0; 3811 PetscInt ranks_stretching_ratio; 3812 3813 /* define some quantities */ 3814 pcbddc->coarse_communications_type = SCATTERS_BDDC; 3815 coarse_mat_type = MATIS; 3816 coarse_pc_type = PCBDDC; 3817 coarse_ksp_type = KSPRICHARDSON; 3818 3819 /* details of coarse decomposition */ 3820 n_subdomains = active_procs; 3821 n_parts = n_subdomains/pcbddc->coarsening_ratio; 3822 ranks_stretching_ratio = size_prec_comm/active_procs; 3823 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 3824 3825 #if 0 3826 PetscMPIInt *old_ranks; 3827 PetscInt *new_ranks,*jj,*ii; 3828 MatPartitioning mat_part; 3829 IS coarse_new_decomposition,is_numbering; 3830 PetscViewer viewer_test; 3831 MPI_Comm test_coarse_comm; 3832 PetscMPIInt test_coarse_color; 3833 Mat mat_adj; 3834 /* Create new communicator for coarse problem splitting the old one */ 3835 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3836 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3837 test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED ); 3838 test_coarse_comm = MPI_COMM_NULL; 3839 ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr); 3840 if (im_active) { 3841 ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks); 3842 ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks); 3843 ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3844 ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr); 3845 ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr); 3846 for (i=0;i<size_prec_comm;i++) { 3847 new_ranks[i] = -1; 3848 } 3849 for (i=0;i<n_subdomains;i++) { 3850 new_ranks[old_ranks[i]] = i; 3851 } 3852 ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr); 3853 k = pcis->n_neigh-1; 3854 ierr = PetscMalloc(2*sizeof(PetscInt),&ii); 3855 ii[0]=0; 3856 ii[1]=k; 3857 ierr = PetscMalloc(k*sizeof(PetscInt),&jj); 3858 for (i=0;i<k;i++) { 3859 jj[i]=new_ranks[pcis->neigh[i+1]]; 3860 } 3861 ierr = PetscSortInt(k,jj);CHKERRQ(ierr); 3862 ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,PETSC_NULL,&mat_adj);CHKERRQ(ierr); 3863 ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr); 3864 ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr); 3865 ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr); 3866 ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr); 3867 printf("Setting Nparts %d\n",n_parts); 3868 ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr); 3869 ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr); 3870 ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr); 3871 ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr); 3872 ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr); 3873 ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr); 3874 ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr); 3875 ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr); 3876 ierr = ISDestroy(&is_numbering);CHKERRQ(ierr); 3877 ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr); 3878 ierr = PetscFree(old_ranks);CHKERRQ(ierr); 3879 ierr = PetscFree(new_ranks);CHKERRQ(ierr); 3880 ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr); 3881 } 3882 #endif 3883 3884 /* build CSR graph of subdomains' connectivity */ 3885 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 3886 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 3887 for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 3888 for (j=0;j<pcis->n_shared[i];j++){ 3889 array_int[ pcis->shared[i][j] ]+=1; 3890 } 3891 } 3892 for (i=1;i<pcis->n_neigh;i++){ 3893 for (j=0;j<pcis->n_shared[i];j++){ 3894 if (array_int[ pcis->shared[i][j] ] > 0 ){ 3895 my_faces++; 3896 break; 3897 } 3898 } 3899 } 3900 3901 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 3902 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 3903 my_faces=0; 3904 for (i=1;i<pcis->n_neigh;i++){ 3905 for (j=0;j<pcis->n_shared[i];j++){ 3906 if (array_int[ pcis->shared[i][j] ] > 0 ){ 3907 my_faces_connectivity[my_faces]=pcis->neigh[i]; 3908 my_faces++; 3909 break; 3910 } 3911 } 3912 } 3913 if (rank_prec_comm == master_proc) { 3914 ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 3915 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 3916 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 3917 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 3918 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 3919 } 3920 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3921 if (rank_prec_comm == master_proc) { 3922 faces_xadj[0]=0; 3923 faces_displacements[0]=0; 3924 j=0; 3925 for (i=1;i<size_prec_comm+1;i++) { 3926 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 3927 if (number_of_faces[i-1]) { 3928 j++; 3929 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 3930 } 3931 } 3932 } 3933 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 3934 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 3935 ierr = PetscFree(array_int);CHKERRQ(ierr); 3936 if (rank_prec_comm == master_proc) { 3937 for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 3938 /*printf("This is the face connectivity (actual ranks)\n"); 3939 for (i=0;i<n_subdomains;i++){ 3940 printf("proc %d is connected with \n",i); 3941 for (j=faces_xadj[i];j<faces_xadj[i+1];j++) 3942 printf("%d ",faces_adjncy[j]); 3943 printf("\n"); 3944 }*/ 3945 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 3946 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 3947 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 3948 } 3949 3950 if ( rank_prec_comm == master_proc ) { 3951 3952 PetscInt heuristic_for_metis=3; 3953 3954 ncon=1; 3955 faces_nvtxs=n_subdomains; 3956 /* partition graoh induced by face connectivity */ 3957 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 3958 ierr = METIS_SetDefaultOptions(options); 3959 /* we need a contiguous partition of the coarse mesh */ 3960 options[METIS_OPTION_CONTIG]=1; 3961 options[METIS_OPTION_NITER]=30; 3962 if (pcbddc->coarsening_ratio > 1) { 3963 if (n_subdomains>n_parts*heuristic_for_metis) { 3964 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 3965 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 3966 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3967 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3968 } else { 3969 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 3970 if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 3971 } 3972 } else { 3973 for (i=0;i<n_subdomains;i++) { 3974 metis_coarse_subdivision[i]=i; 3975 } 3976 } 3977 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 3978 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 3979 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr); 3980 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 3981 for (i=0;i<size_prec_comm;i++) { coarse_subdivision[i]=MPI_PROC_NULL; } 3982 for (i=0;i<n_subdomains;i++) { coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); } 3983 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 3984 } 3985 3986 /* Create new communicator for coarse problem splitting the old one */ 3987 if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 3988 coarse_color=0; /* for communicator splitting */ 3989 active_rank=rank_prec_comm; /* for insertion of matrix values */ 3990 } 3991 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 3992 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 3993 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 3994 3995 if ( coarse_color == 0 ) { 3996 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 3997 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 3998 } else { 3999 rank_coarse_comm = MPI_PROC_NULL; 4000 } 4001 4002 /* master proc take care of arranging and distributing coarse information */ 4003 if (rank_coarse_comm == master_proc) { 4004 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 4005 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 4006 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr); 4007 /* some initializations */ 4008 displacements_recv[0]=0; 4009 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4010 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 4011 for (j=0;j<size_coarse_comm;j++) { 4012 for (i=0;i<size_prec_comm;i++) { 4013 if (coarse_subdivision[i]==j) { 4014 total_count_recv[j]++; 4015 } 4016 } 4017 } 4018 /* displacements needed for scatterv of total_ranks_recv */ 4019 for (i=1;i<size_coarse_comm;i++) { displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; } 4020 /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 4021 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 4022 for (j=0;j<size_coarse_comm;j++) { 4023 for (i=0;i<size_prec_comm;i++) { 4024 if (coarse_subdivision[i]==j) { 4025 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 4026 total_count_recv[j]+=1; 4027 } 4028 } 4029 } 4030 /*for (j=0;j<size_coarse_comm;j++) { 4031 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 4032 for (i=0;i<total_count_recv[j];i++) { 4033 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 4034 } 4035 printf("\n"); 4036 }*/ 4037 4038 /* identify new decomposition in terms of ranks in the old communicator */ 4039 for (i=0;i<n_subdomains;i++) { 4040 coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 4041 } 4042 /*printf("coarse_subdivision in old end new ranks\n"); 4043 for (i=0;i<size_prec_comm;i++) 4044 if (coarse_subdivision[i]!=MPI_PROC_NULL) { 4045 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 4046 } else { 4047 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 4048 } 4049 printf("\n");*/ 4050 } 4051 4052 /* Scatter new decomposition for send details */ 4053 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4054 /* Scatter receiving details to members of coarse decomposition */ 4055 if ( coarse_color == 0) { 4056 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4057 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 4058 ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 4059 } 4060 4061 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 4062 if (coarse_color == 0) { 4063 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 4064 for (i=0;i<count_recv;i++) 4065 printf("%d ",ranks_recv[i]); 4066 printf("\n"); 4067 }*/ 4068 4069 if (rank_prec_comm == master_proc) { 4070 ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 4071 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 4072 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr); 4073 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 4074 } 4075 break; 4076 } 4077 4078 case(REPLICATED_BDDC): 4079 4080 pcbddc->coarse_communications_type = GATHERS_BDDC; 4081 coarse_mat_type = MATSEQAIJ; 4082 coarse_pc_type = PCLU; 4083 coarse_ksp_type = KSPPREONLY; 4084 coarse_comm = PETSC_COMM_SELF; 4085 active_rank = rank_prec_comm; 4086 break; 4087 4088 case(PARALLEL_BDDC): 4089 4090 pcbddc->coarse_communications_type = SCATTERS_BDDC; 4091 coarse_mat_type = MATMPIAIJ; 4092 coarse_pc_type = PCREDUNDANT; 4093 coarse_ksp_type = KSPPREONLY; 4094 coarse_comm = prec_comm; 4095 active_rank = rank_prec_comm; 4096 break; 4097 4098 case(SEQUENTIAL_BDDC): 4099 pcbddc->coarse_communications_type = GATHERS_BDDC; 4100 coarse_mat_type = MATSEQAIJ; 4101 coarse_pc_type = PCLU; 4102 coarse_ksp_type = KSPPREONLY; 4103 coarse_comm = PETSC_COMM_SELF; 4104 active_rank = master_proc; 4105 break; 4106 } 4107 4108 switch(pcbddc->coarse_communications_type){ 4109 4110 case(SCATTERS_BDDC): 4111 { 4112 if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 4113 4114 IS coarse_IS; 4115 4116 if(pcbddc->coarsening_ratio == 1) { 4117 ins_local_primal_size = pcbddc->local_primal_size; 4118 ins_local_primal_indices = pcbddc->local_primal_indices; 4119 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4120 /* nonzeros */ 4121 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4122 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4123 for (i=0;i<ins_local_primal_size;i++) { 4124 dnz[i] = ins_local_primal_size; 4125 } 4126 } else { 4127 PetscMPIInt send_size; 4128 PetscMPIInt *send_buffer; 4129 PetscInt *aux_ins_indices; 4130 PetscInt ii,jj; 4131 MPI_Request *requests; 4132 4133 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4134 /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */ 4135 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 4136 ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 4137 pcbddc->replicated_primal_size = count_recv; 4138 j = 0; 4139 for (i=0;i<count_recv;i++) { 4140 pcbddc->local_primal_displacements[i] = j; 4141 j += pcbddc->local_primal_sizes[ranks_recv[i]]; 4142 } 4143 pcbddc->local_primal_displacements[count_recv] = j; 4144 ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4145 /* allocate auxiliary space */ 4146 ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4147 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 4148 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 4149 /* allocate stuffs for message massing */ 4150 ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 4151 for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; } 4152 /* send indices to be inserted */ 4153 for (i=0;i<count_recv;i++) { 4154 send_size = pcbddc->local_primal_sizes[ranks_recv[i]]; 4155 ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr); 4156 } 4157 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 4158 send_size = pcbddc->local_primal_size; 4159 ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4160 for (i=0;i<send_size;i++) { 4161 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4162 } 4163 ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4164 } 4165 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4166 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 4167 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4168 } 4169 j = 0; 4170 for (i=0;i<count_recv;i++) { 4171 ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i]; 4172 localsizes2[i] = ii*ii; 4173 localdispl2[i] = j; 4174 j += localsizes2[i]; 4175 jj = pcbddc->local_primal_displacements[i]; 4176 /* it counts the coarse subdomains sharing the coarse node */ 4177 for (k=0;k<ii;k++) { 4178 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1; 4179 } 4180 } 4181 /* temp_coarse_mat_vals used to store matrix values to be received */ 4182 ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4183 /* evaluate how many values I will insert in coarse mat */ 4184 ins_local_primal_size = 0; 4185 for (i=0;i<pcbddc->coarse_size;i++) { 4186 if (aux_ins_indices[i]) { 4187 ins_local_primal_size++; 4188 } 4189 } 4190 /* evaluate indices I will insert in coarse mat */ 4191 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4192 j = 0; 4193 for(i=0;i<pcbddc->coarse_size;i++) { 4194 if(aux_ins_indices[i]) { 4195 ins_local_primal_indices[j] = i; 4196 j++; 4197 } 4198 } 4199 /* processes partecipating in coarse problem receive matrix data from their friends */ 4200 for (i=0;i<count_recv;i++) { 4201 ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 4202 } 4203 if (rank_coarse_proc_send_to != MPI_PROC_NULL ) { 4204 send_size = pcbddc->local_primal_size*pcbddc->local_primal_size; 4205 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 4206 } 4207 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4208 /* nonzeros */ 4209 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr); 4210 ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 4211 /* use aux_ins_indices to realize a global to local mapping */ 4212 j=0; 4213 for(i=0;i<pcbddc->coarse_size;i++){ 4214 if(aux_ins_indices[i]==0){ 4215 aux_ins_indices[i]=-1; 4216 } else { 4217 aux_ins_indices[i]=j; 4218 j++; 4219 } 4220 } 4221 for (i=0;i<count_recv;i++) { 4222 j = pcbddc->local_primal_sizes[ranks_recv[i]]; 4223 for (k=0;k<j;k++) { 4224 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j; 4225 } 4226 } 4227 /* check */ 4228 for (i=0;i<ins_local_primal_size;i++) { 4229 if (dnz[i] > ins_local_primal_size) { 4230 dnz[i] = ins_local_primal_size; 4231 } 4232 } 4233 ierr = PetscFree(requests);CHKERRQ(ierr); 4234 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 4235 if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 4236 } 4237 /* create local to global mapping needed by coarse MATIS */ 4238 if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);} 4239 coarse_comm = prec_comm; 4240 active_rank = rank_prec_comm; 4241 ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 4242 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 4243 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 4244 } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) { 4245 /* arrays for values insertion */ 4246 ins_local_primal_size = pcbddc->local_primal_size; 4247 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4248 ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 4249 for (j=0;j<ins_local_primal_size;j++){ 4250 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 4251 for (i=0;i<ins_local_primal_size;i++) { 4252 ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 4253 } 4254 } 4255 } 4256 break; 4257 4258 } 4259 4260 case(GATHERS_BDDC): 4261 { 4262 4263 PetscMPIInt mysize,mysize2; 4264 PetscMPIInt *send_buffer; 4265 4266 if (rank_prec_comm==active_rank) { 4267 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 4268 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr); 4269 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 4270 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 4271 /* arrays for values insertion */ 4272 for (i=0;i<size_prec_comm;i++) { localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; } 4273 localdispl2[0]=0; 4274 for (i=1;i<size_prec_comm;i++) { localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; } 4275 j=0; 4276 for (i=0;i<size_prec_comm;i++) { j+=localsizes2[i]; } 4277 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 4278 } 4279 4280 mysize=pcbddc->local_primal_size; 4281 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 4282 ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4283 for (i=0;i<mysize;i++) { 4284 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i]; 4285 } 4286 if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 4287 ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 4288 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 4289 } else { 4290 ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 4291 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 4292 } 4293 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4294 break; 4295 }/* switch on coarse problem and communications associated with finished */ 4296 } 4297 4298 /* Now create and fill up coarse matrix */ 4299 if ( rank_prec_comm == active_rank ) { 4300 4301 Mat matis_coarse_local_mat; 4302 4303 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4304 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 4305 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 4306 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 4307 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4308 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4309 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4310 } else { 4311 ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 4312 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 4313 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 4314 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 4315 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 4316 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 4317 } 4318 /* preallocation */ 4319 if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 4320 4321 PetscInt lrows,lcols; 4322 4323 ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr); 4324 ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr); 4325 4326 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4327 4328 Vec vec_dnz,vec_onz; 4329 PetscScalar *my_dnz,*my_onz,*array; 4330 PetscInt *mat_ranges,*row_ownership; 4331 PetscInt coarse_index_row,coarse_index_col,owner; 4332 4333 ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr); 4334 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);CHKERRQ(ierr); 4335 ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr); 4336 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 4337 4338 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr); 4339 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr); 4340 ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4341 ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 4342 4343 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr); 4344 ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 4345 for (i=0;i<size_prec_comm;i++) { 4346 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 4347 row_ownership[j]=i; 4348 } 4349 } 4350 4351 for (i=0;i<pcbddc->local_primal_size;i++) { 4352 coarse_index_row = pcbddc->local_primal_indices[i]; 4353 owner = row_ownership[coarse_index_row]; 4354 for (j=i;j<pcbddc->local_primal_size;j++) { 4355 owner = row_ownership[coarse_index_row]; 4356 coarse_index_col = pcbddc->local_primal_indices[j]; 4357 if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) { 4358 my_dnz[i] += 1.0; 4359 } else { 4360 my_onz[i] += 1.0; 4361 } 4362 if (i != j) { 4363 owner = row_ownership[coarse_index_col]; 4364 if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) { 4365 my_dnz[j] += 1.0; 4366 } else { 4367 my_onz[j] += 1.0; 4368 } 4369 } 4370 } 4371 } 4372 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 4373 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 4374 if (pcbddc->local_primal_size) { 4375 ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 4376 ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 4377 } 4378 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 4379 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 4380 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 4381 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 4382 j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm]; 4383 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 4384 for (i=0;i<j;i++) { 4385 dnz[i] = (PetscInt)array[i]; 4386 } 4387 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 4388 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 4389 for (i=0;i<j;i++) { 4390 onz[i] = (PetscInt)array[i]; 4391 } 4392 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 4393 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 4394 ierr = PetscFree(my_onz);CHKERRQ(ierr); 4395 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 4396 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 4397 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 4398 } else { 4399 for (k=0;k<size_prec_comm;k++){ 4400 offset=pcbddc->local_primal_displacements[k]; 4401 offset2=localdispl2[k]; 4402 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4403 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4404 for (j=0;j<ins_local_primal_size;j++){ 4405 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4406 } 4407 for (j=0;j<ins_local_primal_size;j++) { 4408 ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr); 4409 } 4410 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4411 } 4412 } 4413 /* check */ 4414 for (i=0;i<lrows;i++) { 4415 if (dnz[i]>lcols) { 4416 dnz[i]=lcols; 4417 } 4418 if (onz[i]>pcbddc->coarse_size-lcols) { 4419 onz[i]=pcbddc->coarse_size-lcols; 4420 } 4421 } 4422 ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,PETSC_NULL,dnz);CHKERRQ(ierr); 4423 ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,PETSC_NULL,dnz,PETSC_NULL,onz);CHKERRQ(ierr); 4424 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4425 } else { 4426 ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr); 4427 ierr = PetscFree(dnz);CHKERRQ(ierr); 4428 } 4429 /* insert values */ 4430 if (pcbddc->coarse_problem_type == PARALLEL_BDDC) { 4431 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4432 } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4433 if (pcbddc->coarsening_ratio == 1) { 4434 ins_coarse_mat_vals = coarse_submat_vals; 4435 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr); 4436 } else { 4437 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4438 for (k=0;k<pcbddc->replicated_primal_size;k++) { 4439 offset = pcbddc->local_primal_displacements[k]; 4440 offset2 = localdispl2[k]; 4441 ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k]; 4442 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4443 for (j=0;j<ins_local_primal_size;j++){ 4444 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4445 } 4446 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4447 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4448 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4449 } 4450 } 4451 ins_local_primal_indices = 0; 4452 ins_coarse_mat_vals = 0; 4453 } else { 4454 for (k=0;k<size_prec_comm;k++){ 4455 offset=pcbddc->local_primal_displacements[k]; 4456 offset2=localdispl2[k]; 4457 ins_local_primal_size = pcbddc->local_primal_sizes[k]; 4458 ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 4459 for (j=0;j<ins_local_primal_size;j++){ 4460 ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j]; 4461 } 4462 ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2]; 4463 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 4464 ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); 4465 } 4466 ins_local_primal_indices = 0; 4467 ins_coarse_mat_vals = 0; 4468 } 4469 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4470 ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4471 /* symmetry of coarse matrix */ 4472 if (issym) { 4473 ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4474 } 4475 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 4476 } 4477 4478 /* create loc to glob scatters if needed */ 4479 if (pcbddc->coarse_communications_type == SCATTERS_BDDC) { 4480 IS local_IS,global_IS; 4481 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 4482 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 4483 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4484 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 4485 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 4486 } 4487 4488 /* free memory no longer needed */ 4489 if (coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 4490 if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 4491 if (ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); } 4492 if (localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr); } 4493 if (localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr); } 4494 if (temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); } 4495 4496 /* Eval coarse null space */ 4497 if (pcbddc->NullSpace) { 4498 const Vec *nsp_vecs; 4499 PetscInt nsp_size,coarse_nsp_size; 4500 PetscBool nsp_has_cnst; 4501 PetscReal test_null; 4502 Vec *coarse_nsp_vecs; 4503 4504 coarse_nsp_size = 0; 4505 coarse_nsp_vecs = 0; 4506 ierr = MatNullSpaceGetVecs(pcbddc->NullSpace,&nsp_has_cnst,&nsp_size,&nsp_vecs);CHKERRQ(ierr); 4507 if (rank_prec_comm == active_rank) { 4508 ierr = PetscMalloc((nsp_size+1)*sizeof(Vec),&coarse_nsp_vecs);CHKERRQ(ierr); 4509 for (i=0;i<nsp_size+1;i++) { 4510 ierr = VecDuplicate(pcbddc->coarse_vec,&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4511 } 4512 } 4513 if (nsp_has_cnst) { 4514 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4515 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4516 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4517 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4518 if (rank_prec_comm == active_rank) { 4519 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4520 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&test_null);CHKERRQ(ierr); 4521 if (test_null > 1.0e-12 && pcbddc->dbg_flag ) { 4522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Constant coarse null space error % 1.14e\n",test_null);CHKERRQ(ierr); 4523 } 4524 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4525 coarse_nsp_size++; 4526 } 4527 } 4528 for (i=0;i<nsp_size;i++) { 4529 ierr = VecScatterBegin(matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4530 ierr = VecScatterEnd (matis->ctx,nsp_vecs[i],pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4531 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 4532 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4533 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4534 if (rank_prec_comm == active_rank) { 4535 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4536 ierr = VecNorm(pcbddc->coarse_rhs,NORM_2,&test_null);CHKERRQ(ierr); 4537 if (test_null > 1.0e-12 && pcbddc->dbg_flag ) { 4538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Vec %d coarse null space error % 1.14e\n",i,test_null);CHKERRQ(ierr); 4539 } 4540 ierr = VecCopy(pcbddc->coarse_vec,coarse_nsp_vecs[coarse_nsp_size]);CHKERRQ(ierr); 4541 coarse_nsp_size++; 4542 } 4543 } 4544 if (coarse_nsp_size > 0) { 4545 /* TODO orthonormalize vecs */ 4546 ierr = VecNormalize(coarse_nsp_vecs[0],PETSC_NULL);CHKERRQ(ierr); 4547 ierr = MatNullSpaceCreate(coarse_comm,PETSC_FALSE,coarse_nsp_size,coarse_nsp_vecs,&pcbddc->CoarseNullSpace);CHKERRQ(ierr); 4548 for (i=0;i<nsp_size+1;i++) { 4549 ierr = VecDestroy(&coarse_nsp_vecs[i]);CHKERRQ(ierr); 4550 } 4551 } 4552 ierr = PetscFree(coarse_nsp_vecs);CHKERRQ(ierr); 4553 } 4554 4555 /* KSP for coarse problem */ 4556 if (rank_prec_comm == active_rank) { 4557 PetscBool isbddc=PETSC_FALSE; 4558 4559 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 4560 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4561 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4562 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 4563 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4564 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4565 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4566 /* Allow user's customization */ 4567 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 4568 /* Set Up PC for coarse problem BDDC */ 4569 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4570 i = pcbddc->current_level+1; 4571 ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr); 4572 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4573 ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4574 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 4575 if (pcbddc->CoarseNullSpace) { ierr = PCBDDCSetNullSpace(pc_temp,pcbddc->CoarseNullSpace);CHKERRQ(ierr); } 4576 if (dbg_flag) { 4577 ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr); 4578 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4579 } 4580 } 4581 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4582 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4583 4584 ierr = KSPGetTolerances(pcbddc->coarse_ksp,PETSC_NULL,PETSC_NULL,PETSC_NULL,&j);CHKERRQ(ierr); 4585 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4586 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4587 if (j == 1) { 4588 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4589 if (isbddc) { 4590 ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr); 4591 } 4592 } 4593 } 4594 /* Check coarse problem if requested */ 4595 if ( dbg_flag && rank_prec_comm == active_rank ) { 4596 KSP check_ksp; 4597 PC check_pc; 4598 Vec check_vec; 4599 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 4600 KSPType check_ksp_type; 4601 4602 /* Create ksp object suitable for extreme eigenvalues' estimation */ 4603 ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr); 4604 ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 4605 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4606 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4607 if (issym) { 4608 check_ksp_type = KSPCG; 4609 } else { 4610 check_ksp_type = KSPGMRES; 4611 } 4612 ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr); 4613 } else { 4614 check_ksp_type = KSPPREONLY; 4615 } 4616 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4617 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4618 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4619 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4620 /* create random vec */ 4621 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 4622 ierr = VecSetRandom(check_vec,PETSC_NULL);CHKERRQ(ierr); 4623 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,check_vec,PETSC_NULL);CHKERRQ(ierr); } 4624 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4625 /* solve coarse problem */ 4626 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 4627 if (pcbddc->CoarseNullSpace) { ierr = MatNullSpaceRemove(pcbddc->CoarseNullSpace,pcbddc->coarse_vec,PETSC_NULL);CHKERRQ(ierr); } 4628 /* check coarse problem residual error */ 4629 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 4630 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4631 ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 4632 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4633 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4634 /* get eigenvalue estimation if inexact */ 4635 if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 4636 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 4637 ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr); 4638 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr); 4639 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 4640 } 4641 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error : %1.14e\n",infty_error);CHKERRQ(ierr); 4642 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr); 4643 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4644 } 4645 if (dbg_flag) { ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); } 4646 4647 PetscFunctionReturn(0); 4648 } 4649 4650 #undef __FUNCT__ 4651 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 4652 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 4653 { 4654 4655 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4656 PC_IS *pcis = (PC_IS*)pc->data; 4657 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 4658 PCBDDCGraph mat_graph=pcbddc->mat_graph; 4659 PetscInt *is_indices,*auxis; 4660 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 4661 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 4662 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 4663 PetscBool same_set; 4664 MPI_Comm interface_comm=((PetscObject)pc)->comm; 4665 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 4666 const PetscInt *neumann_nodes; 4667 const PetscInt *dirichlet_nodes; 4668 IS used_IS,*custom_ISForDofs; 4669 PetscScalar *array; 4670 PetscScalar *array2; 4671 PetscViewer viewer=pcbddc->dbg_viewer; 4672 PetscInt *queue_in_global_numbering; 4673 4674 PetscFunctionBegin; 4675 /* Setup local adjacency graph */ 4676 mat_graph->nvtxs=pcis->n; 4677 if (!mat_graph->xadj) { NEUMANNCNT = 1; } 4678 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 4679 i = mat_graph->nvtxs; 4680 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 4681 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4682 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4683 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4684 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4685 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4686 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4687 4688 /* Setting dofs splitting in mat_graph->which_dof 4689 Get information about dofs' splitting if provided by the user 4690 Otherwise it assumes a constant block size */ 4691 vertex_size=0; 4692 if (!pcbddc->n_ISForDofs) { 4693 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4694 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4695 for (i=0;i<bs;i++) { 4696 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4697 } 4698 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4699 vertex_size=1; 4700 /* remove my references to IS objects */ 4701 for (i=0;i<bs;i++) { 4702 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4703 } 4704 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4705 } 4706 for (i=0;i<pcbddc->n_ISForDofs;i++) { 4707 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4708 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4709 for (j=0;j<k;j++) { 4710 mat_graph->which_dof[is_indices[j]]=i; 4711 } 4712 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4713 } 4714 /* use mat block size as vertex size if it has not yet set */ 4715 if (!vertex_size) { 4716 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4717 } 4718 4719 /* count number of neigh per node */ 4720 total_counts=0; 4721 for (i=1;i<pcis->n_neigh;i++){ 4722 s=pcis->n_shared[i]; 4723 total_counts+=s; 4724 for (j=0;j<s;j++){ 4725 mat_graph->count[pcis->shared[i][j]] += 1; 4726 } 4727 } 4728 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4729 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4730 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4731 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4732 if (used_IS) { 4733 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4734 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4735 for (i=0;i<neumann_bsize;i++){ 4736 iindex = neumann_nodes[i]; 4737 if (mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4738 mat_graph->count[iindex]+=1; 4739 total_counts++; 4740 array[iindex]=array[iindex]+1.0; 4741 } else if (array[iindex]>0.0) { 4742 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 4743 } 4744 } 4745 } 4746 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4747 /* allocate space for storing the set of neighbours for each node */ 4748 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4749 if (mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4750 for (i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4751 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4752 for (i=1;i<pcis->n_neigh;i++){ 4753 s=pcis->n_shared[i]; 4754 for (j=0;j<s;j++) { 4755 k=pcis->shared[i][j]; 4756 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4757 mat_graph->count[k]+=1; 4758 } 4759 } 4760 /* Check consistency of Neumann nodes */ 4761 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4762 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4763 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4764 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4765 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4766 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4767 /* set -1 fake neighbour to mimic Neumann boundary */ 4768 if (used_IS) { 4769 for (i=0;i<neumann_bsize;i++){ 4770 iindex = neumann_nodes[i]; 4771 if (mat_graph->count[iindex] > NEUMANNCNT){ 4772 if (mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4773 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4774 } 4775 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4776 mat_graph->count[iindex]+=1; 4777 } 4778 } 4779 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4780 } 4781 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4782 /* sort set of sharing subdomains */ 4783 for (i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4784 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4785 for (i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4786 nodes_touched=0; 4787 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4788 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4789 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4790 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4791 if (used_IS) { 4792 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4793 if (dirichlet_bsize && matis->pure_neumann) { 4794 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4795 } 4796 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4797 for (i=0;i<dirichlet_bsize;i++){ 4798 iindex=dirichlet_nodes[i]; 4799 if (mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4800 if (array[iindex]>0.0) { 4801 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! Wrong node %d\n",iindex); 4802 } 4803 mat_graph->touched[iindex]=PETSC_TRUE; 4804 mat_graph->where[iindex]=0; 4805 nodes_touched++; 4806 array2[iindex]=array2[iindex]+1.0; 4807 } 4808 } 4809 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4810 } 4811 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4812 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4813 /* Check consistency of Dirichlet nodes */ 4814 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4815 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4816 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4817 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4818 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4819 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4820 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4821 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4822 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4823 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4824 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4825 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4826 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4827 if (used_IS) { 4828 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4829 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4830 for (i=0;i<dirichlet_bsize;i++){ 4831 iindex=dirichlet_nodes[i]; 4832 if (array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4833 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4834 } 4835 } 4836 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4837 } 4838 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4839 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4840 4841 for (i=0;i<mat_graph->nvtxs;i++){ 4842 if (!mat_graph->count[i]){ /* interior nodes */ 4843 mat_graph->touched[i]=PETSC_TRUE; 4844 mat_graph->where[i]=0; 4845 nodes_touched++; 4846 } 4847 } 4848 mat_graph->ncmps = 0; 4849 i=0; 4850 while(nodes_touched<mat_graph->nvtxs) { 4851 /* find first untouched node in local ordering */ 4852 while(mat_graph->touched[i]) i++; 4853 mat_graph->touched[i]=PETSC_TRUE; 4854 mat_graph->where[i]=where_values; 4855 nodes_touched++; 4856 /* now find all other nodes having the same set of sharing subdomains */ 4857 for (j=i+1;j<mat_graph->nvtxs;j++){ 4858 /* check for same number of sharing subdomains and dof number */ 4859 if (!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4860 /* check for same set of sharing subdomains */ 4861 same_set=PETSC_TRUE; 4862 for (k=0;k<mat_graph->count[j];k++){ 4863 if (mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4864 same_set=PETSC_FALSE; 4865 } 4866 } 4867 /* I found a friend of mine */ 4868 if (same_set) { 4869 mat_graph->where[j]=where_values; 4870 mat_graph->touched[j]=PETSC_TRUE; 4871 nodes_touched++; 4872 } 4873 } 4874 } 4875 where_values++; 4876 } 4877 where_values--; if (where_values<0) where_values=0; 4878 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4879 /* Find connected components defined on the shared interface */ 4880 if (where_values) { 4881 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4882 } 4883 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4884 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4885 for (i=0;i<where_values;i++) { 4886 /* We are not sure that on a given subset of the local interface, 4887 two connected components will be the same among sharing subdomains */ 4888 if (mat_graph->where_ncmps[i]>1) { 4889 adapt_interface=1; 4890 break; 4891 } 4892 } 4893 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4894 if (pcbddc->dbg_flag && adapt_interface_reduced) { 4895 ierr = PetscViewerASCIIPrintf(viewer,"Adapting interface\n");CHKERRQ(ierr); 4896 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4897 } 4898 if (where_values && adapt_interface_reduced) { 4899 4900 PetscInt sum_requests=0,my_rank; 4901 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4902 PetscInt temp_buffer_size,ins_val,global_where_counter; 4903 PetscInt *cum_recv_counts; 4904 PetscInt *where_to_nodes_indices; 4905 PetscInt *petsc_buffer; 4906 PetscMPIInt *recv_buffer; 4907 PetscMPIInt *recv_buffer_where; 4908 PetscMPIInt *send_buffer; 4909 PetscMPIInt size_of_send; 4910 PetscInt *sizes_of_sends; 4911 MPI_Request *send_requests; 4912 MPI_Request *recv_requests; 4913 PetscInt *where_cc_adapt; 4914 PetscInt **temp_buffer; 4915 PetscInt *nodes_to_temp_buffer_indices; 4916 PetscInt *add_to_where; 4917 PetscInt *aux_new_xadj,*new_xadj,*new_adjncy; 4918 4919 /* Retrict adjacency graph using information from connected components */ 4920 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&aux_new_xadj);CHKERRQ(ierr); 4921 for (i=0;i<mat_graph->nvtxs;i++) { 4922 aux_new_xadj[i]=1; 4923 } 4924 for (i=0;i<mat_graph->ncmps;i++) { 4925 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4926 for (j=0;j<k;j++) { 4927 aux_new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]=k; 4928 } 4929 } 4930 j = 0; 4931 for (i=0;i<mat_graph->nvtxs;i++) { 4932 j += aux_new_xadj[i]; 4933 } 4934 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&new_xadj);CHKERRQ(ierr); 4935 ierr = PetscMalloc(j*sizeof(PetscInt),&new_adjncy);CHKERRQ(ierr); 4936 new_xadj[0]=0; 4937 for (i=0;i<mat_graph->nvtxs;i++) { 4938 new_xadj[i+1]=new_xadj[i]+aux_new_xadj[i]; 4939 if (aux_new_xadj[i]==1) { 4940 new_adjncy[new_xadj[i]]=i; 4941 } 4942 } 4943 ierr = PetscFree(aux_new_xadj);CHKERRQ(ierr); 4944 for (i=0;i<mat_graph->ncmps;i++) { 4945 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4946 for (j=0;j<k;j++) { 4947 ierr = PetscMemcpy(&new_adjncy[new_xadj[mat_graph->queue[mat_graph->cptr[i]+j]]],&mat_graph->queue[mat_graph->cptr[i]],k*sizeof(PetscInt));CHKERRQ(ierr); 4948 } 4949 } 4950 ierr = PCBDDCSetLocalAdjacencyGraph(pc,mat_graph->nvtxs,new_xadj,new_adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 4951 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 4952 for (i=0;i<mat_graph->ncmps;i++) { 4953 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4954 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4955 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4956 } 4957 /* allocate some space */ 4958 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4959 ierr = PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4960 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4961 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4962 /* first count how many neighbours per connected component I will receive from */ 4963 cum_recv_counts[0]=0; 4964 for (i=1;i<where_values+1;i++){ 4965 j=0; 4966 while(mat_graph->where[j] != i) { j++; } 4967 where_to_nodes_indices[i-1]=j; 4968 if (mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4969 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4970 } 4971 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4972 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4973 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4974 for (i=0;i<cum_recv_counts[where_values];i++) { 4975 send_requests[i]=MPI_REQUEST_NULL; 4976 recv_requests[i]=MPI_REQUEST_NULL; 4977 } 4978 /* exchange with my neighbours the number of my connected components on the shared interface */ 4979 for (i=0;i<where_values;i++){ 4980 j=where_to_nodes_indices[i]; 4981 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4982 for (;k<mat_graph->count[j];k++){ 4983 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4984 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4985 sum_requests++; 4986 } 4987 } 4988 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4989 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4990 /* determine the connected component I need to adapt */ 4991 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4992 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4993 for (i=0;i<where_values;i++){ 4994 for (j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4995 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4996 if ( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4997 where_cc_adapt[i]=PETSC_TRUE; 4998 break; 4999 } 5000 } 5001 } 5002 buffer_size = 0; 5003 for (i=0;i<where_values;i++) { 5004 if (where_cc_adapt[i]) { 5005 for (j=i;j<mat_graph->ncmps;j++) { 5006 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5007 buffer_size += 1 + mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5008 } 5009 } 5010 } 5011 } 5012 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 5013 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 5014 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 5015 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 5016 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 5017 sum_requests=0; 5018 start_of_send=0; 5019 start_of_recv=cum_recv_counts[where_values]; 5020 for (i=0;i<where_values;i++) { 5021 if (where_cc_adapt[i]) { 5022 size_of_send=0; 5023 for (j=i;j<mat_graph->ncmps;j++) { 5024 if (mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 1 to where_values included */ 5025 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5026 size_of_send+=1; 5027 for (k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 5028 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 5029 } 5030 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 5031 } 5032 } 5033 j = where_to_nodes_indices[i]; 5034 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5035 sizes_of_sends[i]=size_of_send; 5036 for (;k<mat_graph->count[j];k++){ 5037 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5038 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5039 sum_requests++; 5040 } 5041 start_of_send+=size_of_send; 5042 } 5043 } 5044 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5045 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5046 buffer_size=0; 5047 for (k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 5048 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 5049 /* now exchange the data */ 5050 start_of_recv=0; 5051 start_of_send=0; 5052 sum_requests=0; 5053 for (i=0;i<where_values;i++) { 5054 if (where_cc_adapt[i]) { 5055 size_of_send = sizes_of_sends[i]; 5056 j = where_to_nodes_indices[i]; 5057 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 5058 for (;k<mat_graph->count[j];k++){ 5059 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 5060 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 5061 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 5062 start_of_recv+=size_of_recv; 5063 sum_requests++; 5064 } 5065 start_of_send+=size_of_send; 5066 } 5067 } 5068 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5069 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5070 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 5071 for (k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 5072 for (j=0;j<buffer_size;) { 5073 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 5074 k=petsc_buffer[j]+1; 5075 j+=k; 5076 } 5077 sum_requests=cum_recv_counts[where_values]; 5078 start_of_recv=0; 5079 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5080 global_where_counter=0; 5081 for (i=0;i<where_values;i++){ 5082 if (where_cc_adapt[i]){ 5083 temp_buffer_size=0; 5084 /* find nodes on the shared interface we need to adapt */ 5085 for (j=0;j<mat_graph->nvtxs;j++){ 5086 if (mat_graph->where[j]==i+1) { 5087 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 5088 temp_buffer_size++; 5089 } else { 5090 nodes_to_temp_buffer_indices[j]=-1; 5091 } 5092 } 5093 /* allocate some temporary space */ 5094 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 5095 ierr = PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 5096 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 5097 for (j=1;j<temp_buffer_size;j++){ 5098 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 5099 } 5100 /* analyze contributions from neighbouring subdomains for i-th conn comp 5101 temp buffer structure: 5102 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 5103 3 neighs procs with structured connected components: 5104 neigh 0: [0 1 4], [2 3]; (2 connected components) 5105 neigh 1: [0 1], [2 3 4]; (2 connected components) 5106 neigh 2: [0 4], [1], [2 3]; (3 connected components) 5107 tempbuffer (row-oriented) should be filled as: 5108 [ 0, 0, 0; 5109 0, 0, 1; 5110 1, 1, 2; 5111 1, 1, 2; 5112 0, 1, 0; ]; 5113 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 5114 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 5115 */ 5116 for (j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 5117 ins_val=0; 5118 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 5119 for (buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 5120 for (k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 5121 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 5122 } 5123 buffer_size+=k; 5124 ins_val++; 5125 } 5126 start_of_recv+=size_of_recv; 5127 sum_requests++; 5128 } 5129 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 5130 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 5131 for (j=0;j<temp_buffer_size;j++){ 5132 if (!add_to_where[j]){ /* found a new cc */ 5133 global_where_counter++; 5134 add_to_where[j]=global_where_counter; 5135 for (k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 5136 same_set=PETSC_TRUE; 5137 for (s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 5138 if (temp_buffer[j][s]!=temp_buffer[k][s]) { 5139 same_set=PETSC_FALSE; 5140 break; 5141 } 5142 } 5143 if (same_set) { add_to_where[k]=global_where_counter; } 5144 } 5145 } 5146 } 5147 /* insert new data in where array */ 5148 temp_buffer_size=0; 5149 for (j=0;j<mat_graph->nvtxs;j++){ 5150 if (mat_graph->where[j]==i+1) { 5151 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 5152 temp_buffer_size++; 5153 } 5154 } 5155 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 5156 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 5157 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 5158 } 5159 } 5160 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 5161 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 5162 ierr = PetscFree(send_requests);CHKERRQ(ierr); 5163 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 5164 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 5165 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 5166 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 5167 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 5168 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 5169 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 5170 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 5171 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 5172 if (global_where_counter) { 5173 for (i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 5174 global_where_counter=0; 5175 for (i=0;i<mat_graph->nvtxs;i++){ 5176 if (mat_graph->where[i] && !mat_graph->touched[i]) { 5177 global_where_counter++; 5178 for (j=i+1;j<mat_graph->nvtxs;j++){ 5179 if (!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 5180 mat_graph->where[j]=global_where_counter; 5181 mat_graph->touched[j]=PETSC_TRUE; 5182 } 5183 } 5184 mat_graph->where[i]=global_where_counter; 5185 mat_graph->touched[i]=PETSC_TRUE; 5186 } 5187 } 5188 where_values=global_where_counter; 5189 } 5190 if (global_where_counter) { 5191 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 5192 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 5193 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 5194 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 5195 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 5196 } 5197 } /* Finished adapting interface */ 5198 /* For consistency among neughbouring procs, I need to sort (by global ordering) each connected component */ 5199 for (i=0;i<mat_graph->ncmps;i++) { 5200 k = mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5201 ierr = ISLocalToGlobalMappingApply(matis->mapping,k,&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 5202 ierr = PetscSortIntWithArray(k,&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 5203 } 5204 5205 PetscInt nfc=0; 5206 PetscInt nec=0; 5207 PetscInt nvc=0; 5208 PetscBool twodim_flag=PETSC_FALSE; 5209 for (i=0; i<mat_graph->ncmps; i++) { 5210 if ( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 5211 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 5212 nfc++; 5213 } else { /* note that nec will be zero in 2d */ 5214 nec++; 5215 } 5216 } else { 5217 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5218 } 5219 } 5220 if (!nec) { /* we are in a 2d case -> no faces, only edges */ 5221 nec = nfc; 5222 nfc = 0; 5223 twodim_flag = PETSC_TRUE; 5224 } 5225 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 5226 k=0; 5227 for (i=0; i<mat_graph->ncmps; i++) { 5228 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 5229 if ( j > k) { 5230 k=j; 5231 } 5232 if (j<=vertex_size) { 5233 k+=vertex_size; 5234 } 5235 } 5236 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 5237 if (!pcbddc->vertices_flag && !pcbddc->edges_flag) { 5238 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 5239 use_faces=PETSC_TRUE; 5240 } 5241 if (!pcbddc->vertices_flag && !pcbddc->faces_flag) { 5242 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 5243 use_edges=PETSC_TRUE; 5244 } 5245 nfc=0; 5246 nec=0; 5247 for (i=0; i<mat_graph->ncmps; i++) { 5248 if ( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 5249 for (j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 5250 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 5251 } 5252 if (mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 5253 if (twodim_flag) { 5254 if (use_edges) { 5255 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5256 nec++; 5257 } 5258 } else { 5259 if (use_faces) { 5260 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 5261 nfc++; 5262 } 5263 } 5264 } else { 5265 if (use_edges) { 5266 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 5267 nec++; 5268 } 5269 } 5270 } 5271 } 5272 pcbddc->n_ISForFaces=nfc; 5273 pcbddc->n_ISForEdges=nec; 5274 nvc=0; 5275 if ( !pcbddc->constraints_flag ) { 5276 for (i=0; i<mat_graph->ncmps; i++) { 5277 if ( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 5278 for ( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 5279 auxis[nvc]=mat_graph->queue[j]; 5280 nvc++; 5281 } 5282 } 5283 } 5284 } 5285 /* sort vertex set (by local ordering) */ 5286 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 5287 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 5288 if (pcbddc->dbg_flag) { 5289 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5290 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5291 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 5292 for (i=0;i<mat_graph->ncmps;i++) { 5293 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 5294 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 5295 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 5296 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 5297 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 5298 } 5299 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 5300 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 5301 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",mat_graph->queue[j],queue_in_global_numbering[j]);CHKERRQ(ierr); 5302 } 5303 } 5304 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 5305 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 5306 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 5307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 5308 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5309 } 5310 ierr = PetscFree(auxis);CHKERRQ(ierr); 5311 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 5312 PetscFunctionReturn(0); 5313 } 5314 5315 /* -------------------------------------------------------------------------- */ 5316 5317 /* The following code has been adapted from function IsConnectedSubdomain contained 5318 in source file contig.c of METIS library (version 5.0.1) 5319 It finds connected components of each partition labeled from 1 to n_dist */ 5320 5321 #undef __FUNCT__ 5322 #define __FUNCT__ "PCBDDCFindConnectedComponents" 5323 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 5324 { 5325 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 5326 PetscInt *xadj, *adjncy, *where, *queue; 5327 PetscInt *cptr; 5328 PetscBool *touched; 5329 5330 PetscFunctionBegin; 5331 5332 nvtxs = graph->nvtxs; 5333 xadj = graph->xadj; 5334 adjncy = graph->adjncy; 5335 where = graph->where; 5336 touched = graph->touched; 5337 queue = graph->queue; 5338 cptr = graph->cptr; 5339 5340 for (i=0; i<nvtxs; i++) { 5341 touched[i] = PETSC_FALSE; 5342 } 5343 5344 cum_queue=0; 5345 ncmps=0; 5346 5347 for (n=0; n<n_dist; n++) { 5348 pid = n+1; /* partition labeled by 0 is discarded */ 5349 nleft = 0; 5350 for (i=0; i<nvtxs; i++) { 5351 if (where[i] == pid) 5352 nleft++; 5353 } 5354 for (i=0; i<nvtxs; i++) { 5355 if (where[i] == pid) 5356 break; 5357 } 5358 touched[i] = PETSC_TRUE; 5359 queue[cum_queue] = i; 5360 first = 0; last = 1; 5361 cptr[ncmps] = cum_queue; /* This actually points to queue */ 5362 ncmps_pid = 0; 5363 while (first != nleft) { 5364 if (first == last) { /* Find another starting vertex */ 5365 cptr[++ncmps] = first+cum_queue; 5366 ncmps_pid++; 5367 for (i=0; i<nvtxs; i++) { 5368 if (where[i] == pid && !touched[i]) 5369 break; 5370 } 5371 queue[cum_queue+last] = i; 5372 last++; 5373 touched[i] = PETSC_TRUE; 5374 } 5375 i = queue[cum_queue+first]; 5376 first++; 5377 for (j=xadj[i]; j<xadj[i+1]; j++) { 5378 k = adjncy[j]; 5379 if (where[k] == pid && !touched[k]) { 5380 queue[cum_queue+last] = k; 5381 last++; 5382 touched[k] = PETSC_TRUE; 5383 } 5384 } 5385 } 5386 cptr[++ncmps] = first+cum_queue; 5387 ncmps_pid++; 5388 cum_queue=cptr[ncmps]; 5389 graph->where_ncmps[n] = ncmps_pid; 5390 } 5391 graph->ncmps = ncmps; 5392 5393 PetscFunctionReturn(0); 5394 } 5395