/* TODOLIST
   DofSplitting and DM attached to pc?
   Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide a new SetNeumannBoundaries (same for Dirichlet)
   Exact solvers: solve the local saddle point directly
   - change prec_type to switch_inexact_prec_type
   - add bool solve_exact_saddle_point slot to pcbddc data
   Inexact solvers: global preconditioner application is ready; ask developers (Jed?) how to best implement Dohrmann's approach (PCSHELL?)
   change how the coarse problem is handled (PCBDDCSetCoarseEnvironment):
   - mind the problem with coarsening_factor
   - simplify coarse problem structure -> PCBDDC or PCREDUNDANT, nothing else -> same comm for all levels?
   - remove coarse enums and allow use of PCBDDCGetCoarseKSP
   - remove metis dependency -> use MatPartitioning for multilevel -> assemble serial adjacency in ManageLocalBoundaries?
   - add a levels slot to the bddc data structure and associated Set/Get functions
   code refactoring:
   - pick better names for static functions
   change options structure:
   - insert BDDC into MG framework?
   provide other ops? Ask developers
   remove all unused printf
   man pages
*/

/* ----------------------------------------------------------------------------------------------------------------------------------------------
   Implementation of BDDC preconditioner based on:
   C.
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 26 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 27 28 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 29 #include <petscblaslapack.h> 30 31 /* -------------------------------------------------------------------------- */ 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCSetFromOptions_BDDC" 34 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 35 { 36 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 41 /* Verbose debugging of main data structures */ 42 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 43 /* Some customization for default primal space */ 44 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 47 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 48 /* Coarse solver context */ 49 static const char *avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel",""}; /*order of choiches depends on ENUM defined in bddc.h */ 50 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 51 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 52 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 53 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 55 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 56 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 57 ierr = PetscOptionsTail();CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 /* -------------------------------------------------------------------------- */ 61 EXTERN_C_BEGIN 62 #undef __FUNCT__ 63 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 64 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 65 { 66 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 67 68 PetscFunctionBegin; 69 pcbddc->coarse_problem_type = CPT; 70 PetscFunctionReturn(0); 71 } 72 EXTERN_C_END 73 #undef __FUNCT__ 74 
#define __FUNCT__ "PCBDDCSetCoarseProblemType" 75 /*@ 76 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 77 78 Not collective 79 80 Input Parameters: 81 + pc - the preconditioning context 82 - CoarseProblemType - pick a better name and explain what this is 83 84 Level: intermediate 85 86 Notes: 87 Not collective but all procs must call with same arguments. 88 89 .seealso: PCBDDC 90 @*/ 91 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 92 { 93 PetscErrorCode ierr; 94 95 PetscFunctionBegin; 96 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 97 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 98 PetscFunctionReturn(0); 99 } 100 /* -------------------------------------------------------------------------- */ 101 EXTERN_C_BEGIN 102 #undef __FUNCT__ 103 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 104 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 105 { 106 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 111 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 112 pcbddc->DirichletBoundaries=DirichletBoundaries; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 #undef __FUNCT__ 117 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 118 /*@ 119 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 120 of Dirichlet boundaries for the global problem. 
121 122 Not collective 123 124 Input Parameters: 125 + pc - the preconditioning context 126 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 127 128 Level: intermediate 129 130 Notes: 131 132 .seealso: PCBDDC 133 @*/ 134 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 135 { 136 PetscErrorCode ierr; 137 138 PetscFunctionBegin; 139 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 140 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 /* -------------------------------------------------------------------------- */ 144 EXTERN_C_BEGIN 145 #undef __FUNCT__ 146 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 147 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 148 { 149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 154 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 155 pcbddc->NeumannBoundaries=NeumannBoundaries; 156 PetscFunctionReturn(0); 157 } 158 EXTERN_C_END 159 #undef __FUNCT__ 160 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 161 /*@ 162 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 163 of Neumann boundaries for the global problem. 
164 165 Not collective 166 167 Input Parameters: 168 + pc - the preconditioning context 169 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 170 171 Level: intermediate 172 173 Notes: 174 175 .seealso: PCBDDC 176 @*/ 177 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 178 { 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 183 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 /* -------------------------------------------------------------------------- */ 187 EXTERN_C_BEGIN 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 190 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 191 { 192 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 193 194 PetscFunctionBegin; 195 *DirichletBoundaries = pcbddc->DirichletBoundaries; 196 PetscFunctionReturn(0); 197 } 198 EXTERN_C_END 199 #undef __FUNCT__ 200 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 201 /*@ 202 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 203 of Dirichlet boundaries for the global problem. 
204 205 Not collective 206 207 Input Parameters: 208 + pc - the preconditioning context 209 210 Output Parameters: 211 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 212 213 Level: intermediate 214 215 Notes: 216 217 .seealso: PCBDDC 218 @*/ 219 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 220 { 221 PetscErrorCode ierr; 222 223 PetscFunctionBegin; 224 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 225 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 /* -------------------------------------------------------------------------- */ 229 EXTERN_C_BEGIN 230 #undef __FUNCT__ 231 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 232 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 233 { 234 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 235 236 PetscFunctionBegin; 237 *NeumannBoundaries = pcbddc->NeumannBoundaries; 238 PetscFunctionReturn(0); 239 } 240 EXTERN_C_END 241 #undef __FUNCT__ 242 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 243 /*@ 244 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 245 of Neumann boundaries for the global problem. 
246 247 Not collective 248 249 Input Parameters: 250 + pc - the preconditioning context 251 252 Output Parameters: 253 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 254 255 Level: intermediate 256 257 Notes: 258 259 .seealso: PCBDDC 260 @*/ 261 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 262 { 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 267 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 /* -------------------------------------------------------------------------- */ 271 EXTERN_C_BEGIN 272 #undef __FUNCT__ 273 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 274 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 275 { 276 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 277 PCBDDCGraph mat_graph=pcbddc->mat_graph; 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 mat_graph->nvtxs=nvtxs; 282 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 284 if(copymode == PETSC_COPY_VALUES) { 285 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 286 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 287 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 288 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 289 } else if(copymode == PETSC_OWN_POINTER) { 290 mat_graph->xadj=xadj; 291 mat_graph->adjncy=adjncy; 292 } else { 293 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 294 } 295 PetscFunctionReturn(0); 296 } 297 EXTERN_C_END 298 #undef __FUNCT__ 299 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 300 /*@ 301 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 302 303 Not collective 304 305 Input Parameters: 306 + pc - the preconditioning context 307 - nvtxs - number of local vertices of the graph 308 - xadj, adjncy - the CSR graph 309 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 310 in the latter case, memory must be obtained with PetscMalloc. 311 312 Level: intermediate 313 314 Notes: 315 316 .seealso: PCBDDC 317 @*/ 318 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 319 { 320 PetscInt nrows,ncols; 321 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 322 PetscErrorCode ierr; 323 324 PetscFunctionBegin; 325 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 326 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 327 if(nvtxs != nrows) { 328 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 329 } else { 330 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 /* -------------------------------------------------------------------------- */ 335 EXTERN_C_BEGIN 336 #undef __FUNCT__ 337 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 338 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 339 { 340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 341 PetscInt i; 342 PetscErrorCode ierr; 343 344 PetscFunctionBegin; 345 /* Destroy ISes if they were already set */ 346 for(i=0;i<pcbddc->n_ISForDofs;i++) { 347 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 348 } 349 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 350 /* allocate space then set */ 351 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 352 for(i=0;i<n_is;i++) { 
353 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 354 pcbddc->ISForDofs[i]=ISForDofs[i]; 355 } 356 pcbddc->n_ISForDofs=n_is; 357 PetscFunctionReturn(0); 358 } 359 EXTERN_C_END 360 #undef __FUNCT__ 361 #define __FUNCT__ "PCBDDCSetDofsSplitting" 362 /*@ 363 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 364 365 Not collective 366 367 Input Parameters: 368 + pc - the preconditioning context 369 - n - number of index sets defining the fields 370 - IS[] - array of IS describing the fields 371 372 Level: intermediate 373 374 Notes: 375 376 .seealso: PCBDDC 377 @*/ 378 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 379 { 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 384 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 /* -------------------------------------------------------------------------- */ 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCPreSolve_BDDC" 390 /* -------------------------------------------------------------------------- */ 391 /* 392 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 393 guess if a transformation of basis approach has been selected. 394 395 Input Parameter: 396 + pc - the preconditioner contex 397 398 Application Interface Routine: PCPreSolve() 399 400 Notes: 401 The interface routine PCPreSolve() is not usually called directly by 402 the user, but instead is called by KSPSolve(). 
403 */ 404 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 405 { 406 PetscErrorCode ierr; 407 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 408 PC_IS *pcis = (PC_IS*)(pc->data); 409 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 410 Mat temp_mat; 411 IS dirIS; 412 PetscInt dirsize,i,*is_indices; 413 PetscScalar *array_x,*array_diagonal; 414 Vec used_vec; 415 PetscBool guess_nonzero; 416 417 PetscFunctionBegin; 418 if(x) { 419 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 420 used_vec = x; 421 } else { 422 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 423 used_vec = pcbddc->temp_solution; 424 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 425 } 426 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 427 if (ksp) { 428 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 429 if( !guess_nonzero ) { 430 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 431 } 432 } 433 /* store the original rhs */ 434 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 435 if(pcbddc->usechangeofbasis) { 436 /* swap pointers for local matrices */ 437 temp_mat = matis->A; 438 matis->A = pcbddc->local_mat; 439 pcbddc->local_mat = temp_mat; 440 /* Get local rhs and apply transformation of basis */ 441 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 442 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 443 /* from original basis to modified basis */ 444 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 445 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 446 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 447 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 448 } 449 450 /* Take into account zeroed 
rows -> change rhs and store solution removed */ 451 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 452 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 454 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 455 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 456 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 457 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 458 if(dirIS) { 459 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 460 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 461 ierr = VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 462 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 463 for(i=0;i<dirsize;i++) { 464 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 465 } 466 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 467 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 468 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 469 } 470 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 472 /* remove the computed solution from the rhs */ 473 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 474 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 475 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 476 if(x) { 477 /* store partially computed solution and set initial guess to 0 */ 478 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 479 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 480 } 481 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 482 
PetscFunctionReturn(0); 483 } 484 /* -------------------------------------------------------------------------- */ 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCPostSolve_BDDC" 487 /* -------------------------------------------------------------------------- */ 488 /* 489 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 490 approach has been selected. Also, restores rhs to its original state. 491 492 Input Parameter: 493 + pc - the preconditioner contex 494 495 Application Interface Routine: PCPostSolve() 496 497 Notes: 498 The interface routine PCPostSolve() is not usually called directly by 499 the user, but instead is called by KSPSolve(). 500 */ 501 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 502 { 503 PetscErrorCode ierr; 504 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 505 PC_IS *pcis = (PC_IS*)(pc->data); 506 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 507 Mat temp_mat; 508 509 PetscFunctionBegin; 510 if(pcbddc->usechangeofbasis) { 511 /* swap pointers for local matrices */ 512 temp_mat = matis->A; 513 matis->A = pcbddc->local_mat; 514 pcbddc->local_mat = temp_mat; 515 /* restore rhs to its original state */ 516 if(rhs) ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 517 /* Get Local boundary and apply transformation of basis to solution vector */ 518 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 519 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 /* from modified basis to original basis */ 521 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 522 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 523 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 } 526 /* add solution 
removed in presolve */ 527 if(x) ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 /* -------------------------------------------------------------------------- */ 531 #undef __FUNCT__ 532 #define __FUNCT__ "PCSetUp_BDDC" 533 /* -------------------------------------------------------------------------- */ 534 /* 535 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 536 by setting data structures and options. 537 538 Input Parameter: 539 + pc - the preconditioner context 540 541 Application Interface Routine: PCSetUp() 542 543 Notes: 544 The interface routine PCSetUp() is not usually called directly by 545 the user, but instead is called by PCApply() if necessary. 546 */ 547 PetscErrorCode PCSetUp_BDDC(PC pc) 548 { 549 PetscErrorCode ierr; 550 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 551 PC_IS *pcis = (PC_IS*)(pc->data); 552 553 PetscFunctionBegin; 554 if (!pc->setupcalled) { 555 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 556 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 557 Also, we decide to directly build the (same) Dirichlet problem */ 558 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 559 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 560 /* Set up all the "iterative substructuring" common block */ 561 ierr = PCISSetUp(pc);CHKERRQ(ierr); 562 /* Get stdout for dbg */ 563 if(pcbddc->dbg_flag) { 564 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 565 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 566 } 567 /* TODO MOVE CODE FRAGMENT */ 568 PetscInt im_active=0; 569 if(pcis->n) im_active = 1; 570 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 571 /* Analyze local interface */ 572 ierr = 
PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 573 /* Set up local constraint matrix */ 574 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 575 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 576 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 577 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 578 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 579 } 580 PetscFunctionReturn(0); 581 } 582 583 /* -------------------------------------------------------------------------- */ 584 /* 585 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 586 587 Input Parameters: 588 . pc - the preconditioner context 589 . r - input vector (global) 590 591 Output Parameter: 592 . z - output vector (global) 593 594 Application Interface Routine: PCApply() 595 */ 596 #undef __FUNCT__ 597 #define __FUNCT__ "PCApply_BDDC" 598 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 599 { 600 PC_IS *pcis = (PC_IS*)(pc->data); 601 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 602 PetscErrorCode ierr; 603 const PetscScalar one = 1.0; 604 const PetscScalar m_one = -1.0; 605 const PetscScalar zero = 0.0; 606 607 /* This code is similar to that provided in nn.c for PCNN 608 NN interface preconditioner changed to BDDC 609 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 610 611 PetscFunctionBegin; 612 /* First Dirichlet solve */ 613 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 614 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 615 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 616 /* 617 Assembling right hand side for BDDC operator 618 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 619 - the interface part of the global vector z 620 */ 621 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 622 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 623 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 624 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 625 ierr = VecCopy(r,z);CHKERRQ(ierr); 626 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 627 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 628 629 /* Get Local boundary and apply partition of unity */ 630 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 632 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 633 634 /* Apply interface preconditioner 635 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 636 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 637 638 /* Apply partition of unity and sum boundary values */ 639 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 640 ierr = VecSet(z,zero);CHKERRQ(ierr); 641 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 642 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 644 /* Second Dirichlet solve and assembling of output */ 645 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 647 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 648 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 649 ierr = 
KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 650 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 651 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 652 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 653 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 654 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 657 } 658 /* -------------------------------------------------------------------------- */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "PCDestroy_BDDC" 661 PetscErrorCode PCDestroy_BDDC(PC pc) 662 { 663 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 664 PetscErrorCode ierr; 665 666 PetscFunctionBegin; 667 /* free data created by PCIS */ 668 ierr = PCISDestroy(pc);CHKERRQ(ierr); 669 /* free BDDC data */ 670 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 671 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 672 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 673 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 674 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 675 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 676 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 677 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 678 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 679 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 680 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 681 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 682 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 683 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 684 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 685 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 686 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 687 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 688 ierr = 
VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 689 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 690 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 691 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 692 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 693 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 695 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 696 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 697 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 698 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 699 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 700 PetscInt i; 701 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 702 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 703 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 704 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 705 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 706 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 707 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 708 /* Free graph structure */ 709 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 710 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 711 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 712 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 713 ierr = PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 714 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 715 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 717 
/* Free the private data structure that was hanging off the PC */ 718 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 /* -------------------------------------------------------------------------- */ 723 /*MC 724 PCBDDC - Balancing Domain Decomposition by Constraints. 725 726 Options Database Keys: 727 . -pcbddc ??? - 728 729 Level: intermediate 730 731 Notes: The matrix used with this preconditioner must be of type MATIS 732 733 Unlike more 'conventional' interface preconditioners, this iterates over ALL the 734 degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers 735 on the subdomains). 736 737 Options for the coarse grid preconditioner can be set with - 738 Options for the Dirichlet subproblem can be set with - 739 Options for the Neumann subproblem can be set with - 740 741 Contributed by Stefano Zampini 742 743 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS 744 M*/ 745 EXTERN_C_BEGIN 746 #undef __FUNCT__ 747 #define __FUNCT__ "PCCreate_BDDC" 748 PetscErrorCode PCCreate_BDDC(PC pc) 749 { 750 PetscErrorCode ierr; 751 PC_BDDC *pcbddc; 752 PCBDDCGraph mat_graph; 753 754 PetscFunctionBegin; 755 /* Creates the private data structure for this preconditioner and attach it to the PC object. 
*/ 756 ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr); 757 pc->data = (void*)pcbddc; 758 759 /* create PCIS data structure */ 760 ierr = PCISCreate(pc);CHKERRQ(ierr); 761 762 /* BDDC specific */ 763 pcbddc->temp_solution = 0; 764 pcbddc->original_rhs = 0; 765 pcbddc->local_mat = 0; 766 pcbddc->ChangeOfBasisMatrix = 0; 767 pcbddc->usechangeofbasis = PETSC_TRUE; 768 pcbddc->usechangeonfaces = PETSC_FALSE; 769 pcbddc->coarse_vec = 0; 770 pcbddc->coarse_rhs = 0; 771 pcbddc->coarse_ksp = 0; 772 pcbddc->coarse_phi_B = 0; 773 pcbddc->coarse_phi_D = 0; 774 pcbddc->vec1_P = 0; 775 pcbddc->vec1_R = 0; 776 pcbddc->vec2_R = 0; 777 pcbddc->local_auxmat1 = 0; 778 pcbddc->local_auxmat2 = 0; 779 pcbddc->R_to_B = 0; 780 pcbddc->R_to_D = 0; 781 pcbddc->ksp_D = 0; 782 pcbddc->ksp_R = 0; 783 pcbddc->local_primal_indices = 0; 784 pcbddc->prec_type = PETSC_FALSE; 785 pcbddc->NeumannBoundaries = 0; 786 pcbddc->ISForDofs = 0; 787 pcbddc->ISForVertices = 0; 788 pcbddc->n_ISForFaces = 0; 789 pcbddc->n_ISForEdges = 0; 790 pcbddc->ConstraintMatrix = 0; 791 pcbddc->use_nnsp_true = PETSC_FALSE; 792 pcbddc->local_primal_sizes = 0; 793 pcbddc->local_primal_displacements = 0; 794 pcbddc->replicated_local_primal_indices = 0; 795 pcbddc->replicated_local_primal_values = 0; 796 pcbddc->coarse_loc_to_glob = 0; 797 pcbddc->dbg_flag = PETSC_FALSE; 798 pcbddc->coarsening_ratio = 8; 799 800 /* allocate and initialize needed graph structure */ 801 ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr); 802 pcbddc->mat_graph->xadj = 0; 803 pcbddc->mat_graph->adjncy = 0; 804 805 /* function pointers */ 806 pc->ops->apply = PCApply_BDDC; 807 pc->ops->applytranspose = 0; 808 pc->ops->setup = PCSetUp_BDDC; 809 pc->ops->destroy = PCDestroy_BDDC; 810 pc->ops->setfromoptions = PCSetFromOptions_BDDC; 811 pc->ops->view = 0; 812 pc->ops->applyrichardson = 0; 813 pc->ops->applysymmetricleft = 0; 814 pc->ops->applysymmetricright = 0; 815 pc->ops->presolve = PCPreSolve_BDDC; 816 pc->ops->postsolve 
= PCPostSolve_BDDC; 817 818 /* composing function */ 819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC", 820 PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr); 821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC", 822 PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr); 823 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC", 824 PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr); 825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC", 826 PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr); 827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC", 828 PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr); 829 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC", 830 PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr); 831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC", 832 PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr); 833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC", 834 PCPreSolve_BDDC);CHKERRQ(ierr); 835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC", 836 PCPostSolve_BDDC);CHKERRQ(ierr); 837 PetscFunctionReturn(0); 838 } 839 EXTERN_C_END 840 /* -------------------------------------------------------------------------- */ 841 /* All static functions from now on */ 842 /* -------------------------------------------------------------------------- */ 843 #undef __FUNCT__ 844 #define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph" 845 static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc) 846 { 847 PC_BDDC 
*pcbddc = (PC_BDDC*)pc->data; 848 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 849 PetscInt nvtxs,*xadj,*adjncy; 850 Mat mat_adj; 851 PetscBool symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE; 852 PCBDDCGraph mat_graph=pcbddc->mat_graph; 853 PetscErrorCode ierr; 854 855 PetscFunctionBegin; 856 /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */ 857 if(!mat_graph->xadj) { 858 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 859 ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 860 if(!flg_row) { 861 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 862 } 863 /* Get adjacency into BDDC workspace */ 864 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 865 ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 866 if(!flg_row) { 867 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 868 } 869 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 870 } 871 PetscFunctionReturn(0); 872 } 873 /* -------------------------------------------------------------------------- */ 874 #undef __FUNCT__ 875 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 876 static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 877 { 878 PetscErrorCode ierr; 879 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 880 PC_IS* pcis = (PC_IS*) (pc->data); 881 const PetscScalar zero = 0.0; 882 883 PetscFunctionBegin; 884 /* Application of PHI^T */ 885 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 886 if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 887 888 /* Scatter data of coarse_rhs */ 889 if(pcbddc->coarse_rhs) ierr 
= VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); 890 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 892 /* Local solution on R nodes */ 893 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 894 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 895 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 896 if(pcbddc->prec_type) { 897 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 898 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 899 } 900 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 901 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 902 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 if(pcbddc->prec_type) { 905 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 } 908 909 /* Coarse solution */ 910 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 911 if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 912 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 913 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 914 915 /* Sum contributions from two levels */ 916 ierr = 
MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 917 if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 918 PetscFunctionReturn(0); 919 } 920 /* -------------------------------------------------------------------------- */ 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 923 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 924 { 925 PetscErrorCode ierr; 926 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 927 928 PetscFunctionBegin; 929 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 930 if(pcbddc->local_auxmat1) { 931 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 932 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 933 } 934 PetscFunctionReturn(0); 935 } 936 /* -------------------------------------------------------------------------- */ 937 #undef __FUNCT__ 938 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 939 static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 940 { 941 PetscErrorCode ierr; 942 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 943 944 PetscFunctionBegin; 945 switch(pcbddc->coarse_communications_type){ 946 case SCATTERS_BDDC: 947 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 948 break; 949 case GATHERS_BDDC: 950 break; 951 } 952 PetscFunctionReturn(0); 953 } 954 /* -------------------------------------------------------------------------- */ 955 #undef __FUNCT__ 956 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 957 static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 958 { 959 PetscErrorCode ierr; 960 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 961 PetscScalar* array_to; 962 PetscScalar* array_from; 963 MPI_Comm comm=((PetscObject)pc)->comm; 
964 PetscInt i; 965 966 PetscFunctionBegin; 967 968 switch(pcbddc->coarse_communications_type){ 969 case SCATTERS_BDDC: 970 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 971 break; 972 case GATHERS_BDDC: 973 if(vec_from) VecGetArray(vec_from,&array_from); 974 if(vec_to) VecGetArray(vec_to,&array_to); 975 switch(pcbddc->coarse_problem_type){ 976 case SEQUENTIAL_BDDC: 977 if(smode == SCATTER_FORWARD) { 978 ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 979 if(vec_to) { 980 for(i=0;i<pcbddc->replicated_primal_size;i++) 981 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 982 } 983 } else { 984 if(vec_from) 985 for(i=0;i<pcbddc->replicated_primal_size;i++) 986 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]]; 987 ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr); 988 } 989 break; 990 case REPLICATED_BDDC: 991 if(smode == SCATTER_FORWARD) { 992 ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr); 993 for(i=0;i<pcbddc->replicated_primal_size;i++) 994 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i]; 995 } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */ 996 for(i=0;i<pcbddc->local_primal_size;i++) 997 array_to[i]=array_from[pcbddc->local_primal_indices[i]]; 998 } 999 break; 1000 case MULTILEVEL_BDDC: 1001 break; 1002 case PARALLEL_BDDC: 1003 break; 1004 } 1005 
if(vec_from) VecRestoreArray(vec_from,&array_from); 1006 if(vec_to) VecRestoreArray(vec_to,&array_to); 1007 break; 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 /* -------------------------------------------------------------------------- */ 1012 #ifdef BDDC_USE_POD 1013 #if !defined(PETSC_MISSING_LAPACK_GESVD) 1014 #define PETSC_MISSING_LAPACK_GESVD 1 1015 #define UNDEF_PETSC_MISSING_LAPACK_GESVD 1 1016 #endif 1017 #endif 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "PCBDDCCreateConstraintMatrix" 1021 static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc) 1022 { 1023 PetscErrorCode ierr; 1024 PC_IS* pcis = (PC_IS*)(pc->data); 1025 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1026 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1027 PetscInt *nnz,*is_indices; 1028 PetscScalar *temp_quadrature_constraint; 1029 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1030 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1031 PetscInt n_constraints,n_vertices,size_of_constraint; 1032 PetscScalar quad_value; 1033 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1034 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1035 IS *used_IS; 1036 const MatType impMatType=MATSEQAIJ; 1037 PetscBLASInt Bs,Bt,lwork,lierr; 1038 PetscReal tol=1.0e-8; 1039 MatNullSpace nearnullsp; 1040 const Vec *nearnullvecs; 1041 Vec *localnearnullsp; 1042 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 1043 PetscReal *rwork,*singular_vals; 1044 PetscBLASInt Bone=1,*ipiv; 1045 Vec temp_vec; 1046 Mat temp_mat; 1047 KSP temp_ksp; 1048 PetscInt s,start_constraint,dual_dofs; 1049 PetscBool compute_submatrix,useksp=PETSC_FALSE; 1050 PetscInt *aux_primal_permutation,*aux_primal_numbering; 1051 PetscBool boolforface,*change_basis; 1052 /* some ugly conditional declarations */ 1053 #if defined(PETSC_MISSING_LAPACK_GESVD) 1054 PetscScalar dot_result; 1055 PetscScalar one=1.0,zero=0.0; 1056 PetscInt 
ii; 1057 #if defined(PETSC_USE_COMPLEX) 1058 PetscScalar val1,val2; 1059 #endif 1060 #else 1061 PetscBLASInt dummy_int; 1062 PetscScalar dummy_scalar; 1063 #endif 1064 1065 PetscFunctionBegin; 1066 /* check if near null space is attached to global mat */ 1067 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1068 if (nearnullsp) { 1069 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1070 } else { /* if near null space is not provided it uses constants */ 1071 nnsp_has_cnst = PETSC_TRUE; 1072 use_nnsp_true = PETSC_TRUE; 1073 } 1074 if(nnsp_has_cnst) { 1075 nnsp_addone = 1; 1076 } 1077 /* 1078 Evaluate maximum storage size needed by the procedure 1079 - temp_indices will contain start index of each constraint stored as follows 1080 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1081 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1082 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1083 */ 1084 1085 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 1086 total_counts *= (nnsp_addone+nnsp_size); 1087 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 1088 total_counts += n_vertices; 1089 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1090 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 1091 total_counts = 0; 1092 max_size_of_constraint = 0; 1093 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1094 if(i<pcbddc->n_ISForEdges){ 1095 used_IS = &pcbddc->ISForEdges[i]; 1096 } else { 1097 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1098 } 1099 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1100 total_counts += j; 1101 
if(j>max_size_of_constraint) max_size_of_constraint=j; 1102 } 1103 total_counts *= (nnsp_addone+nnsp_size); 1104 total_counts += n_vertices; 1105 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1106 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1107 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1108 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1109 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1110 for(i=0;i<pcis->n;i++) { 1111 local_to_B[i]=-1; 1112 } 1113 for(i=0;i<pcis->n_B;i++) { 1114 local_to_B[is_indices[i]]=i; 1115 } 1116 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1117 1118 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 1119 rwork = 0; 1120 work = 0; 1121 singular_vals = 0; 1122 temp_basis = 0; 1123 correlation_mat = 0; 1124 if(!pcbddc->use_nnsp_true) { 1125 PetscScalar temp_work; 1126 #if defined(PETSC_MISSING_LAPACK_GESVD) 1127 /* POD */ 1128 PetscInt max_n; 1129 max_n = nnsp_addone+nnsp_size; 1130 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 1131 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1132 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1133 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_COMPLEX) 1135 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1136 #endif 1137 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1138 Bt = PetscBLASIntCast(max_n); 1139 lwork=-1; 1140 #if !defined(PETSC_USE_COMPLEX) 1141 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); 1142 #else 1143 
LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); 1144 #endif 1145 #else /* on missing GESVD */ 1146 /* SVD */ 1147 PetscInt max_n,min_n; 1148 max_n = max_size_of_constraint; 1149 min_n = nnsp_addone+nnsp_size; 1150 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 1151 min_n = max_size_of_constraint; 1152 max_n = nnsp_addone+nnsp_size; 1153 } 1154 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1155 #if defined(PETSC_USE_COMPLEX) 1156 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1157 #endif 1158 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1159 lwork=-1; 1160 Bs = PetscBLASIntCast(max_n); 1161 Bt = PetscBLASIntCast(min_n); 1162 dummy_int = Bs; 1163 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1164 #if !defined(PETSC_USE_COMPLEX) 1165 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 1166 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 1167 #else 1168 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 1169 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 1170 #endif 1171 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 1172 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1173 #endif 1174 /* Allocate optimal workspace */ 1175 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 1176 total_counts = (PetscInt)lwork; 1177 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1178 } 1179 /* get local part of global near null space vectors */ 1180 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1181 for(k=0;k<nnsp_size;k++) { 1182 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1183 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 
1184 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1185 } 1186 /* Now we can loop on constraining sets */ 1187 total_counts=0; 1188 temp_indices[0]=0; 1189 /* vertices */ 1190 PetscBool used_vertex; 1191 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1192 if(nnsp_has_cnst) { /* consider all vertices */ 1193 for(i=0;i<n_vertices;i++) { 1194 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1195 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1196 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1197 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1198 change_basis[total_counts]=PETSC_FALSE; 1199 total_counts++; 1200 } 1201 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1202 for(i=0;i<n_vertices;i++) { 1203 used_vertex=PETSC_FALSE; 1204 k=0; 1205 while(!used_vertex && k<nnsp_size) { 1206 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1207 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 1208 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1209 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1210 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1211 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1212 change_basis[total_counts]=PETSC_FALSE; 1213 total_counts++; 1214 used_vertex=PETSC_TRUE; 1215 } 1216 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1217 k++; 1218 } 1219 } 1220 } 1221 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1222 n_vertices=total_counts; 1223 /* edges and faces */ 1224 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 1225 if(i<pcbddc->n_ISForEdges){ 1226 used_IS = 
&pcbddc->ISForEdges[i]; 1227 boolforface = pcbddc->usechangeofbasis; 1228 } else { 1229 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 1230 boolforface = pcbddc->usechangeonfaces; 1231 } 1232 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1233 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1234 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1235 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1236 if(nnsp_has_cnst) { 1237 temp_constraints++; 1238 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1239 for(j=0;j<size_of_constraint;j++) { 1240 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1241 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1242 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1243 } 1244 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1245 change_basis[total_counts]=boolforface; 1246 total_counts++; 1247 } 1248 for(k=0;k<nnsp_size;k++) { 1249 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1250 for(j=0;j<size_of_constraint;j++) { 1251 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1252 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1253 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 1254 } 1255 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 1256 quad_value = 1.0; 1257 if( use_nnsp_true ) { /* check if array is null on the connected component in case use_nnsp_true has been requested */ 1258 Bs = PetscBLASIntCast(size_of_constraint); 1259 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 1260 } 1261 if ( quad_value > 0.0 
) { /* keep indices and values */ 1262 temp_constraints++; 1263 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1264 change_basis[total_counts]=boolforface; 1265 total_counts++; 1266 } 1267 } 1268 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1269 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 1270 if(!use_nnsp_true) { 1271 1272 Bs = PetscBLASIntCast(size_of_constraint); 1273 Bt = PetscBLASIntCast(temp_constraints); 1274 1275 #if defined(PETSC_MISSING_LAPACK_GESVD) 1276 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 1277 /* Store upper triangular part of correlation matrix */ 1278 for(j=0;j<temp_constraints;j++) { 1279 for(k=0;k<j+1;k++) { 1280 #if defined(PETSC_USE_COMPLEX) 1281 /* hand made complex dot product */ 1282 dot_result = 0.0; 1283 for (ii=0; ii<size_of_constraint; ii++) { 1284 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 1285 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 1286 dot_result += val1*PetscConj(val2); 1287 } 1288 #else 1289 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 1290 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 1291 #endif 1292 correlation_mat[j*temp_constraints+k]=dot_result; 1293 } 1294 } 1295 #if !defined(PETSC_USE_COMPLEX) 1296 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); 1297 #else 1298 LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,rwork,&lierr); 1299 #endif 1300 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in EV Lapack routine %d",(int)lierr); 1301 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 1302 j=0; 1303 while( j < Bt && singular_vals[j] < tol) j++; 1304 total_counts=total_counts-j; 1305 if(j<temp_constraints) { 1306 
for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 1307 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 1308 /* copy POD basis into used quadrature memory */ 1309 for(k=0;k<Bt-j;k++) { 1310 for(ii=0;ii<size_of_constraint;ii++) { 1311 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 1312 } 1313 } 1314 } 1315 1316 #else /* on missing GESVD */ 1317 1318 PetscInt min_n = temp_constraints; 1319 if(min_n > size_of_constraint) min_n = size_of_constraint; 1320 dummy_int = Bs; 1321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1322 #if !defined(PETSC_USE_COMPLEX) 1323 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1324 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 1325 #else 1326 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 1327 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 1328 #endif 1329 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr); 1330 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1331 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 1332 j=0; 1333 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 1334 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 1335 #endif 1336 } 1337 } 1338 1339 n_constraints=total_counts-n_vertices; 1340 local_primal_size = total_counts; 1341 /* set quantities in pcbddc data structure */ 1342 pcbddc->n_vertices = n_vertices; 1343 pcbddc->n_constraints = n_constraints; 1344 pcbddc->local_primal_size = local_primal_size; 1345 1346 /* Create constraint matrix */ 1347 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1348 /* so we need to set it up 
properly either with or without change of basis */ 1349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1350 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1351 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 1352 /* compute a local numbering of constraints : vertices first then constraints */ 1353 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1354 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1355 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1356 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 1357 total_counts=0; 1358 /* find vertices: subdomain corners plus dofs with basis changed */ 1359 for(i=0;i<local_primal_size;i++) { 1360 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1361 if(change_basis[i] || size_of_constraint == 1) { 1362 k=0; 1363 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 1364 k=k+1; 1365 } 1366 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 1367 array_vector[j] = 1.0; 1368 aux_primal_numbering[total_counts]=j; 1369 aux_primal_permutation[total_counts]=total_counts; 1370 total_counts++; 1371 } 1372 } 1373 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 1374 /* permute indices in order to have a sorted set of vertices */ 1375 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 1376 /* nonzero structure */ 1377 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1378 for(i=0;i<total_counts;i++) { 1379 nnz[i]=1; 1380 } 1381 j=total_counts; 1382 for(i=n_vertices;i<local_primal_size;i++) { 1383 if(!change_basis[i]) { 1384 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1385 j++; 1386 } 1387 } 1388 ierr = 
MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1389 ierr = PetscFree(nnz);CHKERRQ(ierr); 1390 /* set values in constraint matrix */ 1391 for(i=0;i<total_counts;i++) { 1392 j = aux_primal_permutation[i]; 1393 k = aux_primal_numbering[j]; 1394 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 1395 } 1396 for(i=n_vertices;i<local_primal_size;i++) { 1397 if(!change_basis[i]) { 1398 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1399 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1400 total_counts++; 1401 } 1402 } 1403 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1404 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 1405 /* assembling */ 1406 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1408 1409 /* Create matrix for change of basis. 
We don't need it in case pcbddc->usechangeofbasis is FALSE */ 1410 if(pcbddc->usechangeofbasis) { 1411 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1412 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1413 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1414 /* work arrays */ 1415 /* we need to reuse these arrays, so we free them */ 1416 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1417 ierr = PetscFree(work);CHKERRQ(ierr); 1418 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1419 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1420 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1421 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 1422 for(i=0;i<pcis->n_B;i++) { 1423 nnz[i]=1; 1424 } 1425 /* Overestimated nonzeros per row */ 1426 k=1; 1427 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1428 if(change_basis[i]) { 1429 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1430 if(k < size_of_constraint) { 1431 k = size_of_constraint; 1432 } 1433 for(j=0;j<size_of_constraint;j++) { 1434 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1435 } 1436 } 1437 } 1438 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1439 ierr = PetscFree(nnz);CHKERRQ(ierr); 1440 /* Temporary array to store indices */ 1441 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 1442 /* Set initial identity in the matrix */ 1443 for(i=0;i<pcis->n_B;i++) { 1444 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1445 } 1446 /* Now we loop on the constraints which need a change of basis */ 1447 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 1448 /* Klawonn and Widlund, Dual-primal FETI-DP methods for 
linear elasticity, (6.2.1) */ 1449 temp_constraints = 0; 1450 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 1451 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 1452 if(change_basis[i]) { 1453 compute_submatrix = PETSC_FALSE; 1454 useksp = PETSC_FALSE; 1455 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 1456 temp_constraints++; 1457 if(temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 1458 compute_submatrix = PETSC_TRUE; 1459 } 1460 } 1461 if(compute_submatrix) { 1462 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 1463 useksp = PETSC_TRUE; 1464 } 1465 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1466 if(useksp) { /* experimental */ 1467 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 1468 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 1469 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 1470 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 1471 } 1472 /* First _size_of_constraint-temp_constraints_ columns */ 1473 dual_dofs = size_of_constraint-temp_constraints; 1474 start_constraint = i+1-temp_constraints; 1475 for(s=0;s<dual_dofs;s++) { 1476 is_indices[0] = s; 1477 for(j=0;j<temp_constraints;j++) { 1478 for(k=0;k<temp_constraints;k++) { 1479 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 1480 } 1481 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 1482 is_indices[j+1]=s+j+1; 1483 } 1484 Bt = temp_constraints; 1485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1486 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 1487 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 1488 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1489 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 1490 ierr = 
MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 1491 if(useksp) { 1492 /* temp mat with transposed rows and columns */ 1493 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 1494 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 1495 } 1496 } 1497 if(useksp) { 1498 /* last rows of temp_mat */ 1499 for(j=0;j<size_of_constraint;j++) { 1500 is_indices[j] = j; 1501 } 1502 for(s=0;s<temp_constraints;s++) { 1503 k = s + dual_dofs; 1504 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1505 } 1506 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1507 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1508 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 1509 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 1510 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 1511 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 1512 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 1513 for(s=0;s<temp_constraints;s++) { 1514 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 1515 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 1516 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 1517 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 1518 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 1519 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 1520 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1521 /* last columns of change of basis matrix associated to new primal dofs */ 1522 ierr = 
MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 1523 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 1524 } 1525 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 1526 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 1527 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1528 } else { 1529 /* last columns of change of basis matrix associated to new primal dofs */ 1530 for(s=0;s<temp_constraints;s++) { 1531 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 1532 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 1533 } 1534 } 1535 /* prepare for the next cycle */ 1536 temp_constraints = 0; 1537 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 1538 } 1539 } 1540 } 1541 /* assembling */ 1542 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1543 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1544 ierr = PetscFree(ipiv);CHKERRQ(ierr); 1545 ierr = PetscFree(is_indices);CHKERRQ(ierr); 1546 } 1547 /* free workspace no longer needed */ 1548 ierr = PetscFree(rwork);CHKERRQ(ierr); 1549 ierr = PetscFree(work);CHKERRQ(ierr); 1550 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1551 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1552 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1553 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1554 ierr = PetscFree(change_basis);CHKERRQ(ierr); 1555 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1556 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1557 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1558 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1559 for(k=0;k<nnsp_size;k++) { 1560 
ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1561 } 1562 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 #ifdef UNDEF_PETSC_MISSING_LAPACK_GESVD 1566 #undef PETSC_MISSING_LAPACK_GESVD 1567 #endif 1568 /* -------------------------------------------------------------------------- */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "PCBDDCCoarseSetUp" 1571 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 1572 { 1573 PetscErrorCode ierr; 1574 1575 PC_IS* pcis = (PC_IS*)(pc->data); 1576 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1577 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1578 Mat change_mat_all; 1579 IS is_R_local; 1580 IS is_V_local; 1581 IS is_C_local; 1582 IS is_aux1; 1583 IS is_aux2; 1584 const VecType impVecType; 1585 const MatType impMatType; 1586 PetscInt n_R=0; 1587 PetscInt n_D=0; 1588 PetscInt n_B=0; 1589 PetscScalar zero=0.0; 1590 PetscScalar one=1.0; 1591 PetscScalar m_one=-1.0; 1592 PetscScalar* array; 1593 PetscScalar *coarse_submat_vals; 1594 PetscInt *idx_R_local; 1595 PetscInt *idx_V_B; 1596 PetscScalar *coarsefunctions_errors; 1597 PetscScalar *constraints_errors; 1598 /* auxiliary indices */ 1599 PetscInt i,j,k; 1600 /* for verbose output of bddc */ 1601 PetscViewer viewer=pcbddc->dbg_viewer; 1602 PetscBool dbg_flag=pcbddc->dbg_flag; 1603 /* for counting coarse dofs */ 1604 PetscInt n_vertices,n_constraints; 1605 PetscInt size_of_constraint; 1606 PetscInt *row_cmat_indices; 1607 PetscScalar *row_cmat_values; 1608 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 1609 1610 PetscFunctionBegin; 1611 /* Set Non-overlapping dimensions */ 1612 n_B = pcis->n_B; n_D = pcis->n - n_B; 1613 /* Set types for local objects needed by BDDC precondtioner */ 1614 impMatType = MATSEQDENSE; 1615 impVecType = VECSEQ; 1616 /* get vertex indices from constraint matrix */ 1617 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 1618 n_vertices=0; 1619 for(i=0;i<pcbddc->local_primal_size;i++) { 
1620 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1621 if(size_of_constraint == 1) { 1622 vertices[n_vertices]=row_cmat_indices[0]; 1623 n_vertices++; 1624 } 1625 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 1626 } 1627 /* Set number of constraints */ 1628 n_constraints = pcbddc->local_primal_size-n_vertices; 1629 1630 /* vertices in boundary numbering */ 1631 if(n_vertices) { 1632 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 1633 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1634 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 1635 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1636 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 1639 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1640 for (i=0; i<n_vertices; i++) { 1641 j=0; 1642 while (array[j] != i ) {j++;} 1643 idx_V_B[i]=j; 1644 } 1645 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1646 } 1647 1648 /* transform local matrices if needed */ 1649 if(pcbddc->usechangeofbasis) { 1650 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1651 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1652 for(i=0;i<n_D;i++) { 1653 nnz[is_indices[i]]=1; 1654 } 1655 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1656 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1657 k=1; 1658 for(i=0;i<n_B;i++) { 1659 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1660 nnz[is_indices[i]]=j; 1661 if( k < j) { 1662 k = 
j; 1663 } 1664 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 1665 } 1666 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1667 /* assemble change of basis matrix on the whole set of local dofs */ 1668 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1669 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 1670 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 1671 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 1672 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 1673 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1674 for(i=0;i<n_D;i++) { 1675 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1676 } 1677 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1678 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1679 for(i=0;i<n_B;i++) { 1680 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1681 for(k=0;k<j;k++) { 1682 temp_indices[k]=is_indices[row_cmat_indices[k]]; 1683 } 1684 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 1685 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1686 } 1687 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1688 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1689 ierr = MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 1690 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1691 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1692 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 1693 
ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 1694 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 1695 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 1696 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 1697 ierr = PetscFree(nnz);CHKERRQ(ierr); 1698 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1699 } else { 1700 /* without change of basis, the local matrix is unchanged */ 1701 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 1702 pcbddc->local_mat = matis->A; 1703 } 1704 1705 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1706 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 1707 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1708 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 1709 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 1710 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 1711 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1712 if(dbg_flag) { 1713 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1714 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1715 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1716 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1717 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 1718 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, 
pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1719 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1720 } 1721 1722 /* Allocate needed vectors */ 1723 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 1724 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 1725 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 1726 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 1727 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 1728 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1729 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1730 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 1731 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 1732 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1733 1734 /* Creating some index sets needed */ 1735 /* For submatrices */ 1736 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 1737 if(n_vertices) { 1738 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 1739 } 1740 if(n_constraints) { 1741 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 1742 } 1743 1744 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1745 { 1746 PetscInt *aux_array1; 1747 PetscInt *aux_array2; 1748 1749 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1750 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 1751 1752 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 1753 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1754 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 
1755 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1756 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1757 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1758 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1759 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1760 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 1761 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1762 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1763 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1764 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 1765 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 1766 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 1767 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1768 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1769 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 1770 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1771 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1772 1773 if(pcbddc->prec_type || dbg_flag ) { 1774 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 1775 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1776 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 1777 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1778 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 1779 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1780 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 1781 ierr = 
ISDestroy(&is_aux1);CHKERRQ(ierr); 1782 } 1783 } 1784 1785 /* Creating PC contexts for local Dirichlet and Neumann problems */ 1786 { 1787 Mat A_RR; 1788 PC pc_temp; 1789 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 1790 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1791 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1792 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 1793 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1794 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 1795 /* default */ 1796 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1797 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1798 /* Allow user's customization */ 1799 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1800 /* Set Up KSP for Dirichlet problem of BDDC */ 1801 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1802 /* set ksp_D into pcis data */ 1803 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1804 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1805 pcis->ksp_D = pcbddc->ksp_D; 1806 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_D,PETSC_VIEWER_STDOUT_SELF); 1807 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1808 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1809 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1810 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1811 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 1812 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1813 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 1814 /* default */ 1815 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1816 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1817 /* Allow user's 
customization */ 1818 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1819 /* Set Up KSP for Neumann problem of BDDC */ 1820 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1821 if(pcbddc->dbg_flag) ierr = KSPView(pcbddc->ksp_R,PETSC_VIEWER_STDOUT_SELF); 1822 /* check Dirichlet and Neumann solvers */ 1823 if(pcbddc->dbg_flag) { 1824 Vec temp_vec; 1825 PetscScalar value; 1826 1827 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 1828 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 1829 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1830 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 1831 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 1832 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1833 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1834 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1837 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1838 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 1839 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 1840 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1841 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 1842 ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 1843 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 1844 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 1845 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 1846 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1847 } 1848 /* free Neumann problem's matrix */ 1849 ierr = 
MatDestroy(&A_RR);CHKERRQ(ierr); 1850 } 1851 1852 /* Assemble all remaining stuff needed to apply BDDC */ 1853 { 1854 Mat A_RV,A_VR,A_VV; 1855 Mat M1,M2; 1856 Mat C_CR; 1857 Mat AUXMAT; 1858 Vec vec1_C; 1859 Vec vec2_C; 1860 Vec vec1_V; 1861 Vec vec2_V; 1862 PetscInt *nnz; 1863 PetscInt *auxindices; 1864 PetscInt index; 1865 PetscScalar* array2; 1866 MatFactorInfo matinfo; 1867 1868 /* Allocating some extra storage just to be safe */ 1869 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1870 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 1871 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 1872 1873 /* some work vectors on vertices and/or constraints */ 1874 if(n_vertices) { 1875 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 1876 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 1877 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 1878 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 1879 } 1880 if(n_constraints) { 1881 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 1882 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 1883 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 1884 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 1885 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 1886 } 1887 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1888 if(n_constraints) { 1889 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1890 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 1891 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 1892 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 1893 1894 /* Create Constraint matrix on R nodes: C_{CR} */ 1895 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1896 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 1897 1898 /* 
Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 1899 for(i=0;i<n_constraints;i++) { 1900 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1901 /* Get row of constraint matrix in R numbering */ 1902 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 1903 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1904 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 1905 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 1907 /* Solve for row of constraint matrix in R numbering */ 1908 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1909 /* Set values */ 1910 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1911 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1912 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1913 } 1914 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1915 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1916 1917 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1918 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 1919 ierr = MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 1920 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 1921 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 1922 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1923 1924 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 1925 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 1926 ierr = 
MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 1927 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 1928 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 1929 for(i=0;i<n_constraints;i++) { 1930 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 1931 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 1932 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 1933 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 1934 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 1935 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 1936 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 1937 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1938 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 1939 } 1940 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1941 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1942 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1943 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 1944 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1945 1946 } 1947 1948 /* Get submatrices from subdomain matrix */ 1949 if(n_vertices){ 1950 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1951 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1952 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1953 /* Assemble M2 = A_RR^{-1}A_RV */ 1954 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 1955 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 1956 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 1957 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 1958 for(i=0;i<n_vertices;i++) { 1959 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1960 ierr = 
VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1961 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1962 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1963 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 1964 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1965 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1966 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 1967 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 1968 } 1969 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1971 } 1972 1973 /* Matrix of coarse basis functions (local) */ 1974 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1975 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 1976 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 1977 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 1978 if(pcbddc->prec_type || dbg_flag ) { 1979 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1980 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 1981 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 1982 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 1983 } 1984 1985 if(dbg_flag) { 1986 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 1987 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 1988 } 1989 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1990 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 1991 1992 /* We are now ready to evaluate coarse basis functions 
and subdomain contribution to coarse problem */ 1993 for(i=0;i<n_vertices;i++){ 1994 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 1995 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 1996 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 1997 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 1998 /* solution of saddle point problem */ 1999 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2000 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2001 if(n_constraints) { 2002 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2003 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2004 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2005 } 2006 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 2007 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 2008 2009 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2010 /* coarse basis functions */ 2011 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2012 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2013 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2015 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2016 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2017 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 2018 if( pcbddc->prec_type || dbg_flag ) { 2019 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2021 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2022 ierr = 
MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2023 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2024 } 2025 /* subdomain contribution to coarse matrix */ 2026 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2027 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 2028 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2029 if(n_constraints) { 2030 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2031 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 2032 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2033 } 2034 2035 if( dbg_flag ) { 2036 /* assemble subdomain vector on nodes */ 2037 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2038 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2039 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2040 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 2041 array[ vertices[i] ] = one; 2042 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2043 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2044 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 2045 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 2046 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2047 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2048 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 2049 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2050 if(n_constraints) { 2051 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2052 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 2053 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2054 } 2055 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2056 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 2057 /* check saddle point solution */ 2058 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2059 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2060 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 2061 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2062 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2063 array[i]=array[i]+m_one; /* shift by the identity matrix */ 2064 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2065 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 2066 } 2067 } 2068 2069 for(i=0;i<n_constraints;i++){ 2070 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 2071 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 2072 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 2073 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 2074 /* solution of saddle point problem */ 2075 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 2076 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 2077 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2078 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 2079 /* Set values in coarse basis function and subdomain part of coarse_mat */ 2080 
/* coarse basis functions */ 2081 index=i+n_vertices; 2082 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 2083 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2085 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2086 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2087 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2088 if( pcbddc->prec_type || dbg_flag ) { 2089 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2092 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 2093 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 2094 } 2095 /* subdomain contribution to coarse matrix */ 2096 if(n_vertices) { 2097 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2098 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 2099 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2100 } 2101 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2102 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 2103 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2104 2105 if( dbg_flag ) { 2106 /* assemble subdomain vector on nodes */ 2107 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2108 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2109 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2110 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 2111 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 2112 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2113 /* assemble subdomain vector of lagrange multipliers */ 2114 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 2115 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2116 if( n_vertices) { 2117 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 2118 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 2119 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 2120 } 2121 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 2122 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 2123 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 2124 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 2125 /* check saddle point solution */ 2126 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2127 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 2128 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 2129 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 2130 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2131 array[index]=array[index]+m_one; /* shift by the identity matrix */ 2132 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2133 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 2134 } 2135 } 2136 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2137 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2138 if( pcbddc->prec_type || dbg_flag ) { 2139 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2140 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 } 2142 /* Checking coarse_sub_mat and coarse basis functios */ 2143 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 2144 
if(dbg_flag) { 2145 2146 Mat coarse_sub_mat; 2147 Mat TM1,TM2,TM3,TM4; 2148 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 2149 const MatType checkmattype=MATSEQAIJ; 2150 PetscScalar value; 2151 2152 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 2153 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 2154 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 2155 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 2156 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 2157 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 2158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 2159 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 2160 2161 /*PetscViewer view_out; 2162 PetscMPIInt myrank; 2163 char filename[256]; 2164 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 2165 sprintf(filename,"coarsesubmat_%04d.m",myrank); 2166 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 2167 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2168 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 2169 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2170 2171 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 2173 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2174 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 2175 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 2176 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2177 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 2178 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2179 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 2180 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 2181 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2182 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2183 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2184 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2185 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 2186 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 2187 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 2188 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 2189 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 2190 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 2191 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 2192 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 2193 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 2194 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2195 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 2196 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 2197 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 2198 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 2199 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 2200 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr); 2201 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 2202 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 2203 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 2204 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 2205 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 2206 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 2207 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 2208 } 2209 2210 /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */ 2211 ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr); 2212 /* free memory */ 2213 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2214 ierr = PetscFree(auxindices);CHKERRQ(ierr); 2215 ierr = PetscFree(nnz);CHKERRQ(ierr); 2216 if(n_vertices) { 2217 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 2218 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 2219 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2220 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2221 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 2222 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 2223 } 2224 if(n_constraints) { 2225 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 2226 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 2227 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2228 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 2229 } 2230 } 2231 /* free memory */ 2232 if(n_vertices) { 2233 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 2234 ierr = ISDestroy(&is_V_local);CHKERRQ(ierr); 2235 } 2236 ierr = ISDestroy(&is_R_local);CHKERRQ(ierr); 2237 2238 PetscFunctionReturn(0); 2239 } 2240 2241 /* -------------------------------------------------------------------------- */ 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "PCBDDCSetupCoarseEnvironment" 2245 static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals) 2246 { 2247 2248 2249 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2250 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2251 PC_IS *pcis = (PC_IS*)pc->data; 2252 MPI_Comm prec_comm = 
((PetscObject)pc)->comm; 2253 MPI_Comm coarse_comm; 2254 2255 /* common to all choiches */ 2256 PetscScalar *temp_coarse_mat_vals; 2257 PetscScalar *ins_coarse_mat_vals; 2258 PetscInt *ins_local_primal_indices; 2259 PetscMPIInt *localsizes2,*localdispl2; 2260 PetscMPIInt size_prec_comm; 2261 PetscMPIInt rank_prec_comm; 2262 PetscMPIInt active_rank=MPI_PROC_NULL; 2263 PetscMPIInt master_proc=0; 2264 PetscInt ins_local_primal_size; 2265 /* specific to MULTILEVEL_BDDC */ 2266 PetscMPIInt *ranks_recv; 2267 PetscMPIInt count_recv=0; 2268 PetscMPIInt rank_coarse_proc_send_to; 2269 PetscMPIInt coarse_color = MPI_UNDEFINED; 2270 ISLocalToGlobalMapping coarse_ISLG; 2271 /* some other variables */ 2272 PetscErrorCode ierr; 2273 const MatType coarse_mat_type; 2274 const PCType coarse_pc_type; 2275 const KSPType coarse_ksp_type; 2276 PC pc_temp; 2277 PetscInt i,j,k,bs; 2278 PetscInt max_it_coarse_ksp=1; /* don't increase this value */ 2279 /* verbose output viewer */ 2280 PetscViewer viewer=pcbddc->dbg_viewer; 2281 PetscBool dbg_flag=pcbddc->dbg_flag; 2282 2283 PetscFunctionBegin; 2284 2285 ins_local_primal_indices = 0; 2286 ins_coarse_mat_vals = 0; 2287 localsizes2 = 0; 2288 localdispl2 = 0; 2289 temp_coarse_mat_vals = 0; 2290 coarse_ISLG = 0; 2291 2292 ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr); 2293 ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr); 2294 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2295 2296 /* Assign global numbering to coarse dofs */ 2297 { 2298 PetscScalar one=1.,zero=0.; 2299 PetscScalar *array; 2300 PetscMPIInt *auxlocal_primal; 2301 PetscMPIInt *auxglobal_primal; 2302 PetscMPIInt *all_auxglobal_primal; 2303 PetscMPIInt *all_auxglobal_primal_dummy; 2304 PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size; 2305 PetscInt *row_cmat_indices; 2306 PetscInt size_of_constraint; 2307 PetscScalar coarsesum; 2308 2309 /* Construct needed data structures for message passing */ 2310 ierr = 
PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr); 2311 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr); 2312 ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr); 2313 /* Gather local_primal_size information for all processes */ 2314 ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr); 2315 pcbddc->replicated_primal_size = 0; 2316 for (i=0; i<size_prec_comm; i++) { 2317 pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ; 2318 pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i]; 2319 } 2320 if(rank_prec_comm == 0) { 2321 /* allocate some auxiliary space */ 2322 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr); 2323 ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal_dummy),&all_auxglobal_primal_dummy);CHKERRQ(ierr); 2324 } 2325 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr); 2326 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr); 2327 2328 /* First let's count coarse dofs. 
2329 This code fragment assumes that the number of local constraints per connected component 2330 is not greater than the number of nodes defined for the connected component 2331 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2332 /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */ 2333 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 2334 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2335 for(i=0;i<pcbddc->local_primal_size;i++) { 2336 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2337 for (j=0; j<size_of_constraint; j++) { 2338 k = row_cmat_indices[j]; 2339 if( array[k] == zero ) { 2340 array[k] = one; 2341 auxlocal_primal[i] = k; 2342 break; 2343 } 2344 } 2345 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2346 } 2347 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2348 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2349 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2350 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2351 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2352 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2353 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2354 for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; } 2355 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2356 ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2357 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2358 
ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2359 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 2360 pcbddc->coarse_size = (PetscInt) coarsesum; 2361 2362 /* Now assign them a global numbering */ 2363 /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */ 2364 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr); 2365 /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */ 2366 ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2367 2368 /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */ 2369 /* It implements a function similar to PetscSortRemoveDupsInt */ 2370 if(rank_prec_comm==0) { 2371 /* dummy argument since PetscSortMPIInt doesn't exist! */ 2372 ierr = PetscSortMPIIntWithArray(pcbddc->replicated_primal_size,all_auxglobal_primal,all_auxglobal_primal_dummy);CHKERRQ(ierr); 2373 k=1; 2374 j=all_auxglobal_primal[0]; /* first dof in global numbering */ 2375 for(i=1;i< pcbddc->replicated_primal_size ;i++) { 2376 if(j != all_auxglobal_primal[i] ) { 2377 all_auxglobal_primal[k]=all_auxglobal_primal[i]; 2378 k++; 2379 j=all_auxglobal_primal[i]; 2380 } 2381 } 2382 } else { 2383 ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr); 2384 } 2385 /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. 
*/ 2386 ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr); 2387 2388 /* Now get global coarse numbering of local primal nodes */ 2389 for(i=0;i<pcbddc->local_primal_size;i++) { 2390 k=0; 2391 while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;} 2392 pcbddc->local_primal_indices[i]=k; 2393 } 2394 if(dbg_flag) { 2395 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2396 ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr); 2397 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2398 } 2399 /* free allocated memory */ 2400 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 2401 ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr); 2402 ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr); 2403 if(rank_prec_comm == 0) { 2404 ierr = PetscFree(all_auxglobal_primal_dummy);CHKERRQ(ierr); 2405 } 2406 } 2407 2408 /* adapt coarse problem type */ 2409 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC ) 2410 pcbddc->coarse_problem_type = PARALLEL_BDDC; 2411 2412 switch(pcbddc->coarse_problem_type){ 2413 2414 case(MULTILEVEL_BDDC): /* we define a coarse mesh where subdomains are elements */ 2415 { 2416 /* we need additional variables */ 2417 MetisInt n_subdomains,n_parts,objval,ncon,faces_nvtxs; 2418 MetisInt *metis_coarse_subdivision; 2419 MetisInt options[METIS_NOPTIONS]; 2420 PetscMPIInt size_coarse_comm,rank_coarse_comm; 2421 PetscMPIInt procs_jumps_coarse_comm; 2422 PetscMPIInt *coarse_subdivision; 2423 PetscMPIInt *total_count_recv; 2424 PetscMPIInt *total_ranks_recv; 2425 PetscMPIInt *displacements_recv; 2426 PetscMPIInt *my_faces_connectivity; 2427 PetscMPIInt *petsc_faces_adjncy; 2428 MetisInt *faces_adjncy; 2429 MetisInt *faces_xadj; 2430 PetscMPIInt *number_of_faces; 2431 PetscMPIInt *faces_displacements; 2432 PetscInt *array_int; 2433 PetscMPIInt my_faces=0; 2434 
PetscMPIInt total_faces=0; 2435 PetscInt ranks_stretching_ratio; 2436 2437 /* define some quantities */ 2438 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2439 coarse_mat_type = MATIS; 2440 coarse_pc_type = PCBDDC; 2441 coarse_ksp_type = KSPCHEBYSHEV; 2442 2443 /* details of coarse decomposition */ 2444 n_subdomains = pcbddc->active_procs; 2445 n_parts = n_subdomains/pcbddc->coarsening_ratio; 2446 ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs; 2447 procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio; 2448 2449 /*printf("Coarse algorithm details: \n"); 2450 printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/ 2451 2452 /* build CSR graph of subdomains' connectivity through faces */ 2453 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr); 2454 ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr); 2455 for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2456 for(j=0;j<pcis->n_shared[i];j++){ 2457 array_int[ pcis->shared[i][j] ]+=1; 2458 } 2459 } 2460 for(i=1;i<pcis->n_neigh;i++){ 2461 for(j=0;j<pcis->n_shared[i];j++){ 2462 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2463 my_faces++; 2464 break; 2465 } 2466 } 2467 } 2468 2469 ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr); 2470 ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr); 2471 my_faces=0; 2472 for(i=1;i<pcis->n_neigh;i++){ 2473 for(j=0;j<pcis->n_shared[i];j++){ 2474 if(array_int[ pcis->shared[i][j] ] == 1 ){ 2475 my_faces_connectivity[my_faces]=pcis->neigh[i]; 2476 my_faces++; 2477 break; 2478 } 2479 } 2480 } 2481 if(rank_prec_comm == master_proc) { 2482 ierr = PetscMalloc 
(total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr); 2483 ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr); 2484 ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr); 2485 ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr); 2486 ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr); 2487 } 2488 ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2489 if(rank_prec_comm == master_proc) { 2490 faces_xadj[0]=0; 2491 faces_displacements[0]=0; 2492 j=0; 2493 for(i=1;i<size_prec_comm+1;i++) { 2494 faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1]; 2495 if(number_of_faces[i-1]) { 2496 j++; 2497 faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1]; 2498 } 2499 } 2500 /*printf("The J I count is %d and should be %d\n",j,n_subdomains); 2501 printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/ 2502 } 2503 ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2504 ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr); 2505 ierr = PetscFree(array_int);CHKERRQ(ierr); 2506 if(rank_prec_comm == master_proc) { 2507 for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */ 2508 /*printf("This is the face connectivity (actual ranks)\n"); 2509 for(i=0;i<n_subdomains;i++){ 2510 printf("proc %d is connected with \n",i); 2511 for(j=faces_xadj[i];j<faces_xadj[i+1];j++) 2512 printf("%d ",faces_adjncy[j]); 2513 printf("\n"); 2514 }*/ 2515 ierr = PetscFree(faces_displacements);CHKERRQ(ierr); 2516 ierr = PetscFree(number_of_faces);CHKERRQ(ierr); 2517 ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr); 2518 } 2519 2520 if( rank_prec_comm == master_proc ) { 2521 2522 PetscInt 
heuristic_for_metis=3; 2523 2524 ncon=1; 2525 faces_nvtxs=n_subdomains; 2526 /* partition graoh induced by face connectivity */ 2527 ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr); 2528 ierr = METIS_SetDefaultOptions(options); 2529 /* we need a contiguous partition of the coarse mesh */ 2530 options[METIS_OPTION_CONTIG]=1; 2531 options[METIS_OPTION_DBGLVL]=1; 2532 options[METIS_OPTION_NITER]=30; 2533 if(n_subdomains>n_parts*heuristic_for_metis) { 2534 options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE; 2535 options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT; 2536 ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2537 } else { 2538 ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision); 2539 } 2540 if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr); 2541 ierr = PetscFree(faces_xadj);CHKERRQ(ierr); 2542 ierr = PetscFree(faces_adjncy);CHKERRQ(ierr); 2543 coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */ 2544 /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */ 2545 for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL; 2546 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]); 2547 ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr); 2548 } 2549 2550 /* Create new communicator for coarse problem splitting the old one */ 2551 if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){ 2552 coarse_color=0; /* for communicator splitting */ 2553 active_rank=rank_prec_comm; /* for 
insertion of matrix values */ 2554 } 2555 /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards) 2556 key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */ 2557 ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr); 2558 2559 if( coarse_color == 0 ) { 2560 ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr); 2561 ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr); 2562 /*printf("Details of coarse comm\n"); 2563 printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm); 2564 printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/ 2565 } else { 2566 rank_coarse_comm = MPI_PROC_NULL; 2567 } 2568 2569 /* master proc take care of arranging and distributing coarse informations */ 2570 if(rank_coarse_comm == master_proc) { 2571 ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr); 2572 /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr); 2573 ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/ 2574 total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); 2575 total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt)); 2576 /* some initializations */ 2577 displacements_recv[0]=0; 2578 /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */ 2579 /* count from how many processes the j-th process of the coarse decomposition will receive data */ 2580 for(j=0;j<size_coarse_comm;j++) 2581 for(i=0;i<size_prec_comm;i++) 2582 if(coarse_subdivision[i]==j) 2583 total_count_recv[j]++; 2584 /* displacements needed for scatterv of total_ranks_recv */ 2585 for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1]; 2586 /* Now fill 
properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */ 2587 ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr); 2588 for(j=0;j<size_coarse_comm;j++) { 2589 for(i=0;i<size_prec_comm;i++) { 2590 if(coarse_subdivision[i]==j) { 2591 total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i; 2592 total_count_recv[j]+=1; 2593 } 2594 } 2595 } 2596 /*for(j=0;j<size_coarse_comm;j++) { 2597 printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]); 2598 for(i=0;i<total_count_recv[j];i++) { 2599 printf("%d ",total_ranks_recv[displacements_recv[j]+i]); 2600 } 2601 printf("\n"); 2602 }*/ 2603 2604 /* identify new decomposition in terms of ranks in the old communicator */ 2605 for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm; 2606 /*printf("coarse_subdivision in old end new ranks\n"); 2607 for(i=0;i<size_prec_comm;i++) 2608 if(coarse_subdivision[i]!=MPI_PROC_NULL) { 2609 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm); 2610 } else { 2611 printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]); 2612 } 2613 printf("\n");*/ 2614 } 2615 2616 /* Scatter new decomposition for send details */ 2617 ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2618 /* Scatter receiving details to members of coarse decomposition */ 2619 if( coarse_color == 0) { 2620 ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2621 ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr); 2622 ierr = 
MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr); 2623 } 2624 2625 /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to); 2626 if(coarse_color == 0) { 2627 printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv); 2628 for(i=0;i<count_recv;i++) 2629 printf("%d ",ranks_recv[i]); 2630 printf("\n"); 2631 }*/ 2632 2633 if(rank_prec_comm == master_proc) { 2634 /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr); 2635 ierr = PetscFree(total_count_recv);CHKERRQ(ierr); 2636 ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/ 2637 free(coarse_subdivision); 2638 free(total_count_recv); 2639 free(total_ranks_recv); 2640 ierr = PetscFree(displacements_recv);CHKERRQ(ierr); 2641 } 2642 break; 2643 } 2644 2645 case(REPLICATED_BDDC): 2646 2647 pcbddc->coarse_communications_type = GATHERS_BDDC; 2648 coarse_mat_type = MATSEQAIJ; 2649 coarse_pc_type = PCLU; 2650 coarse_ksp_type = KSPPREONLY; 2651 coarse_comm = PETSC_COMM_SELF; 2652 active_rank = rank_prec_comm; 2653 break; 2654 2655 case(PARALLEL_BDDC): 2656 2657 pcbddc->coarse_communications_type = SCATTERS_BDDC; 2658 coarse_mat_type = MATMPIAIJ; 2659 coarse_pc_type = PCREDUNDANT; 2660 coarse_ksp_type = KSPPREONLY; 2661 coarse_comm = prec_comm; 2662 active_rank = rank_prec_comm; 2663 break; 2664 2665 case(SEQUENTIAL_BDDC): 2666 pcbddc->coarse_communications_type = GATHERS_BDDC; 2667 coarse_mat_type = MATSEQAIJ; 2668 coarse_pc_type = PCLU; 2669 coarse_ksp_type = KSPPREONLY; 2670 coarse_comm = PETSC_COMM_SELF; 2671 active_rank = master_proc; 2672 break; 2673 } 2674 2675 switch(pcbddc->coarse_communications_type){ 2676 2677 case(SCATTERS_BDDC): 2678 { 2679 if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) { 2680 2681 PetscMPIInt send_size; 2682 PetscInt *aux_ins_indices; 2683 PetscInt ii,jj; 2684 MPI_Request *requests; 2685 2686 /* allocate auxiliary space */ 2687 ierr = 
PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2688 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2689 ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr); 2690 ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr); 2691 /* allocate stuffs for message massing */ 2692 ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr); 2693 for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL; 2694 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2695 ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2696 /* fill up quantities */ 2697 j=0; 2698 for(i=0;i<count_recv;i++){ 2699 ii = ranks_recv[i]; 2700 localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii]; 2701 localdispl2[i]=j; 2702 j+=localsizes2[i]; 2703 jj = pcbddc->local_primal_displacements[ii]; 2704 for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1; /* it counts the coarse subdomains sharing the coarse node */ 2705 } 2706 /*printf("aux_ins_indices 1\n"); 2707 for(i=0;i<pcbddc->coarse_size;i++) 2708 printf("%d ",aux_ins_indices[i]); 2709 printf("\n");*/ 2710 /* temp_coarse_mat_vals used to store temporarly received matrix values */ 2711 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2712 /* evaluate how many values I will insert in coarse mat */ 2713 ins_local_primal_size=0; 2714 for(i=0;i<pcbddc->coarse_size;i++) 2715 if(aux_ins_indices[i]) 2716 ins_local_primal_size++; 2717 /* evaluate indices I will insert in coarse mat */ 2718 ierr = PetscMalloc ( 
ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr); 2719 j=0; 2720 for(i=0;i<pcbddc->coarse_size;i++) 2721 if(aux_ins_indices[i]) 2722 ins_local_primal_indices[j++]=i; 2723 /* use aux_ins_indices to realize a global to local mapping */ 2724 j=0; 2725 for(i=0;i<pcbddc->coarse_size;i++){ 2726 if(aux_ins_indices[i]==0){ 2727 aux_ins_indices[i]=-1; 2728 } else { 2729 aux_ins_indices[i]=j; 2730 j++; 2731 } 2732 } 2733 2734 /*printf("New details localsizes2 localdispl2\n"); 2735 for(i=0;i<count_recv;i++) 2736 printf("(%d %d) ",localsizes2[i],localdispl2[i]); 2737 printf("\n"); 2738 printf("aux_ins_indices 2\n"); 2739 for(i=0;i<pcbddc->coarse_size;i++) 2740 printf("%d ",aux_ins_indices[i]); 2741 printf("\n"); 2742 printf("ins_local_primal_indices\n"); 2743 for(i=0;i<ins_local_primal_size;i++) 2744 printf("%d ",ins_local_primal_indices[i]); 2745 printf("\n"); 2746 printf("coarse_submat_vals\n"); 2747 for(i=0;i<pcbddc->local_primal_size;i++) 2748 for(j=0;j<pcbddc->local_primal_size;j++) 2749 printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]); 2750 printf("\n");*/ 2751 2752 /* processes partecipating in coarse problem receive matrix data from their friends */ 2753 for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr); 2754 if(rank_coarse_proc_send_to != MPI_PROC_NULL ) { 2755 send_size=pcbddc->local_primal_size*pcbddc->local_primal_size; 2756 ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr); 2757 } 2758 ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2759 2760 /*if(coarse_color == 0) { 2761 printf("temp_coarse_mat_vals\n"); 2762 for(k=0;k<count_recv;k++){ 2763 printf("---- %d ----\n",ranks_recv[k]); 2764 
for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++) 2765 for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++) 2766 printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]); 2767 printf("\n"); 2768 } 2769 }*/ 2770 /* calculate data to insert in coarse mat */ 2771 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2772 PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar)); 2773 2774 PetscMPIInt rr,kk,lps,lpd; 2775 PetscInt row_ind,col_ind; 2776 for(k=0;k<count_recv;k++){ 2777 rr = ranks_recv[k]; 2778 kk = localdispl2[k]; 2779 lps = pcbddc->local_primal_sizes[rr]; 2780 lpd = pcbddc->local_primal_displacements[rr]; 2781 /*printf("Inserting the following indices (received from %d)\n",rr);*/ 2782 for(j=0;j<lps;j++){ 2783 col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]]; 2784 for(i=0;i<lps;i++){ 2785 row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]]; 2786 /*printf("%d %d\n",row_ind,col_ind);*/ 2787 ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i]; 2788 } 2789 } 2790 } 2791 ierr = PetscFree(requests);CHKERRQ(ierr); 2792 ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr); 2793 ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); 2794 if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); } 2795 2796 /* create local to global mapping needed by coarse MATIS */ 2797 { 2798 IS coarse_IS; 2799 if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr); 2800 coarse_comm = prec_comm; 2801 active_rank=rank_prec_comm; 2802 ierr = 
ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr); 2803 ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr); 2804 ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr); 2805 } 2806 } 2807 if(pcbddc->coarse_problem_type==PARALLEL_BDDC) { 2808 /* arrays for values insertion */ 2809 ins_local_primal_size = pcbddc->local_primal_size; 2810 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2811 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2812 for(j=0;j<ins_local_primal_size;j++){ 2813 ins_local_primal_indices[j]=pcbddc->local_primal_indices[j]; 2814 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i]; 2815 } 2816 } 2817 break; 2818 2819 } 2820 2821 case(GATHERS_BDDC): 2822 { 2823 2824 PetscMPIInt mysize,mysize2; 2825 2826 if(rank_prec_comm==active_rank) { 2827 ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 2828 pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar)); 2829 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr); 2830 ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr); 2831 /* arrays for values insertion */ 2832 ins_local_primal_size = pcbddc->coarse_size; 2833 ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr); 2834 ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr); 2835 for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i]; 2836 localdispl2[0]=0; 2837 for(i=1;i<size_prec_comm;i++) 
localdispl2[i]=localsizes2[i-1]+localdispl2[i-1]; 2838 j=0; 2839 for(i=0;i<size_prec_comm;i++) j+=localsizes2[i]; 2840 ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr); 2841 } 2842 2843 mysize=pcbddc->local_primal_size; 2844 mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size; 2845 if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){ 2846 ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr); 2847 ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr); 2848 } else { 2849 ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr); 2850 ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr); 2851 } 2852 2853 /* free data structures no longer needed and allocate some space which will be needed in BDDC application */ 2854 if(rank_prec_comm==active_rank) { 2855 PetscInt offset,offset2,row_ind,col_ind; 2856 for(j=0;j<ins_local_primal_size;j++){ 2857 ins_local_primal_indices[j]=j; 2858 for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=0.0; 2859 } 2860 for(k=0;k<size_prec_comm;k++){ 2861 offset=pcbddc->local_primal_displacements[k]; 2862 offset2=localdispl2[k]; 2863 for(j=0;j<pcbddc->local_primal_sizes[k];j++){ 2864 col_ind=pcbddc->replicated_local_primal_indices[offset+j]; 2865 for(i=0;i<pcbddc->local_primal_sizes[k];i++){ 2866 row_ind=pcbddc->replicated_local_primal_indices[offset+i]; 2867 
ins_coarse_mat_vals[col_ind*pcbddc->coarse_size+row_ind]+=temp_coarse_mat_vals[offset2+j*pcbddc->local_primal_sizes[k]+i]; 2868 } 2869 } 2870 } 2871 } 2872 break; 2873 }/* switch on coarse problem and communications associated with finished */ 2874 } 2875 2876 /* Now create and fill up coarse matrix */ 2877 if( rank_prec_comm == active_rank ) { 2878 if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) { 2879 ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr); 2880 ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr); 2881 ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr); 2882 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2883 ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2884 ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2885 } else { 2886 Mat matis_coarse_local_mat; 2887 /* remind bs */ 2888 ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr); 2889 ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr); 2890 ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr); 2891 ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr); 2892 ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */ 2893 ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 2894 } 2895 ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2896 ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr); 2897 ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2898 
ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2899 2900 /* PetscViewer view_out; 2901 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr); 2902 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2903 ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr); 2904 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 2905 2906 ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 2907 /* Preconditioner for coarse problem */ 2908 ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr); 2909 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 2910 ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2911 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2912 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2913 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 2914 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 2915 /* Allow user's customization */ 2916 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr); 2917 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2918 /* Set Up PC for coarse problem BDDC */ 2919 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2920 if(dbg_flag) { 2921 ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr); 2922 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2923 } 2924 ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr); 2925 } 2926 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2927 if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) { 2928 if(dbg_flag) { 2929 ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr); 2930 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2931 } 2932 } 2933 } 2934 if(pcbddc->coarse_communications_type == SCATTERS_BDDC) { 2935 IS local_IS,global_IS; 2936 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr); 2937 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr); 2938 ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2939 ierr = ISDestroy(&local_IS);CHKERRQ(ierr); 2940 ierr = ISDestroy(&global_IS);CHKERRQ(ierr); 2941 } 2942 2943 2944 /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */ 2945 if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) { 2946 PetscScalar m_one=-1.0; 2947 PetscReal infty_error,lambda_min,lambda_max,kappa_2; 2948 const KSPType check_ksp_type=KSPGMRES; 2949 2950 /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */ 2951 ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr); 2952 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr); 2953 ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 2954 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2955 ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr); 2956 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 2957 ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 2958 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr); 2959 ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 2960 if(dbg_flag) { 2961 kappa_2=lambda_max/lambda_min; 2962 ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr); 2963 ierr = 
VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr); 2964 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2965 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr); 2966 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr); 2967 ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr); 2968 } 2969 /* restore coarse ksp to default values */ 2970 ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 2971 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 2972 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 2973 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr); 2974 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 2975 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 2976 } 2977 2978 /* free data structures no longer needed */ 2979 if(coarse_ISLG) { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); } 2980 if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); } 2981 if(ins_coarse_mat_vals) { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);} 2982 if(localsizes2) { ierr = PetscFree(localsizes2);CHKERRQ(ierr);} 2983 if(localdispl2) { ierr = PetscFree(localdispl2);CHKERRQ(ierr);} 2984 if(temp_coarse_mat_vals) { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);} 2985 2986 PetscFunctionReturn(0); 2987 } 2988 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "PCBDDCManageLocalBoundaries" 2991 static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc) 2992 { 2993 2994 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2995 PC_IS *pcis = (PC_IS*)pc->data; 2996 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2997 
PCBDDCGraph mat_graph=pcbddc->mat_graph; 2998 PetscInt *queue_in_global_numbering,*is_indices,*auxis; 2999 PetscInt bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize; 3000 PetscInt total_counts,nodes_touched,where_values=1,vertex_size; 3001 PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0; 3002 PetscBool same_set; 3003 MPI_Comm interface_comm=((PetscObject)pc)->comm; 3004 PetscBool use_faces=PETSC_FALSE,use_edges=PETSC_FALSE; 3005 const PetscInt *neumann_nodes; 3006 const PetscInt *dirichlet_nodes; 3007 IS used_IS,*custom_ISForDofs; 3008 PetscScalar *array; 3009 PetscScalar *array2; 3010 PetscViewer viewer=pcbddc->dbg_viewer; 3011 3012 PetscFunctionBegin; 3013 /* Setup local adjacency graph */ 3014 mat_graph->nvtxs=pcis->n; 3015 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 3016 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 3017 i = mat_graph->nvtxs; 3018 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 3019 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 3020 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 3021 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3022 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3023 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3024 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3025 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3026 3027 /* Setting dofs splitting in mat_graph->which_dof 3028 Get information about dofs' splitting if provided by the user 3029 Otherwise it assumes a constant block size */ 3030 vertex_size=0; 3031 if(!pcbddc->n_ISForDofs) { 3032 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 
3033 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 3034 for(i=0;i<bs;i++) { 3035 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 3036 } 3037 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 3038 vertex_size=1; 3039 /* remove my references to IS objects */ 3040 for(i=0;i<bs;i++) { 3041 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 3042 } 3043 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 3044 } 3045 for(i=0;i<pcbddc->n_ISForDofs;i++) { 3046 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 3047 ierr = ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 3048 for(j=0;j<k;j++) { 3049 mat_graph->which_dof[is_indices[j]]=i; 3050 } 3051 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 3052 } 3053 /* use mat block size as vertex size if it has not yet set */ 3054 if(!vertex_size) { 3055 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3056 } 3057 3058 /* count number of neigh per node */ 3059 total_counts=0; 3060 for(i=1;i<pcis->n_neigh;i++){ 3061 s=pcis->n_shared[i]; 3062 total_counts+=s; 3063 for(j=0;j<s;j++){ 3064 mat_graph->count[pcis->shared[i][j]] += 1; 3065 } 3066 } 3067 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 3068 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 3069 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3070 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3071 if(used_IS) { 3072 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 3073 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3074 for(i=0;i<neumann_bsize;i++){ 3075 iindex = neumann_nodes[i]; 3076 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 3077 mat_graph->count[iindex]+=1; 3078 total_counts++; 3079 array[iindex]=array[iindex]+1.0; 3080 } else if(array[iindex]>0.0) { 3081 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! Found duplicate node %d\n",iindex); 3082 } 3083 } 3084 } 3085 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3086 /* allocate space for storing the set of neighbours for each node */ 3087 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 3088 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 3089 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 3090 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3091 for(i=1;i<pcis->n_neigh;i++){ 3092 s=pcis->n_shared[i]; 3093 for(j=0;j<s;j++) { 3094 k=pcis->shared[i][j]; 3095 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 3096 mat_graph->count[k]+=1; 3097 } 3098 } 3099 /* Check consistency of Neumann nodes */ 3100 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3101 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3102 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3103 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3106 /* set -1 fake neighbour to mimic Neumann boundary */ 3107 if(used_IS) { 3108 for(i=0;i<neumann_bsize;i++){ 3109 iindex = neumann_nodes[i]; 3110 if(mat_graph->count[iindex] > NEUMANNCNT){ 3111 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 3112 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing 
subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 3113 } 3114 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 3115 mat_graph->count[iindex]+=1; 3116 } 3117 } 3118 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 3119 } 3120 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3121 /* sort set of sharing subdomains */ 3122 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 3123 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 3124 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 3125 nodes_touched=0; 3126 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 3127 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 3128 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3129 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3130 if(used_IS) { 3131 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3132 if(dirichlet_bsize && matis->pure_neumann) { 3133 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 3134 } 3135 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3136 for(i=0;i<dirichlet_bsize;i++){ 3137 iindex=dirichlet_nodes[i]; 3138 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 3139 if(array[iindex]>0.0) { 3140 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! 
Wrong node %d\n",iindex); 3141 } 3142 mat_graph->touched[iindex]=PETSC_TRUE; 3143 mat_graph->where[iindex]=0; 3144 nodes_touched++; 3145 array2[iindex]=array2[iindex]+1.0; 3146 } 3147 } 3148 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3149 } 3150 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3151 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3152 /* Check consistency of Dirichlet nodes */ 3153 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3154 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3155 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3156 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3157 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3158 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3159 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3160 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3161 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3162 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3163 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3164 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3165 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3166 if(used_IS) { 3167 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 3168 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3169 for(i=0;i<dirichlet_bsize;i++){ 3170 iindex=dirichlet_nodes[i]; 3171 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 3172 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC 
must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 3173 } 3174 } 3175 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 3176 } 3177 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3178 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 3179 3180 for(i=0;i<mat_graph->nvtxs;i++){ 3181 if(!mat_graph->count[i]){ /* interior nodes */ 3182 mat_graph->touched[i]=PETSC_TRUE; 3183 mat_graph->where[i]=0; 3184 nodes_touched++; 3185 } 3186 } 3187 mat_graph->ncmps = 0; 3188 i=0; 3189 while(nodes_touched<mat_graph->nvtxs) { 3190 /* find first untouched node in local ordering */ 3191 while(mat_graph->touched[i]) i++; 3192 mat_graph->touched[i]=PETSC_TRUE; 3193 mat_graph->where[i]=where_values; 3194 nodes_touched++; 3195 /* now find all other nodes having the same set of sharing subdomains */ 3196 for(j=i+1;j<mat_graph->nvtxs;j++){ 3197 /* check for same number of sharing subdomains and dof number */ 3198 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 3199 /* check for same set of sharing subdomains */ 3200 same_set=PETSC_TRUE; 3201 for(k=0;k<mat_graph->count[j];k++){ 3202 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 3203 same_set=PETSC_FALSE; 3204 } 3205 } 3206 /* I found a friend of mine */ 3207 if(same_set) { 3208 mat_graph->where[j]=where_values; 3209 mat_graph->touched[j]=PETSC_TRUE; 3210 nodes_touched++; 3211 } 3212 } 3213 } 3214 where_values++; 3215 } 3216 where_values--; if(where_values<0) where_values=0; 3217 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3218 /* Find connected components defined on the shared interface */ 3219 if(where_values) { 3220 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3221 /* For consistency among 
neughbouring procs, I need to sort (by global ordering) each connected component */ 3222 for(i=0;i<mat_graph->ncmps;i++) { 3223 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3224 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3225 } 3226 } 3227 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 3228 for(i=0;i<where_values;i++) { 3229 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 3230 if(mat_graph->where_ncmps[i]>1) { 3231 adapt_interface=1; 3232 break; 3233 } 3234 } 3235 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 3236 if(pcbddc->dbg_flag && adapt_interface_reduced) { 3237 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 3238 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3239 } 3240 if(where_values && adapt_interface_reduced) { 3241 3242 PetscInt sum_requests=0,my_rank; 3243 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 3244 PetscInt temp_buffer_size,ins_val,global_where_counter; 3245 PetscInt *cum_recv_counts; 3246 PetscInt *where_to_nodes_indices; 3247 PetscInt *petsc_buffer; 3248 PetscMPIInt *recv_buffer; 3249 PetscMPIInt *recv_buffer_where; 3250 PetscMPIInt *send_buffer; 3251 PetscMPIInt size_of_send; 3252 PetscInt *sizes_of_sends; 3253 MPI_Request *send_requests; 3254 MPI_Request *recv_requests; 3255 PetscInt *where_cc_adapt; 3256 PetscInt **temp_buffer; 3257 PetscInt *nodes_to_temp_buffer_indices; 3258 PetscInt *add_to_where; 3259 3260 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 3261 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 3262 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 3263 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 3264 /* first count how many neighbours per connected component I will receive from */ 3265 cum_recv_counts[0]=0; 3266 for(i=1;i<where_values+1;i++){ 3267 j=0; 3268 while(mat_graph->where[j] != i) j++; 3269 where_to_nodes_indices[i-1]=j; 3270 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 3271 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 3272 } 3273 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 3274 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 3275 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 3276 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 3277 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 3278 for(i=0;i<cum_recv_counts[where_values];i++) { 3279 send_requests[i]=MPI_REQUEST_NULL; 3280 recv_requests[i]=MPI_REQUEST_NULL; 3281 } 3282 /* exchange with my neighbours the number of my connected components on the shared interface */ 3283 for(i=0;i<where_values;i++){ 3284 j=where_to_nodes_indices[i]; 3285 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3286 for(;k<mat_graph->count[j];k++){ 3287 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3288 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3289 sum_requests++; 3290 } 3291 } 3292 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3293 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3294 /* determine the connected component I need to adapt */ 3295 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 3296 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3297 for(i=0;i<where_values;i++){ 3298 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 3299 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 3300 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 3301 where_cc_adapt[i]=PETSC_TRUE; 3302 break; 3303 } 3304 } 3305 } 3306 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 3307 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 3308 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 3309 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 3310 sum_requests=0; 3311 start_of_send=0; 3312 start_of_recv=cum_recv_counts[where_values]; 3313 for(i=0;i<where_values;i++) { 3314 if(where_cc_adapt[i]) { 3315 size_of_send=0; 3316 for(j=i;j<mat_graph->ncmps;j++) { 3317 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 
1 to where_values included */ 3318 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3319 size_of_send+=1; 3320 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 3321 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 3322 } 3323 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 3324 } 3325 } 3326 j = where_to_nodes_indices[i]; 3327 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 3328 sizes_of_sends[i]=size_of_send; 3329 for(;k<mat_graph->count[j];k++){ 3330 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3331 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3332 sum_requests++; 3333 } 3334 start_of_send+=size_of_send; 3335 } 3336 } 3337 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3338 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3339 buffer_size=0; 3340 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 3341 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 3342 /* now exchange the data */ 3343 start_of_recv=0; 3344 start_of_send=0; 3345 sum_requests=0; 3346 for(i=0;i<where_values;i++) { 3347 if(where_cc_adapt[i]) { 3348 size_of_send = sizes_of_sends[i]; 3349 j = where_to_nodes_indices[i]; 3350 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 3351 for(;k<mat_graph->count[j];k++){ 3352 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 3353 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 3354 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 3355 start_of_recv+=size_of_recv; 3356 sum_requests++; 3357 } 3358 start_of_send+=size_of_send; 3359 } 3360 } 3361 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3362 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3363 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 3364 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 3365 for(j=0;j<buffer_size;) { 3366 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 3367 k=petsc_buffer[j]+1; 3368 j+=k; 3369 } 3370 sum_requests=cum_recv_counts[where_values]; 3371 start_of_recv=0; 3372 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3373 global_where_counter=0; 3374 for(i=0;i<where_values;i++){ 3375 if(where_cc_adapt[i]){ 3376 temp_buffer_size=0; 3377 /* find nodes on the shared interface we need to adapt */ 3378 for(j=0;j<mat_graph->nvtxs;j++){ 3379 if(mat_graph->where[j]==i+1) { 3380 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 3381 temp_buffer_size++; 3382 } else { 3383 nodes_to_temp_buffer_indices[j]=-1; 3384 } 3385 } 3386 /* allocate some temporary space */ 3387 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 3388 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 3389 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 3390 for(j=1;j<temp_buffer_size;j++){ 3391 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 3392 } 3393 /* analyze contributions from neighbouring subdomains for i-th conn comp 3394 temp buffer structure: 3395 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 3396 3 neighs procs with structured connected components: 3397 neigh 0: [0 1 4], [2 3]; (2 connected components) 3398 neigh 1: [0 1], [2 3 4]; (2 connected components) 3399 neigh 2: [0 4], [1], [2 3]; (3 connected components) 3400 tempbuffer (row-oriented) should be filled as: 3401 [ 0, 0, 0; 3402 0, 0, 1; 3403 1, 1, 2; 3404 1, 1, 2; 3405 0, 1, 0; ]; 3406 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
3407 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 3408 */ 3409 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 3410 ins_val=0; 3411 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 3412 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 3413 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 3414 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 3415 } 3416 buffer_size+=k; 3417 ins_val++; 3418 } 3419 start_of_recv+=size_of_recv; 3420 sum_requests++; 3421 } 3422 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 3423 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 3424 for(j=0;j<temp_buffer_size;j++){ 3425 if(!add_to_where[j]){ /* found a new cc */ 3426 global_where_counter++; 3427 add_to_where[j]=global_where_counter; 3428 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 3429 same_set=PETSC_TRUE; 3430 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 3431 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 3432 same_set=PETSC_FALSE; 3433 break; 3434 } 3435 } 3436 if(same_set) add_to_where[k]=global_where_counter; 3437 } 3438 } 3439 } 3440 /* insert new data in where array */ 3441 temp_buffer_size=0; 3442 for(j=0;j<mat_graph->nvtxs;j++){ 3443 if(mat_graph->where[j]==i+1) { 3444 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 3445 temp_buffer_size++; 3446 } 3447 } 3448 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 3449 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 3450 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 3451 } 3452 } 3453 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 3454 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 3455 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 3456 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 3457 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 3458 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 3459 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 3460 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 3461 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 3462 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 3463 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 3464 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 3465 if(global_where_counter) { 3466 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 3467 global_where_counter=0; 3468 for(i=0;i<mat_graph->nvtxs;i++){ 3469 if(mat_graph->where[i] && !mat_graph->touched[i]) { 3470 global_where_counter++; 3471 for(j=i+1;j<mat_graph->nvtxs;j++){ 3472 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 3473 mat_graph->where[j]=global_where_counter; 3474 mat_graph->touched[j]=PETSC_TRUE; 3475 } 3476 } 3477 mat_graph->where[i]=global_where_counter; 3478 mat_graph->touched[i]=PETSC_TRUE; 3479 } 3480 } 3481 where_values=global_where_counter; 3482 } 3483 if(global_where_counter) { 3484 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3485 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 3486 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 3487 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 3488 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 3489 for(i=0;i<mat_graph->ncmps;i++) { 3490 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 3491 ierr = 
PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 3492 } 3493 } 3494 } /* Finished adapting interface */ 3495 PetscInt nfc=0; 3496 PetscInt nec=0; 3497 PetscInt nvc=0; 3498 PetscBool twodim_flag=PETSC_FALSE; 3499 for (i=0; i<mat_graph->ncmps; i++) { 3500 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3501 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 3502 nfc++; 3503 } else { /* note that nec will be zero in 2d */ 3504 nec++; 3505 } 3506 } else { 3507 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3508 } 3509 } 3510 3511 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 3512 nec = nfc; 3513 nfc = 0; 3514 twodim_flag = PETSC_TRUE; 3515 } 3516 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 3517 k=0; 3518 for (i=0; i<mat_graph->ncmps; i++) { 3519 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 3520 if( j > k) { 3521 k=j; 3522 } 3523 } 3524 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 3525 3526 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 3527 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 3528 use_faces=PETSC_TRUE; 3529 } 3530 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 3531 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 3532 use_edges=PETSC_TRUE; 3533 } 3534 nfc=0; 3535 nec=0; 3536 for (i=0; i<mat_graph->ncmps; i++) { 3537 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 3538 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 3539 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 3540 } 3541 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 3542 if(twodim_flag) { 3543 if(use_edges) { 3544 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3545 nec++; 3546 } 3547 } else { 3548 if(use_faces) { 3549 
ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 3550 nfc++; 3551 } 3552 } 3553 } else { 3554 if(use_edges) { 3555 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 3556 nec++; 3557 } 3558 } 3559 } 3560 } 3561 pcbddc->n_ISForFaces=nfc; 3562 pcbddc->n_ISForEdges=nec; 3563 nvc=0; 3564 if( !pcbddc->constraints_flag ) { 3565 for (i=0; i<mat_graph->ncmps; i++) { 3566 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 3567 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 3568 auxis[nvc]=mat_graph->queue[j]; 3569 nvc++; 3570 } 3571 } 3572 } 3573 } 3574 /* sort vertex set (by local ordering) */ 3575 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 3576 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 3577 3578 if(pcbddc->dbg_flag) { 3579 3580 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3581 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3582 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3583 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 3584 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3585 for(i=0;i<mat_graph->nvtxs;i++) { 3586 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 3587 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 3588 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->adjncy[j]);CHKERRQ(ierr); 3589 } 3590 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3591 }*/ 3592 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 3593 for(i=0;i<mat_graph->ncmps;i++) { 3594 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 3595 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 3596 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 3597 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 3598 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 3599 } 3600 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 3601 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 3602 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 3603 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 3604 } 3605 } 3606 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 3607 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 3608 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 3609 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 3610 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3611 } 3612 3613 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 3614 ierr = PetscFree(auxis);CHKERRQ(ierr); 3615 PetscFunctionReturn(0); 3616 3617 } 3618 3619 /* 
-------------------------------------------------------------------------- */

/* The following code has been adapted from function IsConnectedSubdomain contained
   in source file contig.c of METIS library (version 5.0.1)
   It finds connected components of each partition labeled from 1 to n_dist */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCFindConnectedComponents"
/*
   For each partition pid = 1..n_dist (as stored in graph->where), compute the
   connected components of the subgraph induced by the vertices with
   where[] == pid, via breadth-first search on the CSR adjacency (xadj/adjncy).

   On exit:
     - graph->queue holds the partitioned vertices grouped by connected
       component (vertices with where[] == 0 are ignored);
     - graph->cptr[c] .. graph->cptr[c+1]-1 index the c-th component in queue;
     - graph->where_ncmps[n] is the number of components of partition n+1;
     - graph->ncmps is the total number of components found.

   graph->touched, graph->queue and graph->cptr are used as workspace/output;
   they are assumed to have room for nvtxs (nvtxs+1 for cptr) entries.
*/
static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist)
{
  PetscInt  i, j, k, nvtxs, first, last, nleft, ncmps, pid, cum_queue, n, ncmps_pid;
  PetscInt  *xadj, *adjncy, *where, *queue;
  PetscInt  *cptr;
  PetscBool *touched;

  PetscFunctionBegin;

  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  where   = graph->where;
  touched = graph->touched;
  queue   = graph->queue;
  cptr    = graph->cptr;

  /* reset visitation flags */
  for (i=0; i<nvtxs; i++) touched[i] = PETSC_FALSE;

  cum_queue = 0;
  ncmps     = 0;

  for (n=0; n<n_dist; n++) {
    pid = n+1; /* partition labeled by 0 is discarded */
    /* count vertices belonging to this partition */
    nleft = 0;
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid) nleft++;
    }
    /* Guard against an empty partition: without this, the seed search below
       falls through with i == nvtxs and writes touched[nvtxs] out of bounds,
       while also recording a spurious empty component. */
    if (!nleft) {
      graph->where_ncmps[n] = 0;
      continue;
    }
    /* find a seed vertex for the BFS of this partition */
    for (i=0; i<nvtxs; i++) {
      if (where[i] == pid) break;
    }
    touched[i]       = PETSC_TRUE;
    queue[cum_queue] = i;
    first = 0; last = 1;
    cptr[ncmps] = cum_queue; /* This actually points to queue */
    ncmps_pid   = 0;
    while (first != nleft) {
      if (first == last) { /* BFS queue drained but vertices remain: start a new component */
        cptr[++ncmps] = first+cum_queue;
        ncmps_pid++;
        for (i=0; i<nvtxs; i++) { /* find another starting vertex */
          if (where[i] == pid && !touched[i]) break;
        }
        queue[cum_queue+last] = i;
        last++;
        touched[i] = PETSC_TRUE;
      }
      i = queue[cum_queue+first];
      first++;
      /* enqueue untouched neighbours belonging to the same partition */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        if (where[k] == pid && !touched[k]) {
          queue[cum_queue+last] = k;
          last++;
          touched[k] = PETSC_TRUE;
        }
      }
    }
    /* close the last component of this partition */
    cptr[++ncmps] = first+cum_queue;
    ncmps_pid++;
    cum_queue = cptr[ncmps];
    graph->where_ncmps[n] = ncmps_pid;
  }
  graph->ncmps = ncmps;

  PetscFunctionReturn(0);
}