1 /* TODOLIST 2 DofSplitting and DM attached to pc? 3 Change SetNeumannBoundaries to SetNeumannBoundariesLocal and provide new SetNeumannBoundaries (same Dirichlet) 4 - change prec_type to switch_inexact_prec_type 5 Inexact solvers: global preconditioner application is ready, ask to developers (Jed?) on how to best implement Dohrmann's approach (PCSHELL?) 6 change how to deal with the coarse problem (PCBDDCSetCoarseEnvironment): 7 - mind the problem with coarsening_factor 8 - simplify coarse problem structure -> PCBDDC or PCREDUDANT, nothing else -> same comm for all levels? 9 - remove coarse enums and allow use of PCBDDCGetCoarseKSP 10 - remove metis dependency -> use MatPartitioning for multilevel -> Assemble serial adjacency in ManageLocalBoundaries? 11 - Add levels' slot to bddc data structure and associated Set/Get functions 12 code refactoring: 13 - pick up better names for static functions 14 change options structure: 15 - insert BDDC into MG framework? 16 provide other ops? Ask to developers 17 remove all unused printf 18 man pages 19 */ 20 21 /* ---------------------------------------------------------------------------------------------------------------------------------------------- 22 Implementation of BDDC preconditioner based on: 23 C. 
Dohrmann "An approximate BDDC preconditioner", Numerical Linear Algebra with Applications Volume 14, Issue 2, pages 149-168, March 2007 24 ---------------------------------------------------------------------------------------------------------------------------------------------- */ 25 26 #include "bddc.h" /*I "petscpc.h" I*/ /* includes for fortran wrappers */ 27 #include <petscblaslapack.h> 28 /* -------------------------------------------------------------------------- */ 29 #undef __FUNCT__ 30 #define __FUNCT__ "PCSetFromOptions_BDDC" 31 PetscErrorCode PCSetFromOptions_BDDC(PC pc) 32 { 33 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = PetscOptionsHead("BDDC options");CHKERRQ(ierr); 38 /* Verbose debugging of main data structures */ 39 ierr = PetscOptionsBool("-pc_bddc_check_all" ,"Verbose (debugging) output for PCBDDC" ,"none",pcbddc->dbg_flag ,&pcbddc->dbg_flag ,PETSC_NULL);CHKERRQ(ierr); 40 /* Some customization for default primal space */ 41 ierr = PetscOptionsBool("-pc_bddc_vertices_only" ,"Use only vertices in coarse space (i.e. discard constraints)","none",pcbddc->vertices_flag ,&pcbddc->vertices_flag ,PETSC_NULL);CHKERRQ(ierr); 42 ierr = PetscOptionsBool("-pc_bddc_constraints_only","Use only constraints in coarse space (i.e. discard vertices)","none",pcbddc->constraints_flag,&pcbddc->constraints_flag,PETSC_NULL);CHKERRQ(ierr); 43 ierr = PetscOptionsBool("-pc_bddc_faces_only" ,"Use only faces among constraints of coarse space (i.e. discard edges)" ,"none",pcbddc->faces_flag ,&pcbddc->faces_flag ,PETSC_NULL);CHKERRQ(ierr); 44 ierr = PetscOptionsBool("-pc_bddc_edges_only" ,"Use only edges among constraints of coarse space (i.e. 
discard faces)" ,"none",pcbddc->edges_flag ,&pcbddc->edges_flag ,PETSC_NULL);CHKERRQ(ierr); 45 /* Coarse solver context */ 46 static const char * const avail_coarse_problems[] = {"sequential","replicated","parallel","multilevel","CoarseProblemType","PC_BDDC_",0}; /*order of choiches depends on ENUM defined in bddc.h */ 47 ierr = PetscOptionsEnum("-pc_bddc_coarse_problem_type","Set coarse problem type","none",avail_coarse_problems,(PetscEnum)pcbddc->coarse_problem_type,(PetscEnum*)&pcbddc->coarse_problem_type,PETSC_NULL);CHKERRQ(ierr); 48 /* Two different application of BDDC to the whole set of dofs, internal and interface */ 49 ierr = PetscOptionsBool("-pc_bddc_switch_preconditioning_type","Switch between M_2 (default) and M_3 preconditioners (as defined by Dohrmann)","none",pcbddc->prec_type,&pcbddc->prec_type,PETSC_NULL);CHKERRQ(ierr); 50 ierr = PetscOptionsBool("-pc_bddc_use_change_of_basis","Use change of basis approach for primal space","none",pcbddc->usechangeofbasis,&pcbddc->usechangeofbasis,PETSC_NULL);CHKERRQ(ierr); 51 ierr = PetscOptionsBool("-pc_bddc_use_change_on_faces","Use change of basis approach for face constraints","none",pcbddc->usechangeonfaces,&pcbddc->usechangeonfaces,PETSC_NULL);CHKERRQ(ierr); 52 pcbddc->usechangeonfaces = pcbddc->usechangeonfaces && pcbddc->usechangeofbasis; 53 ierr = PetscOptionsInt("-pc_bddc_coarsening_ratio","Set coarsening ratio used in multilevel coarsening","none",pcbddc->coarsening_ratio,&pcbddc->coarsening_ratio,PETSC_NULL);CHKERRQ(ierr); 54 ierr = PetscOptionsTail();CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 /* -------------------------------------------------------------------------- */ 58 EXTERN_C_BEGIN 59 #undef __FUNCT__ 60 #define __FUNCT__ "PCBDDCSetCoarseProblemType_BDDC" 61 static PetscErrorCode PCBDDCSetCoarseProblemType_BDDC(PC pc, CoarseProblemType CPT) 62 { 63 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 64 65 PetscFunctionBegin; 66 pcbddc->coarse_problem_type = CPT; 67 PetscFunctionReturn(0); 68 } 69 
EXTERN_C_END 70 #undef __FUNCT__ 71 #define __FUNCT__ "PCBDDCSetCoarseProblemType" 72 /*@ 73 PCBDDCSetCoarseProblemType - Set coarse problem type in PCBDDC. 74 75 Not collective 76 77 Input Parameters: 78 + pc - the preconditioning context 79 - CoarseProblemType - pick a better name and explain what this is 80 81 Level: intermediate 82 83 Notes: 84 Not collective but all procs must call with same arguments. 85 86 .seealso: PCBDDC 87 @*/ 88 PetscErrorCode PCBDDCSetCoarseProblemType(PC pc, CoarseProblemType CPT) 89 { 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 94 ierr = PetscTryMethod(pc,"PCBDDCSetCoarseProblemType_C",(PC,CoarseProblemType),(pc,CPT));CHKERRQ(ierr); 95 PetscFunctionReturn(0); 96 } 97 /* -------------------------------------------------------------------------- */ 98 EXTERN_C_BEGIN 99 #undef __FUNCT__ 100 #define __FUNCT__ "PCBDDCSetDirichletBoundaries_BDDC" 101 static PetscErrorCode PCBDDCSetDirichletBoundaries_BDDC(PC pc,IS DirichletBoundaries) 102 { 103 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 104 PetscErrorCode ierr; 105 106 PetscFunctionBegin; 107 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 108 ierr = PetscObjectReference((PetscObject)DirichletBoundaries);CHKERRQ(ierr); 109 pcbddc->DirichletBoundaries=DirichletBoundaries; 110 PetscFunctionReturn(0); 111 } 112 EXTERN_C_END 113 #undef __FUNCT__ 114 #define __FUNCT__ "PCBDDCSetDirichletBoundaries" 115 /*@ 116 PCBDDCSetDirichletBoundaries - Set index set defining subdomain part (in local ordering) 117 of Dirichlet boundaries for the global problem. 
118 119 Not collective 120 121 Input Parameters: 122 + pc - the preconditioning context 123 - DirichletBoundaries - sequential index set defining the subdomain part of Dirichlet boundaries (can be PETSC_NULL) 124 125 Level: intermediate 126 127 Notes: 128 129 .seealso: PCBDDC 130 @*/ 131 PetscErrorCode PCBDDCSetDirichletBoundaries(PC pc,IS DirichletBoundaries) 132 { 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 137 ierr = PetscTryMethod(pc,"PCBDDCSetDirichletBoundaries_C",(PC,IS),(pc,DirichletBoundaries));CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 /* -------------------------------------------------------------------------- */ 141 EXTERN_C_BEGIN 142 #undef __FUNCT__ 143 #define __FUNCT__ "PCBDDCSetNeumannBoundaries_BDDC" 144 static PetscErrorCode PCBDDCSetNeumannBoundaries_BDDC(PC pc,IS NeumannBoundaries) 145 { 146 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 151 ierr = PetscObjectReference((PetscObject)NeumannBoundaries);CHKERRQ(ierr); 152 pcbddc->NeumannBoundaries=NeumannBoundaries; 153 PetscFunctionReturn(0); 154 } 155 EXTERN_C_END 156 #undef __FUNCT__ 157 #define __FUNCT__ "PCBDDCSetNeumannBoundaries" 158 /*@ 159 PCBDDCSetNeumannBoundaries - Set index set defining subdomain part (in local ordering) 160 of Neumann boundaries for the global problem. 
161 162 Not collective 163 164 Input Parameters: 165 + pc - the preconditioning context 166 - NeumannBoundaries - sequential index set defining the subdomain part of Neumann boundaries (can be PETSC_NULL) 167 168 Level: intermediate 169 170 Notes: 171 172 .seealso: PCBDDC 173 @*/ 174 PetscErrorCode PCBDDCSetNeumannBoundaries(PC pc,IS NeumannBoundaries) 175 { 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 180 ierr = PetscTryMethod(pc,"PCBDDCSetNeumannBoundaries_C",(PC,IS),(pc,NeumannBoundaries));CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 /* -------------------------------------------------------------------------- */ 184 EXTERN_C_BEGIN 185 #undef __FUNCT__ 186 #define __FUNCT__ "PCBDDCGetDirichletBoundaries_BDDC" 187 static PetscErrorCode PCBDDCGetDirichletBoundaries_BDDC(PC pc,IS *DirichletBoundaries) 188 { 189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 190 191 PetscFunctionBegin; 192 *DirichletBoundaries = pcbddc->DirichletBoundaries; 193 PetscFunctionReturn(0); 194 } 195 EXTERN_C_END 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCGetDirichletBoundaries" 198 /*@ 199 PCBDDCGetDirichletBoundaries - Get index set defining subdomain part (in local ordering) 200 of Dirichlet boundaries for the global problem. 
201 202 Not collective 203 204 Input Parameters: 205 + pc - the preconditioning context 206 207 Output Parameters: 208 + DirichletBoundaries - index set defining the subdomain part of Dirichlet boundaries 209 210 Level: intermediate 211 212 Notes: 213 214 .seealso: PCBDDC 215 @*/ 216 PetscErrorCode PCBDDCGetDirichletBoundaries(PC pc,IS *DirichletBoundaries) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 222 ierr = PetscUseMethod(pc,"PCBDDCGetDirichletBoundaries_C",(PC,IS*),(pc,DirichletBoundaries));CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 /* -------------------------------------------------------------------------- */ 226 EXTERN_C_BEGIN 227 #undef __FUNCT__ 228 #define __FUNCT__ "PCBDDCGetNeumannBoundaries_BDDC" 229 static PetscErrorCode PCBDDCGetNeumannBoundaries_BDDC(PC pc,IS *NeumannBoundaries) 230 { 231 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 232 233 PetscFunctionBegin; 234 *NeumannBoundaries = pcbddc->NeumannBoundaries; 235 PetscFunctionReturn(0); 236 } 237 EXTERN_C_END 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCGetNeumannBoundaries" 240 /*@ 241 PCBDDCGetNeumannBoundaries - Get index set defining subdomain part (in local ordering) 242 of Neumann boundaries for the global problem. 
243 244 Not collective 245 246 Input Parameters: 247 + pc - the preconditioning context 248 249 Output Parameters: 250 + NeumannBoundaries - index set defining the subdomain part of Neumann boundaries 251 252 Level: intermediate 253 254 Notes: 255 256 .seealso: PCBDDC 257 @*/ 258 PetscErrorCode PCBDDCGetNeumannBoundaries(PC pc,IS *NeumannBoundaries) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 264 ierr = PetscUseMethod(pc,"PCBDDCGetNeumannBoundaries_C",(PC,IS*),(pc,NeumannBoundaries));CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 /* -------------------------------------------------------------------------- */ 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph_BDDC" 271 static PetscErrorCode PCBDDCSetLocalAdjacencyGraph_BDDC(PC pc, PetscInt nvtxs, PetscInt xadj[], PetscInt adjncy[], PetscCopyMode copymode) 272 { 273 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 274 PCBDDCGraph mat_graph=pcbddc->mat_graph; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 mat_graph->nvtxs=nvtxs; 279 ierr = PetscFree(mat_graph->xadj);CHKERRQ(ierr); 280 ierr = PetscFree(mat_graph->adjncy);CHKERRQ(ierr); 281 if(copymode == PETSC_COPY_VALUES) { 282 ierr = PetscMalloc((mat_graph->nvtxs+1)*sizeof(PetscInt),&mat_graph->xadj);CHKERRQ(ierr); 283 ierr = PetscMalloc(xadj[mat_graph->nvtxs]*sizeof(PetscInt),&mat_graph->adjncy);CHKERRQ(ierr); 284 ierr = PetscMemcpy(mat_graph->xadj,xadj,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 285 ierr = PetscMemcpy(mat_graph->adjncy,adjncy,xadj[mat_graph->nvtxs]*sizeof(PetscInt));CHKERRQ(ierr); 286 } else if(copymode == PETSC_OWN_POINTER) { 287 mat_graph->xadj=xadj; 288 mat_graph->adjncy=adjncy; 289 } else { 290 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported copy mode %d in %s\n",copymode,__FUNCT__); 291 } 292 PetscFunctionReturn(0); 293 } 294 EXTERN_C_END 295 #undef __FUNCT__ 296 #define __FUNCT__ "PCBDDCSetLocalAdjacencyGraph" 297 /*@ 298 
PCBDDCSetLocalAdjacencyGraph - Set CSR graph of local matrix for use of PCBDDC. 299 300 Not collective 301 302 Input Parameters: 303 + pc - the preconditioning context 304 - nvtxs - number of local vertices of the graph 305 - xadj, adjncy - the CSR graph 306 - copymode - either PETSC_COPY_VALUES or PETSC_OWN_POINTER. In the former case the user must free the array passed in; 307 in the latter case, memory must be obtained with PetscMalloc. 308 309 Level: intermediate 310 311 Notes: 312 313 .seealso: PCBDDC 314 @*/ 315 PetscErrorCode PCBDDCSetLocalAdjacencyGraph(PC pc,PetscInt nvtxs,PetscInt xadj[],PetscInt adjncy[], PetscCopyMode copymode) 316 { 317 PetscInt nrows,ncols; 318 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 319 PetscErrorCode ierr; 320 321 PetscFunctionBegin; 322 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 323 ierr = MatGetSize(matis->A,&nrows,&ncols);CHKERRQ(ierr); 324 if(nvtxs != nrows) { 325 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local adjacency size %d passed in %s differs from local problem size %d!\n",nvtxs,__FUNCT__,nrows); 326 } else { 327 ierr = PetscTryMethod(pc,"PCBDDCSetLocalAdjacencyGraph_C",(PC,PetscInt,PetscInt[],PetscInt[],PetscCopyMode),(pc,nvtxs,xadj,adjncy,copymode));CHKERRQ(ierr); 328 } 329 PetscFunctionReturn(0); 330 } 331 /* -------------------------------------------------------------------------- */ 332 EXTERN_C_BEGIN 333 #undef __FUNCT__ 334 #define __FUNCT__ "PCBDDCSetDofsSplitting_BDDC" 335 static PetscErrorCode PCBDDCSetDofsSplitting_BDDC(PC pc,PetscInt n_is, IS ISForDofs[]) 336 { 337 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 338 PetscInt i; 339 PetscErrorCode ierr; 340 341 PetscFunctionBegin; 342 /* Destroy ISes if they were already set */ 343 for(i=0;i<pcbddc->n_ISForDofs;i++) { 344 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 345 } 346 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 347 /* allocate space then set */ 348 ierr = PetscMalloc(n_is*sizeof(IS),&pcbddc->ISForDofs);CHKERRQ(ierr); 349 for(i=0;i<n_is;i++) { 
350 ierr = PetscObjectReference((PetscObject)ISForDofs[i]);CHKERRQ(ierr); 351 pcbddc->ISForDofs[i]=ISForDofs[i]; 352 } 353 pcbddc->n_ISForDofs=n_is; 354 PetscFunctionReturn(0); 355 } 356 EXTERN_C_END 357 #undef __FUNCT__ 358 #define __FUNCT__ "PCBDDCSetDofsSplitting" 359 /*@ 360 PCBDDCSetDofsSplitting - Set index sets defining fields of local mat. 361 362 Not collective 363 364 Input Parameters: 365 + pc - the preconditioning context 366 - n - number of index sets defining the fields 367 - IS[] - array of IS describing the fields 368 369 Level: intermediate 370 371 Notes: 372 373 .seealso: PCBDDC 374 @*/ 375 PetscErrorCode PCBDDCSetDofsSplitting(PC pc,PetscInt n_is, IS ISForDofs[]) 376 { 377 PetscErrorCode ierr; 378 379 PetscFunctionBegin; 380 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 381 ierr = PetscTryMethod(pc,"PCBDDCSetDofsSplitting_C",(PC,PetscInt,IS[]),(pc,n_is,ISForDofs));CHKERRQ(ierr); 382 PetscFunctionReturn(0); 383 } 384 /* -------------------------------------------------------------------------- */ 385 #undef __FUNCT__ 386 #define __FUNCT__ "PCPreSolve_BDDC" 387 /* -------------------------------------------------------------------------- */ 388 /* 389 PCPreSolve_BDDC - Changes the right hand side and (if necessary) the initial 390 guess if a transformation of basis approach has been selected. 391 392 Input Parameter: 393 + pc - the preconditioner contex 394 395 Application Interface Routine: PCPreSolve() 396 397 Notes: 398 The interface routine PCPreSolve() is not usually called directly by 399 the user, but instead is called by KSPSolve(). 
400 */ 401 static PetscErrorCode PCPreSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 402 { 403 PetscErrorCode ierr; 404 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 405 PC_IS *pcis = (PC_IS*)(pc->data); 406 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 407 Mat temp_mat; 408 IS dirIS; 409 PetscInt dirsize,i,*is_indices; 410 PetscScalar *array_x,*array_diagonal; 411 Vec used_vec; 412 PetscBool guess_nonzero; 413 414 PetscFunctionBegin; 415 if(x) { 416 ierr = PetscObjectReference((PetscObject)x);CHKERRQ(ierr); 417 used_vec = x; 418 } else { 419 ierr = PetscObjectReference((PetscObject)pcbddc->temp_solution);CHKERRQ(ierr); 420 used_vec = pcbddc->temp_solution; 421 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 422 } 423 /* hack into ksp data structure PCPreSolve comes earlier in src/ksp/ksp/interface/itfunc.c */ 424 if (ksp) { 425 ierr = KSPGetInitialGuessNonzero(ksp,&guess_nonzero);CHKERRQ(ierr); 426 if( !guess_nonzero ) { 427 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 428 } 429 } 430 /* store the original rhs */ 431 ierr = VecCopy(rhs,pcbddc->original_rhs);CHKERRQ(ierr); 432 433 /* Take into account zeroed rows -> change rhs and store solution removed */ 434 ierr = MatGetDiagonal(pc->pmat,pcis->vec1_global);CHKERRQ(ierr); 435 ierr = VecPointwiseDivide(pcis->vec1_global,rhs,pcis->vec1_global);CHKERRQ(ierr); 436 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 437 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 438 ierr = VecScatterBegin(matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 439 ierr = VecScatterEnd (matis->ctx,used_vec,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 440 ierr = PCBDDCGetDirichletBoundaries(pc,&dirIS);CHKERRQ(ierr); 441 if(dirIS) { 442 ierr = ISGetSize(dirIS,&dirsize);CHKERRQ(ierr); 443 ierr = VecGetArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 444 ierr = 
VecGetArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 445 ierr = ISGetIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 446 for(i=0;i<dirsize;i++) { 447 array_x[is_indices[i]]=array_diagonal[is_indices[i]]; 448 } 449 ierr = ISRestoreIndices(dirIS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 450 ierr = VecRestoreArray(pcis->vec2_N,&array_diagonal);CHKERRQ(ierr); 451 ierr = VecRestoreArray(pcis->vec1_N,&array_x);CHKERRQ(ierr); 452 } 453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 454 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 455 456 /* remove the computed solution from the rhs */ 457 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 458 ierr = MatMultAdd(pc->pmat,used_vec,rhs,rhs);CHKERRQ(ierr); 459 ierr = VecScale(used_vec,-1.0);CHKERRQ(ierr); 460 461 /* store partially computed solution and set initial guess */ 462 if(x) { 463 ierr = VecCopy(used_vec,pcbddc->temp_solution);CHKERRQ(ierr); 464 ierr = VecSet(used_vec,0.0);CHKERRQ(ierr); 465 if(pcbddc->use_exact_dirichlet) { 466 ierr = VecScatterBegin(pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 467 ierr = VecScatterEnd (pcis->global_to_D,rhs,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 468 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 469 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 470 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,used_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 471 if(ksp) { 472 ierr = KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);CHKERRQ(ierr); 473 } 474 } 475 } 476 ierr = VecDestroy(&used_vec);CHKERRQ(ierr); 477 478 /* rhs change of basis */ 479 if(pcbddc->usechangeofbasis) { 480 /* swap pointers for local matrices */ 481 temp_mat = matis->A; 482 matis->A = pcbddc->local_mat; 483 pcbddc->local_mat = temp_mat; 484 /* Get local 
rhs and apply transformation of basis */ 485 ierr = VecScatterBegin(pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = VecScatterEnd (pcis->global_to_B,rhs,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 /* from original basis to modified basis */ 488 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 489 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 490 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 491 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,rhs,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 492 } 493 PetscFunctionReturn(0); 494 } 495 /* -------------------------------------------------------------------------- */ 496 #undef __FUNCT__ 497 #define __FUNCT__ "PCPostSolve_BDDC" 498 /* -------------------------------------------------------------------------- */ 499 /* 500 PCPostSolve_BDDC - Changes the computed solution if a transformation of basis 501 approach has been selected. Also, restores rhs to its original state. 502 503 Input Parameter: 504 + pc - the preconditioner contex 505 506 Application Interface Routine: PCPostSolve() 507 508 Notes: 509 The interface routine PCPostSolve() is not usually called directly by 510 the user, but instead is called by KSPSolve(). 
511 */ 512 static PetscErrorCode PCPostSolve_BDDC(PC pc, KSP ksp, Vec rhs, Vec x) 513 { 514 PetscErrorCode ierr; 515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 516 PC_IS *pcis = (PC_IS*)(pc->data); 517 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 518 Mat temp_mat; 519 520 PetscFunctionBegin; 521 if(pcbddc->usechangeofbasis) { 522 /* swap pointers for local matrices */ 523 temp_mat = matis->A; 524 matis->A = pcbddc->local_mat; 525 pcbddc->local_mat = temp_mat; 526 /* restore rhs to its original state */ 527 if(rhs) { 528 ierr = VecCopy(pcbddc->original_rhs,rhs);CHKERRQ(ierr); 529 } 530 /* Get Local boundary and apply transformation of basis to solution vector */ 531 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 532 ierr = VecScatterEnd (pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 533 /* from modified basis to original basis */ 534 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 535 /* put back modified values into the global vec using INSERT_VALUES copy mode */ 536 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 537 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 538 } 539 /* add solution removed in presolve */ 540 if(x) { 541 ierr = VecAXPY(x,1.0,pcbddc->temp_solution);CHKERRQ(ierr); 542 } 543 PetscFunctionReturn(0); 544 } 545 /* -------------------------------------------------------------------------- */ 546 #undef __FUNCT__ 547 #define __FUNCT__ "PCSetUp_BDDC" 548 /* -------------------------------------------------------------------------- */ 549 /* 550 PCSetUp_BDDC - Prepares for the use of the BDDC preconditioner 551 by setting data structures and options. 
552 553 Input Parameter: 554 + pc - the preconditioner context 555 556 Application Interface Routine: PCSetUp() 557 558 Notes: 559 The interface routine PCSetUp() is not usually called directly by 560 the user, but instead is called by PCApply() if necessary. 561 */ 562 PetscErrorCode PCSetUp_BDDC(PC pc) 563 { 564 PetscErrorCode ierr; 565 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)(pc->data); 567 568 PetscFunctionBegin; 569 if (!pc->setupcalled) { 570 /* For BDDC we need to define a local "Neumann" problem different to that defined in PCISSetup 571 So, we set to pcnone the Neumann problem of pcis in order to avoid unneeded computation 572 Also, we decide to directly build the (same) Dirichlet problem */ 573 ierr = PetscOptionsSetValue("-is_localN_pc_type","none");CHKERRQ(ierr); 574 ierr = PetscOptionsSetValue("-is_localD_pc_type","none");CHKERRQ(ierr); 575 /* Set up all the "iterative substructuring" common block */ 576 ierr = PCISSetUp(pc);CHKERRQ(ierr); 577 /* Get stdout for dbg */ 578 if(pcbddc->dbg_flag) { 579 ierr = PetscViewerASCIIGetStdout(((PetscObject)pc)->comm,&pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 581 } 582 /* TODO MOVE CODE FRAGMENT */ 583 PetscInt im_active=0; 584 if(pcis->n) im_active = 1; 585 ierr = MPI_Allreduce(&im_active,&pcbddc->active_procs,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 586 /* Analyze local interface */ 587 ierr = PCBDDCManageLocalBoundaries(pc);CHKERRQ(ierr); 588 /* Set up local constraint matrix */ 589 ierr = PCBDDCCreateConstraintMatrix(pc);CHKERRQ(ierr); 590 /* Create coarse and local stuffs used for evaluating action of preconditioner */ 591 ierr = PCBDDCCoarseSetUp(pc);CHKERRQ(ierr); 592 /* Processes fakely involved in multilevel should not call ISLocalToGlobalMappingRestoreInfo */ 593 if ( !pcis->n_neigh ) pcis->ISLocalToGlobalMappingGetInfoWasCalled=PETSC_FALSE; 594 } 595 PetscFunctionReturn(0); 596 } 
597 598 /* -------------------------------------------------------------------------- */ 599 /* 600 PCApply_BDDC - Applies the BDDC preconditioner to a vector. 601 602 Input Parameters: 603 . pc - the preconditioner context 604 . r - input vector (global) 605 606 Output Parameter: 607 . z - output vector (global) 608 609 Application Interface Routine: PCApply() 610 */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "PCApply_BDDC" 613 PetscErrorCode PCApply_BDDC(PC pc,Vec r,Vec z) 614 { 615 PC_IS *pcis = (PC_IS*)(pc->data); 616 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 617 PetscErrorCode ierr; 618 const PetscScalar one = 1.0; 619 const PetscScalar m_one = -1.0; 620 const PetscScalar zero = 0.0; 621 622 /* This code is similar to that provided in nn.c for PCNN 623 NN interface preconditioner changed to BDDC 624 Added support for M_3 preconditioenr in the reference article (code is active if pcbddc->prec_type = PETSC_TRUE) */ 625 626 PetscFunctionBegin; 627 if(!pcbddc->use_exact_dirichlet) { 628 /* First Dirichlet solve */ 629 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 630 ierr = VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 631 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 632 /* 633 Assembling right hand side for BDDC operator 634 - vec1_D for the Dirichlet part (if needed, i.e. 
prec_flag=PETSC_TRUE) 635 - the interface part of the global vector z 636 */ 637 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 638 ierr = MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);CHKERRQ(ierr); 639 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec2_D,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 640 ierr = VecScale(pcis->vec2_D,m_one);CHKERRQ(ierr); 641 ierr = VecCopy(r,z);CHKERRQ(ierr); 642 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 644 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 645 ierr = VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 646 } else { 647 ierr = VecScatterBegin(pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648 ierr = VecScatterEnd (pcis->global_to_B,r,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649 ierr = VecSet(pcis->vec2_D,zero);CHKERRQ(ierr); 650 } 651 652 /* Apply partition of unity */ 653 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 654 655 /* Apply interface preconditioner 656 input/output vecs: pcis->vec1_B and pcis->vec1_D */ 657 ierr = PCBDDCApplyInterfacePreconditioner(pc);CHKERRQ(ierr); 658 659 /* Apply partition of unity and sum boundary values */ 660 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 661 ierr = VecSet(z,zero);CHKERRQ(ierr); 662 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 663 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,z,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 664 665 /* Second Dirichlet solve and assembling of output */ 666 ierr = VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd 
(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec3_D);CHKERRQ(ierr); 669 if(pcbddc->prec_type) { ierr = MatMultAdd(pcis->A_II,pcis->vec1_D,pcis->vec3_D,pcis->vec3_D);CHKERRQ(ierr); } 670 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec3_D,pcbddc->vec4_D);CHKERRQ(ierr); 671 ierr = VecScale(pcbddc->vec4_D,m_one);CHKERRQ(ierr); 672 if(pcbddc->prec_type) { ierr = VecAXPY (pcbddc->vec4_D,one,pcis->vec1_D);CHKERRQ(ierr); } 673 ierr = VecAXPY (pcis->vec2_D,one,pcbddc->vec4_D);CHKERRQ(ierr); 674 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 675 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 678 } 679 /* -------------------------------------------------------------------------- */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "PCDestroy_BDDC" 682 PetscErrorCode PCDestroy_BDDC(PC pc) 683 { 684 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* free data created by PCIS */ 689 ierr = PCISDestroy(pc);CHKERRQ(ierr); 690 /* free BDDC data */ 691 ierr = VecDestroy(&pcbddc->temp_solution);CHKERRQ(ierr); 692 ierr = VecDestroy(&pcbddc->original_rhs);CHKERRQ(ierr); 693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 694 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 695 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 696 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 697 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 701 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 702 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 703 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 704 ierr = 
MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 705 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 706 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 707 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 708 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 709 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 711 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 712 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 713 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 714 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 716 ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr); 717 ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr); 718 if (pcbddc->replicated_local_primal_values) { free(pcbddc->replicated_local_primal_values); } 719 ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr); 720 ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr); 721 PetscInt i; 722 for(i=0;i<pcbddc->n_ISForDofs;i++) { ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); } 723 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 724 for(i=0;i<pcbddc->n_ISForFaces;i++) { ierr = ISDestroy(&pcbddc->ISForFaces[i]);CHKERRQ(ierr); } 725 ierr = PetscFree(pcbddc->ISForFaces);CHKERRQ(ierr); 726 for(i=0;i<pcbddc->n_ISForEdges;i++) { ierr = ISDestroy(&pcbddc->ISForEdges[i]);CHKERRQ(ierr); } 727 ierr = PetscFree(pcbddc->ISForEdges);CHKERRQ(ierr); 728 ierr = ISDestroy(&pcbddc->ISForVertices);CHKERRQ(ierr); 729 /* Free graph structure */ 730 ierr = PetscFree(pcbddc->mat_graph->xadj);CHKERRQ(ierr); 731 ierr = PetscFree(pcbddc->mat_graph->adjncy);CHKERRQ(ierr); 732 ierr = PetscFree(pcbddc->mat_graph->neighbours_set[0]);CHKERRQ(ierr); 733 ierr = PetscFree(pcbddc->mat_graph->neighbours_set);CHKERRQ(ierr); 734 ierr = 
PetscFree4(pcbddc->mat_graph->where,pcbddc->mat_graph->count,pcbddc->mat_graph->cptr,pcbddc->mat_graph->queue);CHKERRQ(ierr); 735 ierr = PetscFree2(pcbddc->mat_graph->which_dof,pcbddc->mat_graph->touched);CHKERRQ(ierr); 736 ierr = PetscFree(pcbddc->mat_graph->where_ncmps);CHKERRQ(ierr); 737 ierr = PetscFree(pcbddc->mat_graph);CHKERRQ(ierr); 738 /* remove functions */ 739 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 741 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 742 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","",PETSC_NULL);CHKERRQ(ierr); 743 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","",PETSC_NULL);CHKERRQ(ierr); 744 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","",PETSC_NULL);CHKERRQ(ierr); 745 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","",PETSC_NULL);CHKERRQ(ierr); 746 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","",PETSC_NULL);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","",PETSC_NULL);CHKERRQ(ierr); 748 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","",PETSC_NULL);CHKERRQ(ierr); 749 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","",PETSC_NULL);CHKERRQ(ierr); 750 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","",PETSC_NULL);CHKERRQ(ierr); 751 /* Free the private data structure that was hanging off the PC */ 752 ierr = PetscFree(pcbddc);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 /* 
-------------------------------------------------------------------------- */ 756 EXTERN_C_BEGIN 757 #undef __FUNCT__ 758 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS_BDDC" 759 static PetscErrorCode PCBDDCMatFETIDPGetRHS_BDDC(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 760 { 761 FETIDPMat_ctx *mat_ctx; 762 PC_IS* pcis; 763 PC_BDDC* pcbddc; 764 Mat_IS* matis; 765 PetscErrorCode ierr; 766 767 PetscFunctionBegin; 768 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 769 pcis = (PC_IS*)mat_ctx->pc->data; 770 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 771 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 772 773 /* change of basis for physical rhs if needed 774 It also changes the rhs in case of dirichlet boundaries */ 775 (*mat_ctx->pc->ops->presolve)(mat_ctx->pc,PETSC_NULL,standard_rhs,PETSC_NULL); 776 /* store vectors for computation of fetidp final solution */ 777 ierr = VecScatterBegin(pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778 ierr = VecScatterEnd (pcis->global_to_D,standard_rhs,mat_ctx->temp_solution_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 780 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 781 /* scale rhs since it should be unassembled */ 782 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 783 if(!pcbddc->prec_type) { 784 /* compute partially subassembled Schur complement right-hand side */ 785 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 786 ierr = MatMult(pcis->A_BI,pcis->vec1_D,pcis->vec1_B);CHKERRQ(ierr); 787 ierr = VecAXPY(mat_ctx->temp_solution_B,-1.0,pcis->vec1_B);CHKERRQ(ierr); 788 ierr = VecSet(standard_rhs,0.0);CHKERRQ(ierr); 789 ierr = 
VecScatterBegin(pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 790 ierr = VecScatterEnd (pcis->global_to_B,mat_ctx->temp_solution_B,standard_rhs,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 791 ierr = VecScatterBegin(pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterEnd (pcis->global_to_B,standard_rhs,mat_ctx->temp_solution_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793 ierr = VecPointwiseMult(mat_ctx->temp_solution_B,pcis->D,mat_ctx->temp_solution_B);CHKERRQ(ierr); 794 } 795 /* BDDC rhs */ 796 ierr = VecCopy(mat_ctx->temp_solution_B,pcis->vec1_B);CHKERRQ(ierr); 797 if(pcbddc->prec_type) { 798 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 799 } 800 /* apply BDDC */ 801 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 802 /* Application of B_delta and assembling of rhs for fetidp fluxes */ 803 ierr = VecSet(fetidp_flux_rhs,0.0);CHKERRQ(ierr); 804 ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr); 805 ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd (mat_ctx->l2g_lambda,mat_ctx->lambda_local,fetidp_flux_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 /* restore original rhs */ 808 ierr = VecCopy(pcbddc->original_rhs,standard_rhs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 EXTERN_C_END 812 #undef __FUNCT__ 813 #define __FUNCT__ "PCBDDCMatFETIDPGetRHS" 814 /*@ 815 PCBDDCMatFETIDPGetRHS - Get rhs for FETIDP linear system. 
816 817 Collective 818 819 Input Parameters: 820 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 821 + standard_rhs - the rhs of your linear system 822 823 Output Parameters: 824 + fetidp_flux_rhs - the rhs of the FETIDP linear system 825 826 Level: developer 827 828 Notes: 829 830 .seealso: PCBDDC 831 @*/ 832 PetscErrorCode PCBDDCMatFETIDPGetRHS(Mat fetidp_mat, Vec standard_rhs, Vec fetidp_flux_rhs) 833 { 834 FETIDPMat_ctx *mat_ctx; 835 PetscErrorCode ierr; 836 837 PetscFunctionBegin; 838 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 839 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetRHS_C",(Mat,Vec,Vec),(fetidp_mat,standard_rhs,fetidp_flux_rhs));CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 /* -------------------------------------------------------------------------- */ 843 EXTERN_C_BEGIN 844 #undef __FUNCT__ 845 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution_BDDC" 846 static PetscErrorCode PCBDDCMatFETIDPGetSolution_BDDC(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 847 { 848 FETIDPMat_ctx *mat_ctx; 849 PC_IS* pcis; 850 PC_BDDC* pcbddc; 851 Mat_IS* matis; 852 PetscErrorCode ierr; 853 854 PetscFunctionBegin; 855 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 856 pcis = (PC_IS*)mat_ctx->pc->data; 857 pcbddc = (PC_BDDC*)mat_ctx->pc->data; 858 matis = (Mat_IS*)mat_ctx->pc->pmat->data; 859 860 /* apply B_delta^T */ 861 ierr = VecScatterBegin(mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 862 ierr = VecScatterEnd (mat_ctx->l2g_lambda,fetidp_flux_sol,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 863 ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 864 /* compute rhs for BDDC application */ 865 ierr = VecAYPX(pcis->vec1_B,-1.0,mat_ctx->temp_solution_B);CHKERRQ(ierr); 866 if(pcbddc->prec_type) { 867 ierr = VecCopy(mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 
868 } 869 /* apply BDDC */ 870 ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr); 871 /* put values into standard global vector */ 872 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 873 ierr = VecScatterEnd (pcis->global_to_B,pcis->vec1_B,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 874 if(!pcbddc->prec_type) { 875 /* compute values into the interior if solved for the partially subassembled Schur complement */ 876 ierr = MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);CHKERRQ(ierr); 877 ierr = VecAXPY(mat_ctx->temp_solution_D,-1.0,pcis->vec1_D);CHKERRQ(ierr); 878 ierr = KSPSolve(pcbddc->ksp_D,mat_ctx->temp_solution_D,pcis->vec1_D);CHKERRQ(ierr); 879 } 880 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 881 ierr = VecScatterEnd (pcis->global_to_D,pcis->vec1_D,standard_sol,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 882 /* final change of basis if needed 883 Is also sums the dirichlet part removed during RHS assembling */ 884 (*mat_ctx->pc->ops->postsolve)(mat_ctx->pc,PETSC_NULL,PETSC_NULL,standard_sol); 885 PetscFunctionReturn(0); 886 887 } 888 EXTERN_C_END 889 #undef __FUNCT__ 890 #define __FUNCT__ "PCBDDCMatFETIDPGetSolution" 891 /*@ 892 PCBDDCMatFETIDPGetSolution - Get Solution for FETIDP linear system. 
893 894 Collective 895 896 Input Parameters: 897 + fetidp_mat - the FETIDP mat obtained by a call to PCBDDCCreateFETIDPOperators 898 + fetidp_flux_sol - the solution of the FETIDP linear system 899 900 Output Parameters: 901 + standard_sol - the solution on the global domain 902 903 Level: developer 904 905 Notes: 906 907 .seealso: PCBDDC 908 @*/ 909 PetscErrorCode PCBDDCMatFETIDPGetSolution(Mat fetidp_mat, Vec fetidp_flux_sol, Vec standard_sol) 910 { 911 FETIDPMat_ctx *mat_ctx; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 ierr = MatShellGetContext(fetidp_mat,&mat_ctx);CHKERRQ(ierr); 916 ierr = PetscTryMethod(mat_ctx->pc,"PCBDDCMatFETIDPGetSolution_C",(Mat,Vec,Vec),(fetidp_mat,fetidp_flux_sol,standard_sol));CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 /* -------------------------------------------------------------------------- */ 920 EXTERN_C_BEGIN 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCCreateFETIDPOperators_BDDC" 923 static PetscErrorCode PCBDDCCreateFETIDPOperators_BDDC(PC pc, Mat *fetidp_mat, PC *fetidp_pc) 924 { 925 PETSC_EXTERN PetscErrorCode FETIDPMatMult(Mat,Vec,Vec); 926 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPMat(Mat); 927 PETSC_EXTERN PetscErrorCode FETIDPPCApply(PC,Vec,Vec); 928 PETSC_EXTERN PetscErrorCode PCBDDCDestroyFETIDPPC(PC); 929 930 FETIDPMat_ctx *fetidpmat_ctx; 931 Mat newmat; 932 FETIDPPC_ctx *fetidppc_ctx; 933 PC newpc; 934 MPI_Comm comm = ((PetscObject)pc)->comm; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 /* FETIDP linear matrix */ 939 ierr = PCBDDCCreateFETIDPMatContext(pc, &fetidpmat_ctx);CHKERRQ(ierr); 940 ierr = PCBDDCSetupFETIDPMatContext(fetidpmat_ctx);CHKERRQ(ierr); 941 ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,fetidpmat_ctx->n_lambda,fetidpmat_ctx->n_lambda,fetidpmat_ctx,&newmat);CHKERRQ(ierr); 942 ierr = MatShellSetOperation(newmat,MATOP_MULT,(void (*)(void))FETIDPMatMult);CHKERRQ(ierr); 943 ierr = MatShellSetOperation(newmat,MATOP_DESTROY,(void 
(*)(void))PCBDDCDestroyFETIDPMat);CHKERRQ(ierr);
  ierr = MatSetUp(newmat);CHKERRQ(ierr);
  /* FETIDP preconditioner */
  ierr = PCBDDCCreateFETIDPPCContext(pc, &fetidppc_ctx);CHKERRQ(ierr);
  ierr = PCBDDCSetupFETIDPPCContext(newmat,fetidppc_ctx);CHKERRQ(ierr);
  ierr = PCCreate(comm,&newpc);CHKERRQ(ierr);
  ierr = PCSetType(newpc,PCSHELL);CHKERRQ(ierr);
  ierr = PCShellSetContext(newpc,fetidppc_ctx);CHKERRQ(ierr);
  ierr = PCShellSetApply(newpc,FETIDPPCApply);CHKERRQ(ierr);
  ierr = PCShellSetDestroy(newpc,PCBDDCDestroyFETIDPPC);CHKERRQ(ierr);
  ierr = PCSetOperators(newpc,newmat,newmat,SAME_PRECONDITIONER);CHKERRQ(ierr);
  ierr = PCSetUp(newpc);CHKERRQ(ierr);
  /* return pointers for objects created; ownership passes to the caller,
     which releases them via MatDestroy/PCDestroy (the shell destroy callbacks
     free the contexts) */
  *fetidp_mat=newmat;
  *fetidp_pc=newpc;

  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateFETIDPOperators"
/*@
   PCBDDCCreateFETIDPOperators - Create operators for FETIDP.

   Collective

   Input Parameters:
+  pc - the BDDC preconditioning context (setup must be already called)

   Level: developer

   Notes:

.seealso: PCBDDC
@*/
PetscErrorCode PCBDDCCreateFETIDPOperators(PC pc, Mat *fetidp_mat, PC *fetidp_pc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* the FETI-DP operators are built from data assembled in PCSetUp_BDDC,
     so refuse to run before setup has been performed */
  if (pc->setupcalled) {
    ierr = PetscTryMethod(pc,"PCBDDCCreateFETIDPOperators_C",(PC,Mat*,PC*),(pc,fetidp_mat,fetidp_pc));CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"You must call PCSetup_BDDC before calling %s\n",__FUNCT__);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
/*MC
   PCBDDC - Balancing Domain Decomposition by Constraints.

   Options Database Keys:
.    -pcbddc ???
-

   Level: intermediate

   Notes: The matrix used with this preconditioner must be of type MATIS

          Unlike more 'conventional' interface preconditioners, this iterates over ALL the
          degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
          on the subdomains).

          Options for the coarse grid preconditioner can be set with -
          Options for the Dirichlet subproblem can be set with -
          Options for the Neumann subproblem can be set with -

   Contributed by Stefano Zampini

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MATIS
M*/
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_BDDC"
/*
   PCCreate_BDDC - Allocates the PC_BDDC private data structure, sets the
   default values of its fields and the PC function table, and composes the
   BDDC-specific methods with the PC object.
*/
PetscErrorCode PCCreate_BDDC(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC        *pcbddc;
  PCBDDCGraph    mat_graph;   /* used only for sizeof in the allocation below */

  PetscFunctionBegin;
  /* Creates the private data structure for this preconditioner and attach it to the PC object. */
  ierr = PetscNewLog(pc,PC_BDDC,&pcbddc);CHKERRQ(ierr);
  pc->data = (void*)pcbddc;

  /* create PCIS data structure */
  ierr = PCISCreate(pc);CHKERRQ(ierr);

  /* BDDC specific: null out objects created later in PCSetUp_BDDC and set defaults */
  pcbddc->temp_solution = 0;
  pcbddc->original_rhs = 0;
  pcbddc->local_mat = 0;
  pcbddc->ChangeOfBasisMatrix = 0;
  pcbddc->usechangeofbasis = PETSC_TRUE;
  pcbddc->usechangeonfaces = PETSC_FALSE;
  pcbddc->coarse_vec = 0;
  pcbddc->coarse_rhs = 0;
  pcbddc->coarse_ksp = 0;
  pcbddc->coarse_phi_B = 0;
  pcbddc->coarse_phi_D = 0;
  pcbddc->vec1_P = 0;
  pcbddc->vec1_R = 0;
  pcbddc->vec2_R = 0;
  pcbddc->local_auxmat1 = 0;
  pcbddc->local_auxmat2 = 0;
  pcbddc->R_to_B = 0;
  pcbddc->R_to_D = 0;
  pcbddc->ksp_D = 0;
  pcbddc->ksp_R = 0;
  pcbddc->local_primal_indices = 0;
  pcbddc->prec_type = PETSC_FALSE;
  pcbddc->NeumannBoundaries = 0;
  pcbddc->ISForDofs = 0;
  pcbddc->ISForVertices = 0;
  pcbddc->n_ISForFaces = 0;
  pcbddc->n_ISForEdges = 0;
  pcbddc->ConstraintMatrix = 0;
  pcbddc->use_nnsp_true = PETSC_FALSE;
  pcbddc->local_primal_sizes = 0;
  pcbddc->local_primal_displacements = 0;
  pcbddc->replicated_local_primal_indices = 0;
  pcbddc->replicated_local_primal_values = 0;
  pcbddc->coarse_loc_to_glob = 0;
  pcbddc->dbg_flag = PETSC_FALSE;
  pcbddc->coarsening_ratio = 8;
  pcbddc->use_exact_dirichlet = PETSC_TRUE;

  /* allocate and initialize needed graph structure */
  ierr = PetscMalloc(sizeof(*mat_graph),&pcbddc->mat_graph);CHKERRQ(ierr);
  pcbddc->mat_graph->xadj = 0;
  pcbddc->mat_graph->adjncy = 0;

  /* function pointers */
  pc->ops->apply = PCApply_BDDC;
  pc->ops->applytranspose = 0;
  pc->ops->setup = PCSetUp_BDDC;
  pc->ops->destroy = PCDestroy_BDDC;
  pc->ops->setfromoptions = PCSetFromOptions_BDDC;
  pc->ops->view = 0;
  pc->ops->applyrichardson = 0;
  pc->ops->applysymmetricleft = 0;
  pc->ops->applysymmetricright = 0;
  pc->ops->presolve = PCPreSolve_BDDC;
  pc->ops->postsolve = PCPostSolve_BDDC;

  /* composing function */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDirichletBoundaries_C","PCBDDCSetDirichletBoundaries_BDDC",
                    PCBDDCSetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetNeumannBoundaries_C","PCBDDCSetNeumannBoundaries_BDDC",
                    PCBDDCSetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetDirichletBoundaries_C","PCBDDCGetDirichletBoundaries_BDDC",
                    PCBDDCGetDirichletBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCGetNeumannBoundaries_C","PCBDDCGetNeumannBoundaries_BDDC",
                    PCBDDCGetNeumannBoundaries_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetCoarseProblemType_C","PCBDDCSetCoarseProblemType_BDDC",
                    PCBDDCSetCoarseProblemType_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetDofsSplitting_C","PCBDDCSetDofsSplitting_BDDC",
                    PCBDDCSetDofsSplitting_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCSetLocalAdjacencyGraph_C","PCBDDCSetLocalAdjacencyGraph_BDDC",
                    PCBDDCSetLocalAdjacencyGraph_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPreSolve_C","PCPreSolve_BDDC",
                    PCPreSolve_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCPostSolve_C","PCPostSolve_BDDC",
                    PCPostSolve_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCCreateFETIDPOperators_C","PCBDDCCreateFETIDPOperators_BDDC",
                    PCBDDCCreateFETIDPOperators_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetRHS_C","PCBDDCMatFETIDPGetRHS_BDDC",
                    PCBDDCMatFETIDPGetRHS_BDDC);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCBDDCMatFETIDPGetSolution_C","PCBDDCMatFETIDPGetSolution_BDDC",
                    PCBDDCMatFETIDPGetSolution_BDDC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/* -------------------------------------------------------------------------- */
/* All static functions from now on                                           */
/* -------------------------------------------------------------------------- */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateFETIDPMatContext"
/*
   PCBDDCCreateFETIDPMatContext - Allocates and zero-initializes the shell
   context used by the FETI-DP matrix; takes a reference on the BDDC PC.
*/
static PetscErrorCode PCBDDCCreateFETIDPMatContext(PC pc, FETIDPMat_ctx **fetidpmat_ctx)
{
  FETIDPMat_ctx  *newctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr);
  newctx->lambda_local = 0;
  newctx->temp_solution_B = 0;
newctx->temp_solution_D = 0;
  newctx->B_delta = 0;
  newctx->B_Ddelta = 0; /* theoretically belongs to the FETIDP preconditioner */
  newctx->l2g_lambda = 0;
  /* increase the reference count for BDDC preconditioner */
  ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr);
  newctx->pc = pc;
  *fetidpmat_ctx = newctx;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateFETIDPPCContext"
/*
   PCBDDCCreateFETIDPPCContext - Allocates and zero-initializes the shell
   context used by the FETI-DP preconditioner; takes a reference on the BDDC PC.
*/
static PetscErrorCode PCBDDCCreateFETIDPPCContext(PC pc, FETIDPPC_ctx **fetidppc_ctx)
{
  FETIDPPC_ctx   *newctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(sizeof(*newctx),&newctx);CHKERRQ(ierr);
  newctx->lambda_local = 0;
  newctx->B_Ddelta = 0;
  newctx->l2g_lambda = 0;
  /* increase the reference count for BDDC preconditioner */
  ierr = PetscObjectReference((PetscObject)pc);CHKERRQ(ierr);
  newctx->pc = pc;
  *fetidppc_ctx = newctx;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCDestroyFETIDPMat"
/*
   PCBDDCDestroyFETIDPMat - MATOP_DESTROY callback for the FETI-DP shell
   matrix: frees the objects held by the context and the context itself.
*/
static PetscErrorCode PCBDDCDestroyFETIDPMat(Mat A)
{
  FETIDPMat_ctx  *mat_ctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShellGetContext(A,(void**)&mat_ctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mat_ctx->lambda_local);CHKERRQ(ierr);
  ierr = VecDestroy(&mat_ctx->temp_solution_D);CHKERRQ(ierr);
  ierr = VecDestroy(&mat_ctx->temp_solution_B);CHKERRQ(ierr);
  ierr = MatDestroy(&mat_ctx->B_delta);CHKERRQ(ierr);
  ierr = MatDestroy(&mat_ctx->B_Ddelta);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mat_ctx->l2g_lambda);CHKERRQ(ierr);
  ierr = PCDestroy(&mat_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */
  ierr = PetscFree(mat_ctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCDestroyFETIDPPC"
/*
   PCBDDCDestroyFETIDPPC - PCShell destroy callback for the FETI-DP
   preconditioner: frees the objects held by the context and the context itself.
*/
static PetscErrorCode PCBDDCDestroyFETIDPPC(PC pc)
{
  FETIDPPC_ctx   *pc_ctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCShellGetContext(pc,(void**)&pc_ctx);CHKERRQ(ierr);
  ierr = VecDestroy(&pc_ctx->lambda_local);CHKERRQ(ierr);
  ierr = MatDestroy(&pc_ctx->B_Ddelta);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&pc_ctx->l2g_lambda);CHKERRQ(ierr);
  ierr = PCDestroy(&pc_ctx->pc);CHKERRQ(ierr); /* actually it does not destroy BDDC, only decrease its reference count */
  ierr = PetscFree(pc_ctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupFETIDPMatContext"
/*
   PCBDDCSetupFETIDPMatContext - Builds the FETI-DP jump operator B_delta, the
   scaling factors, and the local-to-global map for the multipliers.
*/
static PetscErrorCode PCBDDCSetupFETIDPMatContext(FETIDPMat_ctx *fetidpmat_ctx )
{
  PetscErrorCode ierr;
  PC_IS          *pcis=(PC_IS*)fetidpmat_ctx->pc->data;
  PC_BDDC        *pcbddc=(PC_BDDC*)fetidpmat_ctx->pc->data;
  PCBDDCGraph    mat_graph=pcbddc->mat_graph;
  Mat_IS         *matis = (Mat_IS*)fetidpmat_ctx->pc->pmat->data;
  MPI_Comm       comm = ((PetscObject)(fetidpmat_ctx->pc))->comm;

  Mat            ScalingMat;
  Vec            lambda_global;
  IS             IS_l2g_lambda;

  PetscBool      skip_node,fully_redundant;
  PetscInt       i,j,k,s,n_boundary_dofs,sum_dof_sizes,n_global_lambda,n_vertices;
  PetscInt       n_local_lambda,n_lambda_for_dof,dual_size,n_neg_values,n_pos_values;
  PetscMPIInt    rank,nprocs,partial_sum;
  PetscScalar    scalar_value;

  PetscInt       *vertex_indices,*temp_indices;
  PetscInt       *dual_dofs_boundary_indices,*aux_local_numbering_1,*aux_global_numbering;
  PetscInt       *aux_sums,*cols_B_delta,*l2g_indices;
  PetscMPIInt    *aux_local_numbering_2,*aux_global_numbering_mpi,*dof_sizes,*dof_displs;
  PetscMPIInt    *all_aux_global_numbering_mpi_1,*all_aux_global_numbering_mpi_2,*global_dofs_numbering;
  PetscScalar    *array,*scaling_factors,*vals_B_delta;

  /* For communication of scaling factors */
  PetscInt       *ptrs_buffer,neigh_position;
  PetscScalar    **all_factors,*send_buffer,*recv_buffer;
1230 MPI_Request *send_reqs,*recv_reqs; 1231 1232 /* tests */ 1233 Vec test_vec; 1234 PetscBool test_fetidp; 1235 PetscViewer viewer; 1236 1237 PetscFunctionBegin; 1238 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1239 ierr = MPI_Comm_size(comm,&nprocs);CHKERRQ(ierr); 1240 1241 /* Default type of lagrange multipliers is non-redundant */ 1242 fully_redundant = PETSC_FALSE; 1243 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_fullyredundant",&fully_redundant,PETSC_NULL);CHKERRQ(ierr); 1244 1245 /* Evaluate local and global number of lagrange multipliers */ 1246 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1247 n_local_lambda = 0; 1248 partial_sum = 0; 1249 n_boundary_dofs = 0; 1250 s = 0; 1251 n_vertices = 0; 1252 /* Get Vertices used to define the BDDC */ 1253 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(*vertex_indices),&vertex_indices);CHKERRQ(ierr); 1254 for(i=0;i<pcbddc->local_primal_size;i++) { 1255 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1256 if(j == 1) { 1257 vertex_indices[n_vertices]=temp_indices[0]; 1258 n_vertices++; 1259 } 1260 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&j,(const PetscInt**)&temp_indices,PETSC_NULL);CHKERRQ(ierr); 1261 } 1262 dual_size = pcis->n_B-n_vertices; 1263 ierr = PetscMalloc(dual_size*sizeof(*dual_dofs_boundary_indices),&dual_dofs_boundary_indices);CHKERRQ(ierr); 1264 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_1),&aux_local_numbering_1);CHKERRQ(ierr); 1265 ierr = PetscMalloc(dual_size*sizeof(*aux_local_numbering_2),&aux_local_numbering_2);CHKERRQ(ierr); 1266 1267 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1268 for(i=0;i<pcis->n;i++){ 1269 j = mat_graph->count[i]; /* RECALL: mat_graph->count[i] does not count myself */ 1270 k = 0; 1271 if(j > 0) { 1272 k = (mat_graph->neighbours_set[i][0] == -1 ? 
1 : 0); 1273 } 1274 j = j - k ; 1275 if( j > 0 ) { n_boundary_dofs++; } 1276 1277 skip_node = PETSC_FALSE; 1278 if( s < n_vertices && vertex_indices[s]==i) { /* it works for a sorted set of vertices */ 1279 skip_node = PETSC_TRUE; 1280 s++; 1281 } 1282 if(j < 1) {skip_node = PETSC_TRUE;} 1283 if( !skip_node ) { 1284 if(fully_redundant) { 1285 /* fully redundant set of lagrange multipliers */ 1286 n_lambda_for_dof = (j*(j+1))/2; 1287 } else { 1288 n_lambda_for_dof = j; 1289 } 1290 n_local_lambda += j; 1291 /* needed to evaluate global number of lagrange multipliers */ 1292 array[i]=(1.0*n_lambda_for_dof)/(j+1.0); /* already scaled for the next global sum */ 1293 /* store some data needed */ 1294 dual_dofs_boundary_indices[partial_sum] = n_boundary_dofs-1; 1295 aux_local_numbering_1[partial_sum] = i; 1296 aux_local_numbering_2[partial_sum] = (PetscMPIInt)n_lambda_for_dof; 1297 partial_sum++; 1298 } 1299 } 1300 /*printf("I found %d local lambda dofs\n",n_local_lambda); 1301 printf("I found %d boundary dofs (should be %d)\n",n_boundary_dofs,pcis->n_B); 1302 printf("Partial sum %d should be %d\n",partial_sum,dual_size);*/ 1303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1304 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1305 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1306 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1307 ierr = VecSum(pcis->vec1_global,&scalar_value);CHKERRQ(ierr); 1308 fetidpmat_ctx->n_lambda = (PetscInt) scalar_value; 1309 /* printf("I found %d global multipliers (%f)\n",fetidpmat_ctx->n_lambda,scalar_value); */ 1310 ierr = VecCreate(PETSC_COMM_SELF,&fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1311 ierr = VecSetSizes(fetidpmat_ctx->lambda_local,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1312 ierr = VecSetType(fetidpmat_ctx->lambda_local,VECSEQ);CHKERRQ(ierr); 1313 ierr = 
VecCreate(comm,&lambda_global);CHKERRQ(ierr); 1314 ierr = VecSetSizes(lambda_global,PETSC_DECIDE,fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1315 ierr = VecSetType(lambda_global,VECMPI);CHKERRQ(ierr); 1316 1317 /* compute global ordering of lagrange multipliers and associate l2g map */ 1318 1319 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering),&aux_global_numbering);CHKERRQ(ierr); 1320 ierr = PetscMalloc(dual_size*sizeof(*aux_global_numbering_mpi),&aux_global_numbering_mpi);CHKERRQ(ierr); 1321 j = (rank == 0 ? nprocs : 0); 1322 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 1323 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 1324 ierr = ISLocalToGlobalMappingApply(matis->mapping,dual_size,aux_local_numbering_1,aux_global_numbering);CHKERRQ(ierr); 1325 ierr = MPI_Gather(&dual_size,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 1326 sum_dof_sizes=0; 1327 if ( rank == 0 ) { 1328 dof_displs[0]=0; 1329 sum_dof_sizes=dual_size; 1330 for(i=1;i<nprocs;i++) { 1331 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 1332 sum_dof_sizes += dof_sizes[i]; 1333 } 1334 } 1335 for(i=0;i<dual_size;i++) { 1336 aux_global_numbering_mpi[i]=(PetscMPIInt)aux_global_numbering[i]; 1337 } 1338 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_1),&all_aux_global_numbering_mpi_1);CHKERRQ(ierr); 1339 ierr = PetscMalloc(sum_dof_sizes*sizeof(*all_aux_global_numbering_mpi_2),&all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1340 ierr = MPI_Gatherv(aux_global_numbering_mpi,dual_size,MPIU_INT,all_aux_global_numbering_mpi_1,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1341 ierr = MPI_Gatherv(aux_local_numbering_2,dual_size,MPIU_INT,all_aux_global_numbering_mpi_2,dof_sizes,dof_displs,MPIU_INT,0,comm);CHKERRQ(ierr); 1342 1343 ierr = PetscMalloc(fetidpmat_ctx->n_lambda*sizeof(*global_dofs_numbering),&global_dofs_numbering);CHKERRQ(ierr); 1344 if( rank == 0 ) { 1345 ierr = 
PetscSortMPIIntWithArray(sum_dof_sizes,all_aux_global_numbering_mpi_1,all_aux_global_numbering_mpi_2);CHKERRQ(ierr); 1346 j=-1; 1347 partial_sum = 0; 1348 for(i=0;i<sum_dof_sizes;i++) { 1349 if(j != all_aux_global_numbering_mpi_1[i] ) { 1350 j=all_aux_global_numbering_mpi_1[i]; 1351 for(k=0;k<all_aux_global_numbering_mpi_2[i];k++) { 1352 global_dofs_numbering[partial_sum+k]=all_aux_global_numbering_mpi_1[i]; 1353 } 1354 partial_sum += all_aux_global_numbering_mpi_2[i]; 1355 } 1356 } 1357 /* printf("Partial sum for global dofs %d should be %d\n",partial_sum,fetidpmat_ctx->n_lambda); */ 1358 } 1359 ierr = MPI_Bcast(global_dofs_numbering,fetidpmat_ctx->n_lambda,MPIU_INT,0,comm);CHKERRQ(ierr); 1360 1361 /* init data for scaling factors exchange */ 1362 partial_sum = 0; 1363 j = 0; 1364 ierr = PetscMalloc( pcis->n_neigh*sizeof(PetscInt),&ptrs_buffer);CHKERRQ(ierr); 1365 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&send_reqs);CHKERRQ(ierr); 1366 ierr = PetscMalloc( (pcis->n_neigh-1)*sizeof(MPI_Request),&recv_reqs);CHKERRQ(ierr); 1367 ierr = PetscMalloc( pcis->n*sizeof(PetscScalar*),&all_factors);CHKERRQ(ierr); 1368 ptrs_buffer[0]=0; 1369 for(i=1;i<pcis->n_neigh;i++) { 1370 partial_sum += pcis->n_shared[i]; 1371 ptrs_buffer[i] = ptrs_buffer[i-1]+pcis->n_shared[i]; 1372 } 1373 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&send_buffer);CHKERRQ(ierr); 1374 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&recv_buffer);CHKERRQ(ierr); 1375 ierr = PetscMalloc( partial_sum*sizeof(PetscScalar),&all_factors[0]);CHKERRQ(ierr); 1376 for(i=0;i<pcis->n-1;i++) { 1377 j = mat_graph->count[i]; 1378 if(j>0) { 1379 k = (mat_graph->neighbours_set[i][0] == -1 ? 
1 : 0); 1380 j = j - k; 1381 } 1382 all_factors[i+1]=all_factors[i]+j; 1383 } 1384 /* scatter B scaling to N vec */ 1385 ierr = VecScatterBegin(pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1386 ierr = VecScatterEnd (pcis->N_to_B,pcis->D,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1387 /* communications */ 1388 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1389 for(i=1;i<pcis->n_neigh;i++) { 1390 for(j=0;j<pcis->n_shared[i];j++) { 1391 send_buffer[ptrs_buffer[i-1]+j]=array[pcis->shared[i][j]]; 1392 } 1393 j = ptrs_buffer[i]-ptrs_buffer[i-1]; 1394 ierr = MPI_Isend(&send_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&send_reqs[i-1]);CHKERRQ(ierr); 1395 ierr = MPI_Irecv(&recv_buffer[ptrs_buffer[i-1]],j,MPIU_SCALAR,pcis->neigh[i],0,comm,&recv_reqs[i-1]);CHKERRQ(ierr); 1396 } 1397 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1398 ierr = MPI_Waitall((pcis->n_neigh-1),recv_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1399 /* put values in correct places */ 1400 for(i=1;i<pcis->n_neigh;i++) { 1401 for(j=0;j<pcis->n_shared[i];j++) { 1402 k = pcis->shared[i][j]; 1403 neigh_position = 0; 1404 while(mat_graph->neighbours_set[k][neigh_position] != pcis->neigh[i]) {neigh_position++;} 1405 s = (mat_graph->neighbours_set[k][0] == -1 ? 
1 : 0); 1406 neigh_position = neigh_position - s; 1407 all_factors[k][neigh_position]=recv_buffer[ptrs_buffer[i-1]+j]; 1408 } 1409 } 1410 ierr = MPI_Waitall((pcis->n_neigh-1),send_reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1411 ierr = PetscFree(send_reqs);CHKERRQ(ierr); 1412 ierr = PetscFree(recv_reqs);CHKERRQ(ierr); 1413 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 1414 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 1415 ierr = PetscFree(ptrs_buffer);CHKERRQ(ierr); 1416 1417 /* Compute B and B_delta (local actions) */ 1418 ierr = PetscMalloc(pcis->n_neigh*sizeof(*aux_sums),&aux_sums);CHKERRQ(ierr); 1419 ierr = PetscMalloc(n_local_lambda*sizeof(*l2g_indices),&l2g_indices);CHKERRQ(ierr); 1420 ierr = PetscMalloc(n_local_lambda*sizeof(*vals_B_delta),&vals_B_delta);CHKERRQ(ierr); 1421 ierr = PetscMalloc(n_local_lambda*sizeof(*cols_B_delta),&cols_B_delta);CHKERRQ(ierr); 1422 ierr = PetscMalloc(n_local_lambda*sizeof(*scaling_factors),&scaling_factors);CHKERRQ(ierr); 1423 n_global_lambda=0; 1424 partial_sum=0; 1425 for(i=0;i<dual_size;i++) { 1426 while( global_dofs_numbering[n_global_lambda] != aux_global_numbering_mpi[i] ) { n_global_lambda++; } 1427 j = mat_graph->count[aux_local_numbering_1[i]]; 1428 k = (mat_graph->neighbours_set[aux_local_numbering_1[i]][0] == -1 ? 
1 : 0); 1429 j = j - k; 1430 aux_sums[0]=0; 1431 for(s=1;s<j;s++) { 1432 aux_sums[s]=aux_sums[s-1]+j-s+1; 1433 } 1434 array = all_factors[aux_local_numbering_1[i]]; 1435 n_neg_values = 0; 1436 while(n_neg_values < j && mat_graph->neighbours_set[aux_local_numbering_1[i]][n_neg_values+k] < rank) {n_neg_values++;} 1437 n_pos_values = j - n_neg_values; 1438 if(fully_redundant) { 1439 for(s=0;s<n_neg_values;s++) { 1440 l2g_indices [partial_sum+s]=aux_sums[s]+n_neg_values-s-1+n_global_lambda; 1441 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1442 vals_B_delta [partial_sum+s]=-1.0; 1443 scaling_factors[partial_sum+s]=array[s]; 1444 } 1445 for(s=0;s<n_pos_values;s++) { 1446 l2g_indices [partial_sum+s+n_neg_values]=aux_sums[n_neg_values]+s+n_global_lambda; 1447 cols_B_delta [partial_sum+s+n_neg_values]=dual_dofs_boundary_indices[i]; 1448 vals_B_delta [partial_sum+s+n_neg_values]=1.0; 1449 scaling_factors[partial_sum+s+n_neg_values]=array[s+n_neg_values]; 1450 } 1451 partial_sum += j; 1452 } else { 1453 /* l2g_indices and default cols and vals of B_delta */ 1454 for(s=0;s<j;s++) { 1455 l2g_indices [partial_sum+s]=n_global_lambda+s; 1456 cols_B_delta [partial_sum+s]=dual_dofs_boundary_indices[i]; 1457 vals_B_delta [partial_sum+s]=0.0; 1458 } 1459 /* B_delta */ 1460 if( n_neg_values > 0 ) { /* there's a rank next to me to the left */ 1461 vals_B_delta [partial_sum+n_neg_values-1]=-1.0; 1462 } 1463 if ( n_neg_values < j ) { /* there's a rank next to me to the right */ 1464 vals_B_delta [partial_sum+n_neg_values]=1.0; 1465 } 1466 /* scaling as in Klawonn-Widlund 1999*/ 1467 for(s=0;s<n_neg_values;s++) { 1468 scalar_value = 0.0; 1469 for(k=0;k<s+1;k++) { 1470 scalar_value += array[k]; 1471 } 1472 scalar_value = -scalar_value; 1473 scaling_factors[partial_sum+s] = scalar_value; 1474 } 1475 for(s=0;s<n_pos_values;s++) { 1476 scalar_value = 0.0; 1477 for(k=s+n_neg_values;k<j;k++) { 1478 scalar_value += array[k]; 1479 } 1480 scaling_factors[partial_sum+s+n_neg_values] 
= scalar_value; 1481 } 1482 partial_sum += j; 1483 } 1484 } 1485 ierr = PetscFree(all_factors[0]);CHKERRQ(ierr); 1486 ierr = PetscFree(all_factors);CHKERRQ(ierr); 1487 /* printf("I found %d local lambda dofs when numbering them (should be %d)\n",partial_sum,n_local_lambda); */ 1488 ierr = ISCreateGeneral(comm,n_local_lambda,l2g_indices,PETSC_OWN_POINTER,&IS_l2g_lambda);CHKERRQ(ierr); 1489 ierr = VecScatterCreate(fetidpmat_ctx->lambda_local,(IS)0,lambda_global,IS_l2g_lambda,&fetidpmat_ctx->l2g_lambda);CHKERRQ(ierr); 1490 1491 /* Create local part of B_delta */ 1492 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_delta); 1493 ierr = MatSetSizes(fetidpmat_ctx->B_delta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1494 ierr = MatSetType(fetidpmat_ctx->B_delta,MATSEQAIJ);CHKERRQ(ierr); 1495 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_delta,1,PETSC_NULL);CHKERRQ(ierr); 1496 ierr = MatSetOption(fetidpmat_ctx->B_delta,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1497 for(i=0;i<n_local_lambda;i++) { 1498 ierr = MatSetValue(fetidpmat_ctx->B_delta,i,cols_B_delta[i],vals_B_delta[i],INSERT_VALUES);CHKERRQ(ierr); 1499 } 1500 ierr = MatAssemblyBegin(fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1501 ierr = MatAssemblyEnd (fetidpmat_ctx->B_delta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1502 1503 if(fully_redundant) { 1504 ierr = MatCreate(PETSC_COMM_SELF,&ScalingMat); 1505 ierr = MatSetSizes(ScalingMat,n_local_lambda,n_local_lambda,n_local_lambda,n_local_lambda);CHKERRQ(ierr); 1506 ierr = MatSetType(ScalingMat,MATSEQAIJ);CHKERRQ(ierr); 1507 ierr = MatSeqAIJSetPreallocation(ScalingMat,1,PETSC_NULL);CHKERRQ(ierr); 1508 for(i=0;i<n_local_lambda;i++) { 1509 ierr = MatSetValue(ScalingMat,i,i,scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1510 } 1511 ierr = MatAssemblyBegin(ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1512 ierr = MatAssemblyEnd (ScalingMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1513 ierr = 
MatMatMult(ScalingMat,fetidpmat_ctx->B_delta,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&fetidpmat_ctx->B_Ddelta);CHKERRQ(ierr); 1514 ierr = MatDestroy(&ScalingMat);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatCreate(PETSC_COMM_SELF,&fetidpmat_ctx->B_Ddelta); 1517 ierr = MatSetSizes(fetidpmat_ctx->B_Ddelta,n_local_lambda,pcis->n_B,n_local_lambda,pcis->n_B);CHKERRQ(ierr); 1518 ierr = MatSetType(fetidpmat_ctx->B_Ddelta,MATSEQAIJ);CHKERRQ(ierr); 1519 ierr = MatSeqAIJSetPreallocation(fetidpmat_ctx->B_Ddelta,1,PETSC_NULL);CHKERRQ(ierr); 1520 for(i=0;i<n_local_lambda;i++) { 1521 ierr = MatSetValue(fetidpmat_ctx->B_Ddelta,i,cols_B_delta[i],scaling_factors[i],INSERT_VALUES);CHKERRQ(ierr); 1522 } 1523 ierr = MatAssemblyBegin(fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1524 ierr = MatAssemblyEnd (fetidpmat_ctx->B_Ddelta,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1525 } 1526 1527 /* Create some vectors needed by fetidp */ 1528 ierr = VecDuplicate(pcis->vec1_B,&fetidpmat_ctx->temp_solution_B);CHKERRQ(ierr); 1529 ierr = VecDuplicate(pcis->vec1_D,&fetidpmat_ctx->temp_solution_D);CHKERRQ(ierr); 1530 1531 test_fetidp = PETSC_FALSE; 1532 ierr = PetscOptionsGetBool(PETSC_NULL,"-fetidp_check",&test_fetidp,PETSC_NULL);CHKERRQ(ierr); 1533 1534 if(test_fetidp) { 1535 1536 ierr = PetscViewerASCIIGetStdout(((PetscObject)(fetidpmat_ctx->pc))->comm,&viewer);CHKERRQ(ierr); 1537 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1538 ierr = PetscViewerASCIIPrintf(viewer,"----------FETI_DP TESTS--------------\n");CHKERRQ(ierr); 1539 ierr = PetscViewerASCIIPrintf(viewer,"All tests should return zero!\n");CHKERRQ(ierr); 1540 ierr = PetscViewerASCIIPrintf(viewer,"FETIDP MAT context in the ");CHKERRQ(ierr); 1541 if(fully_redundant) { 1542 ierr = PetscViewerASCIIPrintf(viewer,"fully redundant case for lagrange multipliers.\n");CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscViewerASCIIPrintf(viewer,"Non-fully redundant case for lagrange multiplier.\n");CHKERRQ(ierr); 1545 } 1546 
ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1547 1548 /******************************************************************/ 1549 /* TEST A/B: Test numbering of global lambda dofs */ 1550 /******************************************************************/ 1551 1552 ierr = VecDuplicate(fetidpmat_ctx->lambda_local,&test_vec);CHKERRQ(ierr); 1553 ierr = VecSet(lambda_global,1.0);CHKERRQ(ierr); 1554 ierr = VecSet(test_vec,1.0);CHKERRQ(ierr); 1555 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1556 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1557 scalar_value = -1.0; 1558 ierr = VecAXPY(test_vec,scalar_value,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1559 ierr = VecNorm(test_vec,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1560 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1561 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"A[%04d]: CHECK glob to loc: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1562 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1563 if(fully_redundant) { 1564 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1565 ierr = VecSet(fetidpmat_ctx->lambda_local,0.5);CHKERRQ(ierr); 1566 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecSum(lambda_global,&scalar_value);CHKERRQ(ierr); 1569 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"B[%04d]: CHECK loc to glob: % 1.14e\n",rank,scalar_value-fetidpmat_ctx->n_lambda);CHKERRQ(ierr); 1570 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1571 } 1572 1573 /******************************************************************/ 1574 /* TEST C: It should holds B_delta*w=0, w\in\widehat{W} */ 1575 /* This is 
the meaning of the B matrix */ 1576 /******************************************************************/ 1577 1578 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1579 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1580 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1581 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1582 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 /* Action of B_delta */ 1587 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1588 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1589 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 ierr = VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1592 ierr = PetscViewerASCIIPrintf(viewer,"C[coll]: CHECK infty norm of B_delta*w (w continuous): % 1.14e\n",scalar_value);CHKERRQ(ierr); 1593 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1594 1595 /******************************************************************/ 1596 /* TEST D: It should holds E_Dw = w - P_Dw w\in\widetilde{W} */ 1597 /* E_D = R_D^TR */ 1598 /* P_D = B_{D,delta}^T B_{delta} */ 1599 /* eq.44 Mandel Tezaur and Dohrmann 2005 */ 1600 /******************************************************************/ 1601 1602 /* compute a 
random vector in \widetilde{W} */ 1603 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1604 scalar_value = 0.0; /* set zero at vertices */ 1605 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1606 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1607 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1608 /* store w for final comparison */ 1609 ierr = VecDuplicate(pcis->vec1_B,&test_vec);CHKERRQ(ierr); 1610 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,test_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 1613 /* Jump operator P_D : results stored in pcis->vec1_B */ 1614 1615 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1616 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1617 /* Action of B_delta */ 1618 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1619 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1620 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 /* Action of B_Ddelta^T */ 1623 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1624 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1625 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1626 1627 /* Average operator E_D : results stored in pcis->vec2_B */ 1628 1629 ierr = 
VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1631 ierr = VecPointwiseMult(pcis->vec2_B,pcis->D,pcis->vec2_B);CHKERRQ(ierr); 1632 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1633 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec2_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1634 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1635 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1637 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 1642 /* test E_D=I-P_D */ 1643 scalar_value = 1.0; 1644 ierr = VecAXPY(pcis->vec1_B,scalar_value,pcis->vec2_B);CHKERRQ(ierr); 1645 scalar_value = -1.0; 1646 ierr = VecAXPY(pcis->vec1_B,scalar_value,test_vec);CHKERRQ(ierr); 1647 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1648 ierr = VecDestroy(&test_vec);CHKERRQ(ierr); 1649 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"D[%04d] CHECK infty norm of E_D + P_D - I: % 1.14e\n",rank,scalar_value);CHKERRQ(ierr); 1650 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1651 1652 /******************************************************************/ 1653 /* TEST E: It should holds R_D^TP_Dw=0 w\in\widetilde{W} */ 1654 /* eq.48 Mandel Tezaur and Dohrmann 2005 */ 
1655 /******************************************************************/ 1656 1657 ierr = VecSetRandom(pcis->vec1_N,PETSC_NULL);CHKERRQ(ierr); 1658 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1659 scalar_value = 0.0; /* set zero at vertices */ 1660 for(i=0;i<n_vertices;i++) { array[vertex_indices[i]]=scalar_value; } 1661 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 1662 1663 /* Jump operator P_D : results stored in pcis->vec1_B */ 1664 1665 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 /* Action of B_delta */ 1668 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1669 ierr = VecSet(lambda_global,0.0);CHKERRQ(ierr); 1670 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1671 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,lambda_global,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1672 /* Action of B_Ddelta^T */ 1673 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1674 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1675 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1676 /* diagonal scaling */ 1677 ierr = VecPointwiseMult(pcis->vec1_B,pcis->D,pcis->vec1_B);CHKERRQ(ierr); 1678 /* sum on the interface */ 1679 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 1680 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1681 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1682 
ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 1683 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1684 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1685 ierr = VecNorm(pcis->vec1_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr); 1686 ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of R^T_D P_D: % 1.14e\n",scalar_value);CHKERRQ(ierr); 1687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1688 1689 if(!fully_redundant) { 1690 /******************************************************************/ 1691 /* TEST F: It should holds B_{delta}B^T_{D,delta}=I */ 1692 /* Corollary thm 14 Mandel Tezaur and Dohrmann 2005 */ 1693 /******************************************************************/ 1694 ierr = VecDuplicate(lambda_global,&test_vec);CHKERRQ(ierr); 1695 ierr = VecSetRandom(lambda_global,PETSC_NULL);CHKERRQ(ierr); 1696 /* Action of B_Ddelta^T */ 1697 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1698 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,lambda_global,fetidpmat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1699 ierr = MatMultTranspose(fetidpmat_ctx->B_Ddelta,fetidpmat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr); 1700 /* Action of B_delta */ 1701 ierr = MatMult(fetidpmat_ctx->B_delta,pcis->vec1_B,fetidpmat_ctx->lambda_local);CHKERRQ(ierr); 1702 ierr = VecSet(test_vec,0.0);CHKERRQ(ierr); 1703 ierr = VecScatterBegin(fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1704 ierr = VecScatterEnd (fetidpmat_ctx->l2g_lambda,fetidpmat_ctx->lambda_local,test_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1705 scalar_value = -1.0; 1706 ierr = VecAXPY(lambda_global,scalar_value,test_vec);CHKERRQ(ierr); 1707 ierr = 
VecNorm(lambda_global,NORM_INFINITY,&scalar_value);CHKERRQ(ierr);
      /* NOTE(review): this is TEST F (B_delta * B_Ddelta^T = I), but the label below
         says "E[coll]" like the previous test — presumably it should read "F[coll]"; verify. */
      ierr = PetscViewerASCIIPrintf(viewer,"E[coll]: CHECK infty norm of P^T_D - I: % 1.14e\n",scalar_value);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); /* NOTE(review): duplicated flush, harmless but redundant */
      ierr = VecDestroy(&test_vec);CHKERRQ(ierr);
    }
  }
  /* final cleanup */
  /* l2g_indices is NOT freed here: ownership was transferred to IS_l2g_lambda
     via ISCreateGeneral(...,PETSC_OWN_POINTER,...) above */
  ierr = PetscFree(dual_dofs_boundary_indices);CHKERRQ(ierr);
  ierr = PetscFree(vertex_indices);CHKERRQ(ierr);
  ierr = PetscFree(aux_local_numbering_1);CHKERRQ(ierr);
  ierr = PetscFree(aux_local_numbering_2);CHKERRQ(ierr);
  ierr = PetscFree(aux_global_numbering);CHKERRQ(ierr);
  ierr = PetscFree(aux_global_numbering_mpi);CHKERRQ(ierr);
  ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
  ierr = PetscFree(dof_displs);CHKERRQ(ierr);
  ierr = PetscFree(all_aux_global_numbering_mpi_1);CHKERRQ(ierr);
  ierr = PetscFree(all_aux_global_numbering_mpi_2);CHKERRQ(ierr);
  ierr = PetscFree(global_dofs_numbering);CHKERRQ(ierr);
  ierr = PetscFree(aux_sums);CHKERRQ(ierr);
  ierr = PetscFree(cols_B_delta);CHKERRQ(ierr);
  ierr = PetscFree(vals_B_delta);CHKERRQ(ierr);
  ierr = PetscFree(scaling_factors);CHKERRQ(ierr);
  ierr = VecDestroy(&lambda_global);CHKERRQ(ierr);
  ierr = ISDestroy(&IS_l2g_lambda);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupFETIDPPCContext"
/*
   PCBDDCSetupFETIDPPCContext - Fills the FETI-DP preconditioner context by
   borrowing (with reference counting) the lambda work vector, the scaled jump
   operator B_Ddelta and the local-to-global lambda scatter that were created
   when the FETI-DP mat context was set up on the shell matrix fetimat.

   Input:  fetimat      - shell Mat whose context is a FETIDPMat_ctx
   Output: fetidppc_ctx - preconditioner context receiving the references
*/
static PetscErrorCode PCBDDCSetupFETIDPPCContext(Mat fetimat, FETIDPPC_ctx *fetidppc_ctx)
{
  FETIDPMat_ctx  *mat_ctx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr);
  /* get references from objects created when setting up feti mat context;
     PetscObjectReference bumps the refcount so both contexts can destroy safely */
  ierr = PetscObjectReference((PetscObject)mat_ctx->lambda_local);CHKERRQ(ierr);
  fetidppc_ctx->lambda_local = mat_ctx->lambda_local;
  ierr = PetscObjectReference((PetscObject)mat_ctx->B_Ddelta);CHKERRQ(ierr);
  fetidppc_ctx->B_Ddelta = mat_ctx->B_Ddelta;
  ierr = PetscObjectReference((PetscObject)mat_ctx->l2g_lambda);CHKERRQ(ierr);
  fetidppc_ctx->l2g_lambda = mat_ctx->l2g_lambda;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "FETIDPMatMult"
/*
   FETIDPMatMult - MATSHELL MatMult for the FETI-DP operator:
   y = B_delta * Stilde^{-1} * B_delta^T * x, where Stilde^{-1} is applied
   through the BDDC interface preconditioner (which reads/writes pcis->vec1_B
   and pcis->vec1_D of the attached BDDC PC).
*/
static PetscErrorCode FETIDPMatMult(Mat fetimat, Vec x, Vec y)
{
  FETIDPMat_ctx  *mat_ctx;
  PC_IS          *pcis;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShellGetContext(fetimat,&mat_ctx);CHKERRQ(ierr);
  pcis = (PC_IS*)mat_ctx->pc->data;
  /* Application of B_delta^T: bring global lambda down to the local multipliers,
     then map them onto the local interface vector vec1_B */
  ierr = VecScatterBegin(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(mat_ctx->l2g_lambda,x,mat_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = MatMultTranspose(mat_ctx->B_delta,mat_ctx->lambda_local,pcis->vec1_B);CHKERRQ(ierr);
  /* Application of \widetilde{S}^-1 (acts in place on pcis->vec1_B / vec1_D) */
  ierr = VecSet(pcis->vec1_D,0.0);CHKERRQ(ierr);
  ierr = PCBDDCApplyInterfacePreconditioner(mat_ctx->pc);CHKERRQ(ierr);
  /* Application of B_delta, then assemble the global result by adding the
     (possibly duplicated) local multiplier contributions */
  ierr = MatMult(mat_ctx->B_delta,pcis->vec1_B,mat_ctx->lambda_local);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mat_ctx->l2g_lambda,mat_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "FETIDPPCApply"
/*
   FETIDPPCApply - PCSHELL apply for the FETI-DP Dirichlet preconditioner:
   y = B_Ddelta * S * B_Ddelta^T * x, with S applied via PCISApplySchur.
*/
static PetscErrorCode FETIDPPCApply(PC fetipc, Vec x, Vec y)
{
  FETIDPPC_ctx   *pc_ctx;
  PC_IS          *pcis;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NOTE(review): return value of PCShellGetContext is not CHKERRQ-checked here,
     unlike every other call in this file — looks like an oversight */
  ierr = PCShellGetContext(fetipc,(void**)&pc_ctx);
  pcis = (PC_IS*)pc_ctx->pc->data;
  /* Application of B_Ddelta^T */
  ierr = VecScatterBegin(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(pc_ctx->l2g_lambda,x,pc_ctx->lambda_local,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec2_B,0.0);CHKERRQ(ierr);
  ierr = MatMultTranspose(pc_ctx->B_Ddelta,pc_ctx->lambda_local,pcis->vec2_B);CHKERRQ(ierr);
  /* Application of S (local Schur complement action; vec1_D/vec2_D are work vectors) */
  ierr = PCISApplySchur(pc_ctx->pc,pcis->vec2_B,pcis->vec1_B,(Vec)0,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
  /* Application of B_Ddelta */
  ierr = MatMult(pc_ctx->B_Ddelta,pcis->vec1_B,pc_ctx->lambda_local);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = VecScatterBegin(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(pc_ctx->l2g_lambda,pc_ctx->lambda_local,y,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupLocalAdjacencyGraph"
/*
   PCBDDCSetupLocalAdjacencyGraph - Ensures the BDDC graph workspace has a CSR
   adjacency for the local subdomain matrix. If the user has not supplied one
   via PCBDDCSetLocalAdjacencyGraph, it is extracted from matis->A by
   converting to MATMPIADJ and copying the (symmetrized, uncompressed) row IJ.
*/
static PetscErrorCode PCBDDCSetupLocalAdjacencyGraph(PC pc)
{
  PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       nvtxs,*xadj,*adjncy;
  Mat            mat_adj;
  PetscBool      symmetrize_rowij=PETSC_TRUE,compressed_rowij=PETSC_FALSE,flg_row=PETSC_TRUE;
  PCBDDCGraph    mat_graph=pcbddc->mat_graph;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get CSR adjacency from local matrix if user has not yet provided local graph using PCBDDCSetLocalAdjacencyGraph function */
  if(!mat_graph->xadj) {
    ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
    ierr = MatGetRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
    }
    /* Get adjacency into BDDC workspace; PETSC_COPY_VALUES because xadj/adjncy
       must be returned to mat_adj below */
    ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(mat_adj,0,symmetrize_rowij,compressed_rowij,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
    if(!flg_row) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
    }
    ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
/*
   PCBDDCApplyInterfacePreconditioner - Applies the two-level BDDC operator to
   the residual held in pcis->vec1_B (and pcis->vec1_D when prec_type is set),
   overwriting those vectors with the result:
     - coarse rhs = PHI^T r (restriction to the primal space),
     - local correction from the saddle-point solve on the R (non-primal) dofs,
     - coarse correction prolonged back through PHI.
   The coarse-data scatter is deliberately split: Begin is posted before the
   local solve and End is completed after it, overlapping coarse communication
   with the local subdomain work.
*/
static PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
  PC_IS*         pcis = (PC_IS*)  (pc->data);
  const PetscScalar zero = 0.0;

  PetscFunctionBegin;
  /* Application of PHI^T */
  ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
  if(pcbddc->prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }

  /* Scatter data of coarse_rhs */
  /* NOTE(review): the CHKERRQ below is a separate statement outside the if, so it
     always executes; benign here since ierr comes from an already-checked call,
     but the pattern is brittle — braces would be safer */
  if(pcbddc->coarse_rhs) ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Local solution on R nodes */
  ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  if(pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
  ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
  ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  if(pcbddc->prec_type) {
    ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* Coarse solution */
  ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* coarse_rhs is non-null only on ranks participating in the coarse solve */
  if(pcbddc->coarse_rhs) ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  /* Sum contributions from two levels */
  ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
  if(pcbddc->prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCSolveSaddlePoint"
/*
   PCBDDCSolveSaddlePoint - Solves the local constrained (saddle-point) problem:
   vec2_R = K_RR^{-1} vec1_R via ksp_R, then, when the constraint correction
   matrices are present, applies the low-rank update
   vec2_R += local_auxmat2 * (local_auxmat1 * vec2_R) enforcing the primal
   constraints.
*/
static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
  if(pcbddc->local_auxmat1) {
    ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
    ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
/*
   PCBDDCScatterCoarseDataBegin - Starts the local<->global movement of coarse
   (primal) data. Only the SCATTERS_BDDC strategy has an asynchronous Begin;
   for GATHERS_BDDC all communication happens in PCBDDCScatterCoarseDataEnd.
*/
static PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);

  PetscFunctionBegin;
  switch(pcbddc->coarse_communications_type){
    case SCATTERS_BDDC:
      ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      /* nothing to start: gather/scatter collectives are blocking and issued in End */
      break;
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
/*
   PCBDDCScatterCoarseDataEnd - Completes the coarse-data movement started by
   PCBDDCScatterCoarseDataBegin. For GATHERS_BDDC it performs the actual MPI
   collectives, dispatching on the coarse problem layout:
     SEQUENTIAL_BDDC - coarse problem on rank 0: Gatherv (forward) / Scatterv (reverse);
     REPLICATED_BDDC - coarse problem on every rank: Allgatherv (forward),
                       purely local copy (reverse);
     MULTILEVEL/PARALLEL_BDDC - handled by the SCATTERS_BDDC path, no-ops here.
   vec_from/vec_to may be NULL on ranks not holding the corresponding data.
*/
static PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
{
  PetscErrorCode ierr;
  PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
  PetscScalar*   array_to;
  PetscScalar*   array_from;
  MPI_Comm       comm=((PetscObject)pc)->comm;
  PetscInt       i;

  PetscFunctionBegin;

  switch(pcbddc->coarse_communications_type){
    case SCATTERS_BDDC:
      ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
      break;
    case GATHERS_BDDC:
      /* NOTE(review): VecGetArray/VecRestoreArray return values are ignored here
         (not assigned to ierr / CHKERRQ-checked), unlike the rest of the file */
      if(vec_from) VecGetArray(vec_from,&array_from);
      if(vec_to) VecGetArray(vec_to,&array_to);
      switch(pcbddc->coarse_problem_type){
        case SEQUENTIAL_BDDC:
          if(smode == SCATTER_FORWARD) {
            ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
            if(vec_to) {
              /* accumulate gathered local contributions into the coarse rhs (root only) */
              for(i=0;i<pcbddc->replicated_primal_size;i++)
                array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
            }
          } else {
            if(vec_from)
              for(i=0;i<pcbddc->replicated_primal_size;i++)
                pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
            /* NOTE(review): &array_to[0] is read unconditionally here; assumes vec_to
               is non-null on every rank for the reverse sequential scatter — confirm */
            ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
          }
          break;
        case REPLICATED_BDDC:
          if(smode == SCATTER_FORWARD) {
            ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
            for(i=0;i<pcbddc->replicated_primal_size;i++)
              array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
          } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
            for(i=0;i<pcbddc->local_primal_size;i++)
              array_to[i]=array_from[pcbddc->local_primal_indices[i]];
          }
          break;
        case MULTILEVEL_BDDC:
          break;
        case PARALLEL_BDDC:
          break;
      }
      if(vec_from) VecRestoreArray(vec_from,&array_from);
      if(vec_to) VecRestoreArray(vec_to,&array_to);
      break;
  }
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------------- */
#undef __FUNCT__
#define __FUNCT__ "PCBDDCCreateConstraintMatrix"
/*
   PCBDDCCreateConstraintMatrix - Builds the local constraint matrix for the
   BDDC primal space from vertices and face/edge quadrature constraints
   (definition continues beyond this section).
*/
static PetscErrorCode PCBDDCCreateConstraintMatrix(PC pc)
{
  PetscErrorCode ierr;
  PC_IS*         pcis = (PC_IS*)(pc->data);
  PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
  Mat_IS         *matis = (Mat_IS*)pc->pmat->data;
  PetscInt       *nnz,*is_indices;
  PetscScalar    *temp_quadrature_constraint;
  PetscInt
*temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B,*local_to_B; 1987 PetscInt local_primal_size,i,j,k,total_counts,max_size_of_constraint; 1988 PetscInt n_constraints,n_vertices,size_of_constraint; 1989 PetscScalar quad_value; 1990 PetscBool nnsp_has_cnst=PETSC_FALSE,use_nnsp_true=pcbddc->use_nnsp_true; 1991 PetscInt nnsp_size=0,nnsp_addone=0,temp_constraints,temp_start_ptr; 1992 IS *used_IS; 1993 const MatType impMatType=MATSEQAIJ; 1994 PetscBLASInt Bs,Bt,lwork,lierr; 1995 PetscReal tol=1.0e-8; 1996 MatNullSpace nearnullsp; 1997 const Vec *nearnullvecs; 1998 Vec *localnearnullsp; 1999 PetscScalar *work,*temp_basis,*array_vector,*correlation_mat; 2000 PetscReal *rwork,*singular_vals; 2001 PetscBLASInt Bone=1,*ipiv; 2002 Vec temp_vec; 2003 Mat temp_mat; 2004 KSP temp_ksp; 2005 PC temp_pc; 2006 PetscInt s,start_constraint,dual_dofs; 2007 PetscBool compute_submatrix,useksp=PETSC_FALSE; 2008 PetscInt *aux_primal_permutation,*aux_primal_numbering; 2009 PetscBool boolforface,*change_basis; 2010 /* some ugly conditional declarations */ 2011 #if defined(PETSC_MISSING_LAPACK_GESVD) 2012 PetscScalar dot_result; 2013 PetscScalar one=1.0,zero=0.0; 2014 PetscInt ii; 2015 PetscScalar *singular_vectors; 2016 PetscBLASInt *iwork,*ifail; 2017 PetscReal dummy_real,abs_tol; 2018 PetscBLASInt eigs_found; 2019 #if defined(PETSC_USE_COMPLEX) 2020 PetscScalar val1,val2; 2021 #endif 2022 #endif 2023 PetscBLASInt dummy_int; 2024 PetscScalar dummy_scalar; 2025 2026 PetscFunctionBegin; 2027 /* check if near null space is attached to global mat */ 2028 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2029 if (nearnullsp) { 2030 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2031 } else { /* if near null space is not provided it uses constants */ 2032 nnsp_has_cnst = PETSC_TRUE; 2033 use_nnsp_true = PETSC_TRUE; 2034 } 2035 if(nnsp_has_cnst) { 2036 nnsp_addone = 1; 2037 } 2038 /* 2039 Evaluate maximum storage 
size needed by the procedure 2040 - temp_indices will contain start index of each constraint stored as follows 2041 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 2042 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 2043 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 2044 */ 2045 2046 total_counts = pcbddc->n_ISForFaces+pcbddc->n_ISForEdges; 2047 total_counts *= (nnsp_addone+nnsp_size); 2048 ierr = ISGetSize(pcbddc->ISForVertices,&n_vertices);CHKERRQ(ierr); 2049 total_counts += n_vertices; 2050 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2051 ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr); 2052 total_counts = 0; 2053 max_size_of_constraint = 0; 2054 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2055 if(i<pcbddc->n_ISForEdges){ 2056 used_IS = &pcbddc->ISForEdges[i]; 2057 } else { 2058 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2059 } 2060 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 2061 total_counts += j; 2062 if(j>max_size_of_constraint) max_size_of_constraint=j; 2063 } 2064 total_counts *= (nnsp_addone+nnsp_size); 2065 total_counts += n_vertices; 2066 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 2067 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 2068 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 2069 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 2070 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2071 for(i=0;i<pcis->n;i++) { 2072 local_to_B[i]=-1; 2073 } 2074 
for(i=0;i<pcis->n_B;i++) { 2075 local_to_B[is_indices[i]]=i; 2076 } 2077 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2078 2079 /* First we issue queries to allocate optimal workspace for LAPACKgesvd or LAPACKsyev/LAPACKheev */ 2080 rwork = 0; 2081 work = 0; 2082 singular_vals = 0; 2083 temp_basis = 0; 2084 correlation_mat = 0; 2085 if(!pcbddc->use_nnsp_true) { 2086 PetscScalar temp_work; 2087 #if defined(PETSC_MISSING_LAPACK_GESVD) 2088 /* POD */ 2089 PetscInt max_n; 2090 max_n = nnsp_addone+nnsp_size; 2091 /* using some techniques borrowed from Proper Orthogonal Decomposition */ 2092 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 2093 ierr = PetscMalloc(max_n*max_n*sizeof(PetscScalar),&singular_vectors);CHKERRQ(ierr); 2094 ierr = PetscMalloc(max_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2095 ierr = PetscMalloc(max_size_of_constraint*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2096 #if defined(PETSC_USE_COMPLEX) 2097 ierr = PetscMalloc(3*max_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2098 #endif 2099 ierr = PetscMalloc(5*max_n*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); 2100 ierr = PetscMalloc(max_n*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); 2101 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2102 Bt = PetscBLASIntCast(max_n); 2103 lwork=-1; 2104 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2105 #if !defined(PETSC_USE_COMPLEX) 2106 abs_tol=1.e-8; 2107 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,&lierr); */ 2108 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2109 &abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,&temp_work,&lwork,iwork,ifail,&lierr); 2110 #else 2111 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,&temp_work,&lwork,rwork,&lierr); */ 2112 /* LAPACK call is missing here! 
TODO */ 2113 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2114 #endif 2115 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEVX Lapack routine %d",(int)lierr); 2116 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2117 #else /* on missing GESVD */ 2118 /* SVD */ 2119 PetscInt max_n,min_n; 2120 max_n = max_size_of_constraint; 2121 min_n = nnsp_addone+nnsp_size; 2122 if(max_size_of_constraint < ( nnsp_addone+nnsp_size ) ) { 2123 min_n = max_size_of_constraint; 2124 max_n = nnsp_addone+nnsp_size; 2125 } 2126 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 2127 #if defined(PETSC_USE_COMPLEX) 2128 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 2129 #endif 2130 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2131 lwork=-1; 2132 Bs = PetscBLASIntCast(max_n); 2133 Bt = PetscBLASIntCast(min_n); 2134 dummy_int = Bs; 2135 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2136 #if !defined(PETSC_USE_COMPLEX) 2137 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2138 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr); 2139 #else 2140 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[0],&Bs,singular_vals, 2141 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr); 2142 #endif 2143 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SVD Lapack routine %d",(int)lierr); 2144 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2145 #endif 2146 /* Allocate optimal workspace */ 2147 lwork = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work)); 2148 total_counts = (PetscInt)lwork; 2149 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2150 } 2151 /* get local part of global near null space vectors */ 2152 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 2153 for(k=0;k<nnsp_size;k++) { 
2154 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2155 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2156 ierr = VecScatterEnd (matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2157 } 2158 /* Now we can loop on constraining sets */ 2159 total_counts=0; 2160 temp_indices[0]=0; 2161 /* vertices */ 2162 PetscBool used_vertex; 2163 ierr = ISGetIndices(pcbddc->ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2164 if(nnsp_has_cnst) { /* consider all vertices */ 2165 for(i=0;i<n_vertices;i++) { 2166 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2167 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2168 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2169 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2170 change_basis[total_counts]=PETSC_FALSE; 2171 total_counts++; 2172 } 2173 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2174 for(i=0;i<n_vertices;i++) { 2175 used_vertex=PETSC_FALSE; 2176 k=0; 2177 while(!used_vertex && k<nnsp_size) { 2178 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2179 if(PetscAbsScalar(array_vector[is_indices[i]])>0.0) { 2180 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2181 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 2182 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2183 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2184 change_basis[total_counts]=PETSC_FALSE; 2185 total_counts++; 2186 used_vertex=PETSC_TRUE; 2187 } 2188 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2189 k++; 2190 } 2191 } 2192 } 2193 ierr = ISRestoreIndices(pcbddc->ISForVertices,(const 
PetscInt**)&is_indices);CHKERRQ(ierr); 2194 n_vertices=total_counts; 2195 /* edges and faces */ 2196 for(i=0;i<pcbddc->n_ISForEdges+pcbddc->n_ISForFaces;i++){ 2197 if(i<pcbddc->n_ISForEdges){ 2198 used_IS = &pcbddc->ISForEdges[i]; 2199 boolforface = pcbddc->usechangeofbasis; 2200 } else { 2201 used_IS = &pcbddc->ISForFaces[i-pcbddc->n_ISForEdges]; 2202 boolforface = pcbddc->usechangeonfaces; 2203 } 2204 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2205 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2206 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2207 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2208 if(nnsp_has_cnst) { 2209 temp_constraints++; 2210 quad_value = (PetscScalar) (1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2211 for(j=0;j<size_of_constraint;j++) { 2212 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2213 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2214 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2215 } 2216 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2217 change_basis[total_counts]=boolforface; 2218 total_counts++; 2219 } 2220 for(k=0;k<nnsp_size;k++) { 2221 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2222 for(j=0;j<size_of_constraint;j++) { 2223 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 2224 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 2225 temp_quadrature_constraint[temp_indices[total_counts]+j]=array_vector[is_indices[j]]; 2226 } 2227 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array_vector);CHKERRQ(ierr); 2228 quad_value = 1.0; 2229 if( use_nnsp_true ) { /* check if array is null on the connected component in case 
use_nnsp_true has been requested */ 2230 Bs = PetscBLASIntCast(size_of_constraint); 2231 quad_value = BLASasum_(&Bs,&temp_quadrature_constraint[temp_indices[total_counts]],&Bone); 2232 } 2233 if ( quad_value > 0.0 ) { /* keep indices and values */ 2234 temp_constraints++; 2235 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2236 change_basis[total_counts]=boolforface; 2237 total_counts++; 2238 } 2239 } 2240 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2241 /* perform SVD on the constraint if use_nnsp_true has not be requested by the user */ 2242 if(!use_nnsp_true) { 2243 2244 Bs = PetscBLASIntCast(size_of_constraint); 2245 Bt = PetscBLASIntCast(temp_constraints); 2246 2247 #if defined(PETSC_MISSING_LAPACK_GESVD) 2248 ierr = PetscMemzero(correlation_mat,Bt*Bt*sizeof(PetscScalar));CHKERRQ(ierr); 2249 /* Store upper triangular part of correlation matrix */ 2250 for(j=0;j<temp_constraints;j++) { 2251 for(k=0;k<j+1;k++) { 2252 #if defined(PETSC_USE_COMPLEX) 2253 /* hand made complex dot product -> replace */ 2254 dot_result = 0.0; 2255 for (ii=0; ii<size_of_constraint; ii++) { 2256 val1 = temp_quadrature_constraint[temp_indices[temp_start_ptr+j]+ii]; 2257 val2 = temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]; 2258 dot_result += val1*PetscConj(val2); 2259 } 2260 #else 2261 dot_result = BLASdot_(&Bs,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Bone, 2262 &temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Bone); 2263 #endif 2264 correlation_mat[j*temp_constraints+k]=dot_result; 2265 } 2266 } 2267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2268 #if !defined(PETSC_USE_COMPLEX) 2269 /* LAPACKsyev_("V","U",&Bt,correlation_mat,&Bt,singular_vals,work,&lwork,&lierr); */ 2270 LAPACKsyevx_("V","A","U",&Bt,correlation_mat,&Bt,&dummy_real,&dummy_real,&dummy_int,&dummy_int, 2271 
&abs_tol,&eigs_found,singular_vals,singular_vectors,&Bt,work,&lwork,iwork,ifail,&lierr); 2272 #else 2273 /* LAPACK call is missing here! TODO */ 2274 SETERRQ(((PetscObject) pc)->comm, PETSC_ERR_SUP, "Not yet implemented for complexes when PETSC_MISSING_GESVD = 1"); 2275 #endif 2276 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEVX Lapack routine %d",(int)lierr); 2277 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2278 /* retain eigenvalues greater than tol: note that lapack SYEV gives eigs in ascending order */ 2279 j=0; 2280 while( j < Bt && singular_vals[j] < tol) j++; 2281 total_counts=total_counts-j; 2282 if(j<temp_constraints) { 2283 for(k=j;k<Bt;k++) { singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); } 2284 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2285 BLASgemm_("N","N",&Bs,&Bt,&Bt,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,correlation_mat,&Bt,&zero,temp_basis,&Bs); 2286 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2287 /* copy POD basis into used quadrature memory */ 2288 for(k=0;k<Bt-j;k++) { 2289 for(ii=0;ii<size_of_constraint;ii++) { 2290 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[Bt-1-k]*temp_basis[(Bt-1-k)*size_of_constraint+ii]; 2291 } 2292 } 2293 } 2294 2295 #else /* on missing GESVD */ 2296 PetscInt min_n = temp_constraints; 2297 if(min_n > size_of_constraint) min_n = size_of_constraint; 2298 dummy_int = Bs; 2299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2300 #if !defined(PETSC_USE_COMPLEX) 2301 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2302 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr); 2303 #else 2304 LAPACKgesvd_("O","N",&Bs,&Bt,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Bs,singular_vals, 2305 &dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr); 2306 #endif 2307 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD 
Lapack routine %d",(int)lierr); 2308 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2309 /* retain eigenvalues greater than tol: note that lapack SVD gives eigs in descending order */ 2310 j=0; 2311 while( j < min_n && singular_vals[min_n-j-1] < tol) j++; 2312 total_counts = total_counts-(PetscInt)Bt+(min_n-j); 2313 #endif 2314 } 2315 } 2316 2317 n_constraints=total_counts-n_vertices; 2318 local_primal_size = total_counts; 2319 /* set quantities in pcbddc data structure */ 2320 pcbddc->n_vertices = n_vertices; 2321 pcbddc->n_constraints = n_constraints; 2322 pcbddc->local_primal_size = local_primal_size; 2323 2324 /* Create constraint matrix */ 2325 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2326 /* so we need to set it up properly either with or without change of basis */ 2327 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2328 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2329 ierr = MatSetSizes(pcbddc->ConstraintMatrix,local_primal_size,pcis->n,local_primal_size,pcis->n);CHKERRQ(ierr); 2330 /* compute a local numbering of constraints : vertices first then constraints */ 2331 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 2332 ierr = VecGetArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2333 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 2334 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&aux_primal_permutation);CHKERRQ(ierr); 2335 total_counts=0; 2336 /* find vertices: subdomain corners plus dofs with basis changed */ 2337 for(i=0;i<local_primal_size;i++) { 2338 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2339 if(change_basis[i] || size_of_constraint == 1) { 2340 k=0; 2341 while(k < size_of_constraint && array_vector[temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]] != 0.0) { 2342 k=k+1; 2343 } 2344 j=temp_indices_to_constraint[temp_indices[i]+size_of_constraint-k-1]; 2345 array_vector[j] = 1.0; 2346 
aux_primal_numbering[total_counts]=j; 2347 aux_primal_permutation[total_counts]=total_counts; 2348 total_counts++; 2349 } 2350 } 2351 ierr = VecRestoreArray(pcis->vec1_N,&array_vector);CHKERRQ(ierr); 2352 /* permute indices in order to have a sorted set of vertices */ 2353 ierr = PetscSortIntWithPermutation(total_counts,aux_primal_numbering,aux_primal_permutation); 2354 /* nonzero structure */ 2355 ierr = PetscMalloc(local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2356 for(i=0;i<total_counts;i++) { 2357 nnz[i]=1; 2358 } 2359 j=total_counts; 2360 for(i=n_vertices;i<local_primal_size;i++) { 2361 if(!change_basis[i]) { 2362 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2363 j++; 2364 } 2365 } 2366 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2367 ierr = PetscFree(nnz);CHKERRQ(ierr); 2368 /* set values in constraint matrix */ 2369 for(i=0;i<total_counts;i++) { 2370 j = aux_primal_permutation[i]; 2371 k = aux_primal_numbering[j]; 2372 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,k,1.0,INSERT_VALUES);CHKERRQ(ierr); 2373 } 2374 for(i=n_vertices;i<local_primal_size;i++) { 2375 if(!change_basis[i]) { 2376 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2377 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2378 total_counts++; 2379 } 2380 } 2381 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2382 ierr = PetscFree(aux_primal_permutation);CHKERRQ(ierr); 2383 /* assembling */ 2384 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2385 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 2387 /* Create matrix for change of basis. 
We don't need it in case pcbddc->usechangeofbasis is FALSE */ 2388 if(pcbddc->usechangeofbasis) { 2389 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2390 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2391 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2392 /* work arrays */ 2393 /* we need to reuse these arrays, so we free them */ 2394 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2395 ierr = PetscFree(work);CHKERRQ(ierr); 2396 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2397 ierr = PetscMalloc((nnsp_addone+nnsp_size)*(nnsp_addone+nnsp_size)*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 2398 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscScalar),&work);CHKERRQ(ierr); 2399 ierr = PetscMalloc((nnsp_addone+nnsp_size)*sizeof(PetscBLASInt),&ipiv);CHKERRQ(ierr); 2400 for(i=0;i<pcis->n_B;i++) { 2401 nnz[i]=1; 2402 } 2403 /* Overestimated nonzeros per row */ 2404 k=1; 2405 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2406 if(change_basis[i]) { 2407 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2408 if(k < size_of_constraint) { 2409 k = size_of_constraint; 2410 } 2411 for(j=0;j<size_of_constraint;j++) { 2412 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2413 } 2414 } 2415 } 2416 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2417 ierr = PetscFree(nnz);CHKERRQ(ierr); 2418 /* Temporary array to store indices */ 2419 ierr = PetscMalloc(k*sizeof(PetscInt),&is_indices);CHKERRQ(ierr); 2420 /* Set initial identity in the matrix */ 2421 for(i=0;i<pcis->n_B;i++) { 2422 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2423 } 2424 /* Now we loop on the constraints which need a change of basis */ 2425 /* Change of basis matrix is evaluated as the FIRST APPROACH in */ 2426 /* Klawonn and Widlund, Dual-primal FETI-DP methods for 
linear elasticity, (6.2.1) */ 2427 temp_constraints = 0; 2428 if(pcbddc->n_vertices < local_primal_size) { 2429 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[pcbddc->n_vertices]]; 2430 } 2431 for(i=pcbddc->n_vertices;i<local_primal_size;i++) { 2432 if(change_basis[i]) { 2433 compute_submatrix = PETSC_FALSE; 2434 useksp = PETSC_FALSE; 2435 if(temp_start_ptr == temp_indices_to_constraint_B[temp_indices[i]]) { 2436 temp_constraints++; 2437 if(i == local_primal_size -1 || temp_start_ptr != temp_indices_to_constraint_B[temp_indices[i+1]]) { 2438 compute_submatrix = PETSC_TRUE; 2439 } 2440 } 2441 if(compute_submatrix) { 2442 if(temp_constraints > 1 || pcbddc->use_nnsp_true) { 2443 useksp = PETSC_TRUE; 2444 } 2445 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2446 if(useksp) { /* experimental */ 2447 ierr = MatCreate(PETSC_COMM_SELF,&temp_mat);CHKERRQ(ierr); 2448 ierr = MatSetType(temp_mat,impMatType);CHKERRQ(ierr); 2449 ierr = MatSetSizes(temp_mat,size_of_constraint,size_of_constraint,size_of_constraint,size_of_constraint);CHKERRQ(ierr); 2450 ierr = MatSeqAIJSetPreallocation(temp_mat,size_of_constraint,PETSC_NULL);CHKERRQ(ierr); 2451 } 2452 /* First _size_of_constraint-temp_constraints_ columns */ 2453 dual_dofs = size_of_constraint-temp_constraints; 2454 start_constraint = i+1-temp_constraints; 2455 for(s=0;s<dual_dofs;s++) { 2456 is_indices[0] = s; 2457 for(j=0;j<temp_constraints;j++) { 2458 for(k=0;k<temp_constraints;k++) { 2459 temp_basis[j*temp_constraints+k]=temp_quadrature_constraint[temp_indices[start_constraint+k]+s+j+1]; 2460 } 2461 work[j]=-temp_quadrature_constraint[temp_indices[start_constraint+j]+s]; 2462 is_indices[j+1]=s+j+1; 2463 } 2464 Bt = temp_constraints; 2465 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2466 LAPACKgesv_(&Bt,&Bone,temp_basis,&Bt,ipiv,work,&Bt,&lierr); 2467 if ( lierr ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESV Lapack routine %d",(int)lierr); 2468 ierr = PetscFPTrapPop();CHKERRQ(ierr); 
2469 j = temp_indices_to_constraint_B[temp_indices[start_constraint]+s]; 2470 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,temp_constraints,&temp_indices_to_constraint_B[temp_indices[start_constraint]+s+1],1,&j,work,INSERT_VALUES);CHKERRQ(ierr); 2471 if(useksp) { 2472 /* temp mat with transposed rows and columns */ 2473 ierr = MatSetValues(temp_mat,1,&s,temp_constraints,&is_indices[1],work,INSERT_VALUES);CHKERRQ(ierr); 2474 ierr = MatSetValue(temp_mat,is_indices[0],is_indices[0],1.0,INSERT_VALUES);CHKERRQ(ierr); 2475 } 2476 } 2477 if(useksp) { 2478 /* last rows of temp_mat */ 2479 for(j=0;j<size_of_constraint;j++) { 2480 is_indices[j] = j; 2481 } 2482 for(s=0;s<temp_constraints;s++) { 2483 k = s + dual_dofs; 2484 ierr = MatSetValues(temp_mat,1,&k,size_of_constraint,is_indices,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2485 } 2486 ierr = MatAssemblyBegin(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 ierr = MatAssemblyEnd(temp_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatGetVecs(temp_mat,&temp_vec,PETSC_NULL);CHKERRQ(ierr); 2489 ierr = KSPCreate(PETSC_COMM_SELF,&temp_ksp);CHKERRQ(ierr); 2490 ierr = KSPSetOperators(temp_ksp,temp_mat,temp_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 2491 ierr = KSPSetType(temp_ksp,KSPPREONLY);CHKERRQ(ierr); 2492 ierr = KSPGetPC(temp_ksp,&temp_pc);CHKERRQ(ierr); 2493 ierr = PCSetType(temp_pc,PCLU);CHKERRQ(ierr); 2494 ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr); 2495 for(s=0;s<temp_constraints;s++) { 2496 ierr = VecSet(temp_vec,0.0);CHKERRQ(ierr); 2497 ierr = VecSetValue(temp_vec,s+dual_dofs,1.0,INSERT_VALUES);CHKERRQ(ierr); 2498 ierr = VecAssemblyBegin(temp_vec);CHKERRQ(ierr); 2499 ierr = VecAssemblyEnd(temp_vec);CHKERRQ(ierr); 2500 ierr = KSPSolve(temp_ksp,temp_vec,temp_vec);CHKERRQ(ierr); 2501 ierr = VecGetArray(temp_vec,&array_vector);CHKERRQ(ierr); 2502 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2503 /* last columns of change of 
basis matrix associated to new primal dofs */ 2504 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,array_vector,INSERT_VALUES);CHKERRQ(ierr); 2505 ierr = VecRestoreArray(temp_vec,&array_vector);CHKERRQ(ierr); 2506 } 2507 ierr = MatDestroy(&temp_mat);CHKERRQ(ierr); 2508 ierr = KSPDestroy(&temp_ksp);CHKERRQ(ierr); 2509 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2510 } else { 2511 /* last columns of change of basis matrix associated to new primal dofs */ 2512 for(s=0;s<temp_constraints;s++) { 2513 j = temp_indices_to_constraint_B[temp_indices[start_constraint+s]+size_of_constraint-s-1]; 2514 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,&temp_indices_to_constraint_B[temp_indices[start_constraint+s]],1,&j,&temp_quadrature_constraint[temp_indices[start_constraint+s]],INSERT_VALUES);CHKERRQ(ierr); 2515 } 2516 } 2517 /* prepare for the next cycle */ 2518 temp_constraints = 0; 2519 if(i != local_primal_size -1 ) { 2520 temp_start_ptr = temp_indices_to_constraint_B[temp_indices[i+1]]; 2521 } 2522 } 2523 } 2524 } 2525 /* assembling */ 2526 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2527 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2528 ierr = PetscFree(ipiv);CHKERRQ(ierr); 2529 ierr = PetscFree(is_indices);CHKERRQ(ierr); 2530 } 2531 /* free workspace no longer needed */ 2532 ierr = PetscFree(rwork);CHKERRQ(ierr); 2533 ierr = PetscFree(work);CHKERRQ(ierr); 2534 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2535 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2536 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2537 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2538 ierr = PetscFree(change_basis);CHKERRQ(ierr); 2539 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2540 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2541 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 
2542 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2543 #if defined(PETSC_MISSING_LAPACK_GESVD) 2544 ierr = PetscFree(iwork);CHKERRQ(ierr); 2545 ierr = PetscFree(ifail);CHKERRQ(ierr); 2546 ierr = PetscFree(singular_vectors);CHKERRQ(ierr); 2547 #endif 2548 for(k=0;k<nnsp_size;k++) { 2549 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2550 } 2551 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 /* -------------------------------------------------------------------------- */ 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "PCBDDCCoarseSetUp" 2557 static PetscErrorCode PCBDDCCoarseSetUp(PC pc) 2558 { 2559 PetscErrorCode ierr; 2560 2561 PC_IS* pcis = (PC_IS*)(pc->data); 2562 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2563 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2564 Mat change_mat_all; 2565 IS is_R_local; 2566 IS is_V_local; 2567 IS is_C_local; 2568 IS is_aux1; 2569 IS is_aux2; 2570 const VecType impVecType; 2571 const MatType impMatType; 2572 PetscInt n_R=0; 2573 PetscInt n_D=0; 2574 PetscInt n_B=0; 2575 PetscScalar zero=0.0; 2576 PetscScalar one=1.0; 2577 PetscScalar m_one=-1.0; 2578 PetscScalar* array; 2579 PetscScalar *coarse_submat_vals; 2580 PetscInt *idx_R_local; 2581 PetscInt *idx_V_B; 2582 PetscScalar *coarsefunctions_errors; 2583 PetscScalar *constraints_errors; 2584 /* auxiliary indices */ 2585 PetscInt i,j,k; 2586 /* for verbose output of bddc */ 2587 PetscViewer viewer=pcbddc->dbg_viewer; 2588 PetscBool dbg_flag=pcbddc->dbg_flag; 2589 /* for counting coarse dofs */ 2590 PetscInt n_vertices,n_constraints; 2591 PetscInt size_of_constraint; 2592 PetscInt *row_cmat_indices; 2593 PetscScalar *row_cmat_values; 2594 PetscInt *vertices,*nnz,*is_indices,*temp_indices; 2595 2596 PetscFunctionBegin; 2597 /* Set Non-overlapping dimensions */ 2598 n_B = pcis->n_B; n_D = pcis->n - n_B; 2599 /* Set types for local objects needed by BDDC precondtioner */ 2600 impMatType = MATSEQDENSE; 2601 impVecType = VECSEQ; 2602 /* 
get vertex indices from constraint matrix */ 2603 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2604 n_vertices=0; 2605 for(i=0;i<pcbddc->local_primal_size;i++) { 2606 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2607 if(size_of_constraint == 1) { 2608 vertices[n_vertices]=row_cmat_indices[0]; 2609 n_vertices++; 2610 } 2611 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr); 2612 } 2613 /* Set number of constraints */ 2614 n_constraints = pcbddc->local_primal_size-n_vertices; 2615 2616 /* vertices in boundary numbering */ 2617 if(n_vertices) { 2618 ierr = VecSet(pcis->vec1_N,m_one);CHKERRQ(ierr); 2619 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2620 for (i=0; i<n_vertices; i++) { array[ vertices[i] ] = i; } 2621 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2622 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2623 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2624 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 2625 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2626 for (i=0; i<n_vertices; i++) { 2627 j=0; 2628 while (array[j] != i ) {j++;} 2629 idx_V_B[i]=j; 2630 } 2631 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2632 } 2633 2634 /* transform local matrices if needed */ 2635 if(pcbddc->usechangeofbasis) { 2636 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2637 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2638 for(i=0;i<n_D;i++) { 2639 nnz[is_indices[i]]=1; 2640 } 2641 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2642 ierr = ISGetIndices(pcis->is_B_local,(const 
PetscInt**)&is_indices);CHKERRQ(ierr); 2643 k=1; 2644 for(i=0;i<n_B;i++) { 2645 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2646 nnz[is_indices[i]]=j; 2647 if( k < j) { 2648 k = j; 2649 } 2650 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 2651 } 2652 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2653 /* assemble change of basis matrix on the whole set of local dofs */ 2654 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 2655 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 2656 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2657 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 2658 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 2659 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2660 for(i=0;i<n_D;i++) { 2661 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2662 } 2663 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2664 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2665 for(i=0;i<n_B;i++) { 2666 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2667 for(k=0;k<j;k++) { 2668 temp_indices[k]=is_indices[row_cmat_indices[k]]; 2669 } 2670 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 2671 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2672 } 2673 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2674 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2675 ierr = 
MatPtAP(matis->A,change_mat_all,MAT_INITIAL_MATRIX,1.0,&pcbddc->local_mat);CHKERRQ(ierr); 2676 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2677 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2678 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 2679 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_IB);CHKERRQ(ierr); 2680 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&pcis->A_BI);CHKERRQ(ierr); 2681 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&pcis->A_BB);CHKERRQ(ierr); 2682 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 2683 ierr = PetscFree(nnz);CHKERRQ(ierr); 2684 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2685 } else { 2686 /* without change of basis, the local matrix is unchanged */ 2687 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 2688 pcbddc->local_mat = matis->A; 2689 } 2690 2691 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2692 ierr = VecSet(pcis->vec1_N,one);CHKERRQ(ierr); 2693 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2694 for (i=0;i<n_vertices;i++) { array[ vertices[i] ] = zero; } 2695 ierr = PetscMalloc(( pcis->n - n_vertices )*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 2696 for (i=0, n_R=0; i<pcis->n; i++) { if (array[i] == one) { idx_R_local[n_R] = i; n_R++; } } 2697 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2698 if(dbg_flag) { 2699 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2700 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2701 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2702 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2703 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr); 2704 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 2705 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2706 } 2707 2708 /* Allocate needed vectors */ 2709 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->original_rhs);CHKERRQ(ierr); 2710 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->temp_solution);CHKERRQ(ierr); 2711 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 2712 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_R);CHKERRQ(ierr); 2713 ierr = VecSetSizes(pcbddc->vec1_R,n_R,n_R);CHKERRQ(ierr); 2714 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2715 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2716 ierr = VecCreate(PETSC_COMM_SELF,&pcbddc->vec1_P);CHKERRQ(ierr); 2717 ierr = VecSetSizes(pcbddc->vec1_P,pcbddc->local_primal_size,pcbddc->local_primal_size);CHKERRQ(ierr); 2718 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2719 2720 /* Creating some index sets needed */ 2721 /* For submatrices */ 2722 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&is_R_local);CHKERRQ(ierr); 2723 if(n_vertices) { 2724 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_OWN_POINTER,&is_V_local);CHKERRQ(ierr); 2725 } 2726 if(n_constraints) { 2727 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_C_local);CHKERRQ(ierr); 2728 } 2729 2730 /* For VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2731 { 2732 PetscInt *aux_array1; 2733 PetscInt *aux_array2; 2734 2735 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2736 ierr = PetscMalloc( (pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 2737 2738 ierr = 
VecSet(pcis->vec1_global,zero);CHKERRQ(ierr); 2739 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2740 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2741 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2742 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2743 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2744 ierr = VecScatterEnd (pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2745 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2746 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] > one) { aux_array1[j] = i; j++; } } 2747 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2748 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2749 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2750 for (i=0, j=0; i<n_B; i++) { if (array[i] > one) { aux_array2[j] = i; j++; } } 2751 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 2752 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_COPY_VALUES,&is_aux2);CHKERRQ(ierr); 2753 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2754 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2755 ierr = PetscFree(aux_array2);CHKERRQ(ierr); 2756 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2757 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2758 2759 if(pcbddc->prec_type || dbg_flag ) { 2760 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 2761 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2762 for (i=0, j=0; i<n_R; i++) { if (array[idx_R_local[i]] == one) { aux_array1[j] = i; j++; } } 2763 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 2764 
ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_COPY_VALUES,&is_aux1);CHKERRQ(ierr); 2765 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2766 ierr = PetscFree(aux_array1);CHKERRQ(ierr); 2767 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2768 } 2769 } 2770 2771 /* Creating PC contexts for local Dirichlet and Neumann problems */ 2772 { 2773 Mat A_RR; 2774 PC pc_temp; 2775 /* Matrix for Dirichlet problem is A_II -> we already have it from pcis.c code */ 2776 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2777 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2778 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,SAME_PRECONDITIONER);CHKERRQ(ierr); 2779 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2780 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr); 2781 /* default */ 2782 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2783 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2784 /* Allow user's customization */ 2785 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2786 /* Set Up KSP for Dirichlet problem of BDDC */ 2787 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2788 /* set ksp_D into pcis data */ 2789 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2790 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2791 pcis->ksp_D = pcbddc->ksp_D; 2792 /* Matrix for Neumann problem is A_RR -> we need to create it */ 2793 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 2794 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2795 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2796 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,SAME_PRECONDITIONER);CHKERRQ(ierr); 2797 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2798 ierr = 
KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr); 2799 /* default */ 2800 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2801 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2802 /* Allow user's customization */ 2803 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2804 /* Set Up KSP for Neumann problem of BDDC */ 2805 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2806 /* check Dirichlet and Neumann solvers */ 2807 { 2808 Vec temp_vec; 2809 PetscReal value; 2810 PetscMPIInt use_exact,use_exact_reduced; 2811 2812 ierr = VecDuplicate(pcis->vec1_D,&temp_vec);CHKERRQ(ierr); 2813 ierr = VecSetRandom(pcis->vec1_D,PETSC_NULL);CHKERRQ(ierr); 2814 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2815 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,temp_vec);CHKERRQ(ierr); 2816 ierr = VecAXPY(temp_vec,m_one,pcis->vec1_D);CHKERRQ(ierr); 2817 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2818 use_exact = 1; 2819 if(PetscAbsReal(value) > 1.e-4) { 2820 use_exact = 0; 2821 } 2822 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,((PetscObject)pc)->comm);CHKERRQ(ierr); 2823 pcbddc->use_exact_dirichlet = (PetscBool) use_exact_reduced; 2824 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2825 if(dbg_flag) { 2826 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2827 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2828 ierr = PetscViewerASCIIPrintf(viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 2829 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2830 ierr = VecDuplicate(pcbddc->vec1_R,&temp_vec);CHKERRQ(ierr); 2831 ierr = VecSetRandom(pcbddc->vec1_R,PETSC_NULL);CHKERRQ(ierr); 2832 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2833 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,temp_vec);CHKERRQ(ierr); 2834 
ierr = VecAXPY(temp_vec,m_one,pcbddc->vec1_R);CHKERRQ(ierr); 2835 ierr = VecNorm(temp_vec,NORM_INFINITY,&value);CHKERRQ(ierr); 2836 ierr = VecDestroy(&temp_vec);CHKERRQ(ierr); 2837 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d infinity error for Neumann solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr); 2838 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2839 } 2840 } 2841 /* free Neumann problem's matrix */ 2842 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2843 } 2844 2845 /* Assemble all remaining stuff needed to apply BDDC */ 2846 { 2847 Mat A_RV,A_VR,A_VV; 2848 Mat M1,M2; 2849 Mat C_CR; 2850 Mat AUXMAT; 2851 Vec vec1_C; 2852 Vec vec2_C; 2853 Vec vec1_V; 2854 Vec vec2_V; 2855 PetscInt *nnz; 2856 PetscInt *auxindices; 2857 PetscInt index; 2858 PetscScalar* array2; 2859 MatFactorInfo matinfo; 2860 2861 /* Allocating some extra storage just to be safe */ 2862 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 2863 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 2864 for(i=0;i<pcis->n;i++) {auxindices[i]=i;} 2865 2866 /* some work vectors on vertices and/or constraints */ 2867 if(n_vertices) { 2868 ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr); 2869 ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr); 2870 ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr); 2871 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 2872 } 2873 if(n_constraints) { 2874 ierr = VecCreate(PETSC_COMM_SELF,&vec1_C);CHKERRQ(ierr); 2875 ierr = VecSetSizes(vec1_C,n_constraints,n_constraints);CHKERRQ(ierr); 2876 ierr = VecSetType(vec1_C,impVecType);CHKERRQ(ierr); 2877 ierr = VecDuplicate(vec1_C,&vec2_C);CHKERRQ(ierr); 2878 ierr = VecDuplicate(vec1_C,&pcbddc->vec1_C);CHKERRQ(ierr); 2879 } 2880 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2881 if(n_constraints) { 2882 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2883 ierr = 
MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,n_R,n_constraints);CHKERRQ(ierr); 2884 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 2885 ierr = MatSeqDenseSetPreallocation(pcbddc->local_auxmat2,PETSC_NULL);CHKERRQ(ierr); 2886 2887 /* Create Constraint matrix on R nodes: C_{CR} */ 2888 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_C_local,is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2889 ierr = ISDestroy(&is_C_local);CHKERRQ(ierr); 2890 2891 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 2892 for(i=0;i<n_constraints;i++) { 2893 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 2894 /* Get row of constraint matrix in R numbering */ 2895 ierr = VecGetArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2896 ierr = MatGetRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2897 for(j=0;j<size_of_constraint;j++) { array[ row_cmat_indices[j] ] = - row_cmat_values[j]; } 2898 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 2899 ierr = VecRestoreArray(pcbddc->vec1_R,&array);CHKERRQ(ierr); 2900 /* Solve for row of constraint matrix in R numbering */ 2901 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2902 /* Set values */ 2903 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2904 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2905 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2906 } 2907 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2908 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2909 2910 /* Assemble AUXMAT = ( LUFactor )( -C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2911 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AUXMAT);CHKERRQ(ierr); 2912 ierr = 
MatFactorInfoInitialize(&matinfo);CHKERRQ(ierr); 2913 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,0,1,&is_aux1);CHKERRQ(ierr); 2914 ierr = MatLUFactor(AUXMAT,is_aux1,is_aux1,&matinfo);CHKERRQ(ierr); 2915 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2916 2917 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 2918 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 2919 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 2920 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 2921 ierr = MatSeqDenseSetPreallocation(M1,PETSC_NULL);CHKERRQ(ierr); 2922 for(i=0;i<n_constraints;i++) { 2923 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 2924 ierr = VecSetValue(vec1_C,i,one,INSERT_VALUES);CHKERRQ(ierr); 2925 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 2926 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 2927 ierr = MatSolve(AUXMAT,vec1_C,vec2_C);CHKERRQ(ierr); 2928 ierr = VecScale(vec2_C,m_one);CHKERRQ(ierr); 2929 ierr = VecGetArray(vec2_C,&array);CHKERRQ(ierr); 2930 ierr = MatSetValues(M1,n_constraints,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2931 ierr = VecRestoreArray(vec2_C,&array);CHKERRQ(ierr); 2932 } 2933 ierr = MatAssemblyBegin(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2934 ierr = MatAssemblyEnd(M1,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2935 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 2936 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 2937 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2938 2939 } 2940 2941 /* Get submatrices from subdomain matrix */ 2942 if(n_vertices){ 2943 ierr = MatGetSubMatrix(pcbddc->local_mat,is_R_local,is_V_local,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2944 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2945 ierr = MatGetSubMatrix(pcbddc->local_mat,is_V_local,is_V_local,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2946 
/* Assemble M2 = A_RR^{-1}A_RV */ 2947 ierr = MatCreate(PETSC_COMM_SELF,&M2);CHKERRQ(ierr); 2948 ierr = MatSetSizes(M2,n_R,n_vertices,n_R,n_vertices);CHKERRQ(ierr); 2949 ierr = MatSetType(M2,impMatType);CHKERRQ(ierr); 2950 ierr = MatSeqDenseSetPreallocation(M2,PETSC_NULL);CHKERRQ(ierr); 2951 for(i=0;i<n_vertices;i++) { 2952 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2953 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2954 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2955 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2956 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2957 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2958 ierr = VecGetArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2959 ierr = MatSetValues(M2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(pcbddc->vec2_R,&array);CHKERRQ(ierr); 2961 } 2962 ierr = MatAssemblyBegin(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2963 ierr = MatAssemblyEnd(M2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2964 } 2965 2966 /* Matrix of coarse basis functions (local) */ 2967 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2968 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 2969 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 2970 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_B,PETSC_NULL);CHKERRQ(ierr); 2971 if(pcbddc->prec_type || dbg_flag ) { 2972 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2973 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 2974 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 2975 ierr = MatSeqDenseSetPreallocation(pcbddc->coarse_phi_D,PETSC_NULL);CHKERRQ(ierr); 2976 } 2977 2978 if(dbg_flag) { 2979 ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&coarsefunctions_errors);CHKERRQ(ierr); 2980 
ierr = PetscMalloc( pcbddc->local_primal_size*sizeof(PetscScalar),&constraints_errors);CHKERRQ(ierr); 2981 } 2982 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2983 ierr = PetscMalloc ((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 2984 2985 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2986 for(i=0;i<n_vertices;i++){ 2987 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 2988 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 2989 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 2990 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 2991 /* solution of saddle point problem */ 2992 ierr = MatMult(M2,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 2993 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 2994 if(n_constraints) { 2995 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 2996 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2997 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 2998 } 2999 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 3000 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 3001 3002 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3003 /* coarse basis functions */ 3004 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3005 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3006 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3007 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3008 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3009 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3010 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 3011 if( pcbddc->prec_type || dbg_flag ) { 3012 
ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3013 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3014 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3015 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 3016 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3017 } 3018 /* subdomain contribution to coarse matrix */ 3019 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3020 for(j=0;j<n_vertices;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j] = array[j]; } /* WARNING -> column major ordering */ 3021 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3022 if(n_constraints) { 3023 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3024 for(j=0;j<n_constraints;j++) { coarse_submat_vals[i*pcbddc->local_primal_size+j+n_vertices] = array[j]; } /* WARNING -> column major ordering */ 3025 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3026 } 3027 3028 if( dbg_flag ) { 3029 /* assemble subdomain vector on nodes */ 3030 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3031 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3032 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3033 for(j=0;j<n_R;j++) { array[idx_R_local[j]] = array2[j]; } 3034 array[ vertices[i] ] = one; 3035 ierr = VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3036 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3037 /* assemble subdomain vector of lagrange multipliers (i.e. 
primal nodes) */ 3038 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3039 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3040 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3041 for(j=0;j<n_vertices;j++) { array2[j]=array[j]; } 3042 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3043 if(n_constraints) { 3044 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3045 for(j=0;j<n_constraints;j++) { array2[j+n_vertices]=array[j]; } 3046 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3047 } 3048 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3049 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 3050 /* check saddle point solution */ 3051 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3052 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3053 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 3054 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3055 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3056 array[i]=array[i]+m_one; /* shift by the identity matrix */ 3057 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3058 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 3059 } 3060 } 3061 3062 for(i=0;i<n_constraints;i++){ 3063 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 3064 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 3065 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 3066 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 3067 /* solution of saddle point problem */ 3068 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 3069 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 3070 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 3071 if(n_vertices) { ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); } 3072 /* Set values in coarse basis function and subdomain part of coarse_mat */ 3073 
/* coarse basis functions */ 3074 index=i+n_vertices; 3075 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 3076 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3077 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3078 ierr = VecGetArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3079 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3080 ierr = VecRestoreArray(pcis->vec1_B,&array);CHKERRQ(ierr); 3081 if( pcbddc->prec_type || dbg_flag ) { 3082 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3084 ierr = VecGetArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3085 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&index,array,INSERT_VALUES);CHKERRQ(ierr); 3086 ierr = VecRestoreArray(pcis->vec1_D,&array);CHKERRQ(ierr); 3087 } 3088 /* subdomain contribution to coarse matrix */ 3089 if(n_vertices) { 3090 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3091 for(j=0;j<n_vertices;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j]=array[j];} /* WARNING -> column major ordering */ 3092 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3093 } 3094 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3095 for(j=0;j<n_constraints;j++) {coarse_submat_vals[index*pcbddc->local_primal_size+j+n_vertices]=array[j];} /* WARNING -> column major ordering */ 3096 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3097 3098 if( dbg_flag ) { 3099 /* assemble subdomain vector on nodes */ 3100 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 3101 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3102 ierr = VecGetArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3103 for(j=0;j<n_R;j++){ array[ idx_R_local[j] ] = array2[j]; } 3104 ierr = 
VecRestoreArray(pcbddc->vec1_R,&array2);CHKERRQ(ierr); 3105 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3106 /* assemble subdomain vector of lagrange multipliers */ 3107 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 3108 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3109 if( n_vertices) { 3110 ierr = VecGetArray(vec2_V,&array);CHKERRQ(ierr); 3111 for(j=0;j<n_vertices;j++) {array2[j]=-array[j];} 3112 ierr = VecRestoreArray(vec2_V,&array);CHKERRQ(ierr); 3113 } 3114 ierr = VecGetArray(vec1_C,&array);CHKERRQ(ierr); 3115 for(j=0;j<n_constraints;j++) {array2[j+n_vertices]=-array[j];} 3116 ierr = VecRestoreArray(vec1_C,&array);CHKERRQ(ierr); 3117 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 3118 /* check saddle point solution */ 3119 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3120 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 3121 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[index]);CHKERRQ(ierr); 3122 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 3123 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3124 array[index]=array[index]+m_one; /* shift by the identity matrix */ 3125 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 3126 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[index]);CHKERRQ(ierr); 3127 } 3128 } 3129 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3130 ierr = MatAssemblyEnd (pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3131 if( pcbddc->prec_type || dbg_flag ) { 3132 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 ierr = MatAssemblyEnd (pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134 } 3135 /* Checking coarse_sub_mat and coarse basis functios */ 3136 /* It shuld be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3137 
if(dbg_flag) { 3138 3139 Mat coarse_sub_mat; 3140 Mat TM1,TM2,TM3,TM4; 3141 Mat coarse_phi_D,coarse_phi_B,A_II,A_BB,A_IB,A_BI; 3142 const MatType checkmattype=MATSEQAIJ; 3143 PetscScalar value; 3144 3145 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3146 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3147 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3148 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3149 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3150 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3152 ierr = MatConvert(coarse_sub_mat,checkmattype,MAT_REUSE_MATRIX,&coarse_sub_mat);CHKERRQ(ierr); 3153 3154 /*PetscViewer view_out; 3155 PetscMPIInt myrank; 3156 char filename[256]; 3157 MPI_Comm_rank(((PetscObject)pc)->comm,&myrank); 3158 sprintf(filename,"coarsesubmat_%04d.m",myrank); 3159 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&view_out);CHKERRQ(ierr); 3160 ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3161 ierr = MatView(coarse_sub_mat,view_out);CHKERRQ(ierr); 3162 ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/ 3163 3164 ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3165 ierr = PetscViewerASCIIPrintf(viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 3166 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3167 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3168 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3169 ierr = 
MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3170 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3171 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3172 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3173 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3174 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3175 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3176 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3177 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3178 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3179 ierr = MatNorm(TM1,NORM_INFINITY,&value);CHKERRQ(ierr); 3180 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"----------------------------------\n");CHKERRQ(ierr); 3181 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 3182 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"matrix error = % 1.14e\n",value);CHKERRQ(ierr); 3183 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"coarse functions errors\n");CHKERRQ(ierr); 3184 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); } 3185 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"constraints errors\n");CHKERRQ(ierr); 3186 for(i=0;i<pcbddc->local_primal_size;i++) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); } 3187 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 3188 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3189 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3190 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3191 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3192 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3193 ierr = 
MatDestroy(&TM2);CHKERRQ(ierr);
    ierr = MatDestroy(&TM3);CHKERRQ(ierr);
    ierr = MatDestroy(&TM4);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
    ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
    ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
    ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
  }

  /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */
  ierr = PCBDDCSetupCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr);
  /* free memory */
  ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
  ierr = PetscFree(auxindices);CHKERRQ(ierr);
  ierr = PetscFree(nnz);CHKERRQ(ierr);
  if(n_vertices) {
    ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
    ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
    ierr = MatDestroy(&M2);CHKERRQ(ierr);
    ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
    ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
    ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
  }
  if(n_constraints) {
    ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
    ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
    ierr = MatDestroy(&M1);CHKERRQ(ierr);
    ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
  }
  /* free memory */
  if(n_vertices) {
    ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
    ierr = ISDestroy(&is_V_local);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&is_R_local);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */

#undef __FUNCT__
#define __FUNCT__ "PCBDDCSetupCoarseEnvironment"
/*
   PCBDDCSetupCoarseEnvironment - Builds the coarse problem for the BDDC preconditioner.

   Collective on the communicator of pc.

   Input Parameters:
+  pc                - the BDDC preconditioner context (pc->data is PC_BDDC, aliased as PC_IS)
-  coarse_submat_vals - local coarse submatrix values in COLUMN-MAJOR order,
                        of size local_primal_size x local_primal_size

   This routine:
     1. assigns a global numbering to the coarse (primal) dofs;
     2. depending on pcbddc->coarse_problem_type (MULTILEVEL/REPLICATED/PARALLEL/SEQUENTIAL)
        chooses the coarse matrix type, coarse communicator and communication pattern;
     3. moves each subdomain's coarse submatrix to the process(es) owning the coarse problem;
     4. assembles pcbddc->coarse_mat and creates pcbddc->coarse_ksp (and, for the
        multilevel case, estimates extreme eigenvalues to configure Chebyshev).
*/
static PetscErrorCode PCBDDCSetupCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
{


  Mat_IS   *matis    = (Mat_IS*)pc->pmat->data;
  PC_BDDC  *pcbddc   = (PC_BDDC*)pc->data;
  PC_IS    *pcis     = (PC_IS*)pc->data;
  MPI_Comm prec_comm = ((PetscObject)pc)->comm;
  MPI_Comm coarse_comm;

  /* common to all choiches */
  PetscScalar *temp_coarse_mat_vals;
  PetscScalar *ins_coarse_mat_vals;
  PetscInt    *ins_local_primal_indices;
  PetscMPIInt *localsizes2,*localdispl2;
  PetscMPIInt size_prec_comm;
  PetscMPIInt rank_prec_comm;
  PetscMPIInt active_rank=MPI_PROC_NULL;   /* rank(s) that insert values into the coarse matrix */
  PetscMPIInt master_proc=0;               /* root rank for the gather-based coarse setups */
  PetscInt    ins_local_primal_size;
  /* specific to MULTILEVEL_BDDC */
  PetscMPIInt *ranks_recv;
  PetscMPIInt count_recv=0;
  PetscMPIInt rank_coarse_proc_send_to;
  PetscMPIInt coarse_color = MPI_UNDEFINED; /* MPI_UNDEFINED -> not a member of the coarse communicator */
  ISLocalToGlobalMapping coarse_ISLG;
  /* some other variables */
  PetscErrorCode ierr;
  const MatType coarse_mat_type;
  const PCType  coarse_pc_type;
  const KSPType  coarse_ksp_type;
  PC pc_temp;
  PetscInt i,j,k,bs;
  PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
  /* verbose output viewer */
  PetscViewer viewer=pcbddc->dbg_viewer;
  PetscBool   dbg_flag=pcbddc->dbg_flag;
  PetscInt offset,offset2;

  PetscFunctionBegin;
  /* zero the optionally-allocated pointers so the final cleanup can test them */
  ins_local_primal_indices = 0;
  ins_coarse_mat_vals      = 0;
  localsizes2              = 0;
  localdispl2              = 0;
  temp_coarse_mat_vals     = 0;
  coarse_ISLG              = 0;

  ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
  ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);

  /* adapt coarse problem type: multilevel degenerates to a parallel coarse solve
     when too few processes remain active to justify another BDDC level */
  if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC && pcbddc->active_procs < MIN_PROCS_FOR_BDDC )
    pcbddc->coarse_problem_type = PARALLEL_BDDC;

  /* Assign global numbering to coarse dofs */
  {
    PetscScalar one=1.,zero=0.;
    PetscScalar *array;
    PetscMPIInt *auxlocal_primal;
    PetscMPIInt *auxglobal_primal;
    PetscMPIInt *all_auxglobal_primal;
    PetscMPIInt mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
    PetscInt    *row_cmat_indices;
    PetscInt    size_of_constraint;
    PetscScalar coarsesum;

    /* Construct needed data structures for message passing */
    ierr = PetscMalloc(mpi_local_primal_size*sizeof(PetscMPIInt),&pcbddc->local_primal_indices);CHKERRQ(ierr);
    j = 0;
    /* full per-process size tables are needed only on the root (or everywhere for REPLICATED) */
    if(rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC) {
      j = size_prec_comm;
    }
    ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
    ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
    /* Gather local_primal_size information for all processes */
    /* NOTE(review): buffers here are PetscMPIInt but the datatype is MPIU_INT (PetscInt);
       these differ when PETSc is configured with 64-bit indices — verify */
    if(pcbddc->coarse_problem_type == REPLICATED_BDDC) {
      ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
    } else {
      ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
    }
    /* prefix-sum the sizes into displacements; total is the replicated primal size */
    pcbddc->replicated_primal_size = 0;
    for (i=0; i<j; i++) {
      pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
      pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
    }
    if(rank_prec_comm == 0) {
      /* allocate some auxiliary space */
      ierr = PetscMalloc(pcbddc->replicated_primal_size*sizeof(*all_auxglobal_primal),&all_auxglobal_primal);CHKERRQ(ierr);
    }
    ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxlocal_primal);CHKERRQ(ierr);
    ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscMPIInt),&auxglobal_primal);CHKERRQ(ierr);

    /* First let's count coarse dofs.
       This code fragment assumes that the number of local constraints per connected component
       is not greater than the number of nodes defined for the connected component
       (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
    /* auxlocal_primal : primal indices in local nodes numbering (internal and interface) with complete queue sorted by global ordering */
    ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    /* pick, for each constraint row, the first node not yet claimed by a previous constraint */
    for(i=0;i<pcbddc->local_primal_size;i++) {
      ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr);
      for (j=0; j<size_of_constraint; j++) {
        k = row_cmat_indices[j];
        if( array[k] == zero ) {
          array[k] = one;
          auxlocal_primal[i] = k;
          break;
        }
      }
      ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,PETSC_NULL);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    /* count each selected node exactly once across subdomains:
       scatter-add the 0/1 markers to the global vector, bring the multiplicities back,
       replace each nonzero multiplicity m by 1/m, scatter-add again and sum:
       every shared primal node then contributes exactly 1 to coarsesum */
    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    for(i=0;i<pcis->n;i++) { if( array[i] > zero) array[i] = one/array[i]; }
    ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
    ierr = VecSet(pcis->vec1_global,zero);CHKERRQ(ierr);
    ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
    pcbddc->coarse_size = (PetscInt) coarsesum;

    /* Now assign them a global numbering */
    /* auxglobal_primal contains indices in global nodes numbering (internal and interface) */
    ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,auxlocal_primal,auxglobal_primal);CHKERRQ(ierr);
    /* all_auxglobal_primal contains all primal nodes indices in global nodes numbering (internal and interface) */
    ierr = MPI_Gatherv(&auxglobal_primal[0],pcbddc->local_primal_size,MPIU_INT,&all_auxglobal_primal[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,0,prec_comm);CHKERRQ(ierr);

    /* After this block all_auxglobal_primal should contains one copy of each primal node's indices in global nodes numbering */
    if(rank_prec_comm==0) {
      j=pcbddc->replicated_primal_size;
      ierr = PetscSortRemoveDupsMPIInt(&j,all_auxglobal_primal);CHKERRQ(ierr);
    } else {
      /* non-root ranks only need space for the deduplicated list to be broadcast */
      ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscMPIInt),&all_auxglobal_primal);CHKERRQ(ierr);
    }
    /* We only need to broadcast the indices from 0 to pcbddc->coarse_size. Remaning elements of array all_aux_global_primal are garbage. */
    ierr = MPI_Bcast(all_auxglobal_primal,pcbddc->coarse_size,MPIU_INT,0,prec_comm);CHKERRQ(ierr);

    /* Now get global coarse numbering of local primal nodes */
    /* linear search in the sorted deduplicated list; position = global coarse index */
    for(i=0;i<pcbddc->local_primal_size;i++) {
      k=0;
      while( all_auxglobal_primal[k] != auxglobal_primal[i] ) { k++;}
      pcbddc->local_primal_indices[i]=k;
    }
    if(dbg_flag) {
      ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem %d\n",pcbddc->coarse_size);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
    /* free allocated memory */
    ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
    ierr = PetscFree(auxglobal_primal);CHKERRQ(ierr);
    ierr = PetscFree(all_auxglobal_primal);CHKERRQ(ierr);
  }

  /* select coarse matrix/solver types and the communication pattern per coarse problem type */
  switch(pcbddc->coarse_problem_type){

  case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
    {
      /* we need additional variables */
      MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
      MetisInt    *metis_coarse_subdivision;
      MetisInt    options[METIS_NOPTIONS];
      PetscMPIInt size_coarse_comm,rank_coarse_comm;
      PetscMPIInt procs_jumps_coarse_comm;
      PetscMPIInt *coarse_subdivision;
      PetscMPIInt *total_count_recv;
      PetscMPIInt *total_ranks_recv;
      PetscMPIInt *displacements_recv;
      PetscMPIInt *my_faces_connectivity;
      PetscMPIInt *petsc_faces_adjncy;
      MetisInt    *faces_adjncy;
      MetisInt    *faces_xadj;
      PetscMPIInt *number_of_faces;
      PetscMPIInt *faces_displacements;
      PetscInt    *array_int;
      PetscMPIInt my_faces=0;
      PetscMPIInt total_faces=0;
      PetscInt    ranks_stretching_ratio;

      /* define some quantities */
      pcbddc->coarse_communications_type = SCATTERS_BDDC;
      coarse_mat_type = MATIS;
      coarse_pc_type  = PCBDDC;
      coarse_ksp_type = KSPCHEBYSHEV;

      /* details of coarse decomposition */
      n_subdomains = pcbddc->active_procs;
      n_parts      = n_subdomains/pcbddc->coarsening_ratio;
      ranks_stretching_ratio = size_prec_comm/pcbddc->active_procs;
      procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;

      /*printf("Coarse algorithm details: \n");
      printf("n_subdomains %d, n_parts %d\nstretch %d,jumps %d,coarse_ratio %d\nlevel should be log_%d(%d)\n",n_subdomains,n_parts,ranks_stretching_ratio,procs_jumps_coarse_comm,pcbddc->coarsening_ratio,pcbddc->coarsening_ratio,(ranks_stretching_ratio/pcbddc->coarsening_ratio+1));*/

      /* build CSR graph of subdomains' connectivity through faces */
      /* mark nodes shared with exactly one neighbour (i.e. face nodes) */
      ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
      ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
      for(i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
        for(j=0;j<pcis->n_shared[i];j++){
          array_int[ pcis->shared[i][j] ]+=1;
        }
      }
      /* count neighbours with which this subdomain shares a face */
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces++;
            break;
          }
        }
      }

      /* NOTE(review): PetscMPIInt buffers with MPIU_INT datatype — mismatch with 64-bit PetscInt; verify */
      ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
      my_faces=0;
      /* record the ranks of the face-adjacent neighbours */
      for(i=1;i<pcis->n_neigh;i++){
        for(j=0;j<pcis->n_shared[i];j++){
          if(array_int[ pcis->shared[i][j] ] == 1 ){
            my_faces_connectivity[my_faces]=pcis->neigh[i];
            my_faces++;
            break;
          }
        }
      }
      if(rank_prec_comm == master_proc) {
        ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
        ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
        ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
        ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
      }
      /* NOTE(review): on non-root ranks &number_of_faces[0] dereferences an uninitialized pointer
         (the recv buffer is only significant at root, but the expression is still evaluated) — verify */
      ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        /* compress per-rank face counts into the CSR xadj of the n_subdomains active ranks */
        faces_xadj[0]=0;
        faces_displacements[0]=0;
        j=0;
        for(i=1;i<size_prec_comm+1;i++) {
          faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
          if(number_of_faces[i-1]) {
            j++;
            faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
          }
        }
        /*printf("The J I count is %d and should be %d\n",j,n_subdomains);
        printf("Total faces seem %d and should be %d\n",faces_xadj[j],total_faces);*/
      }
      ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
      ierr = PetscFree(array_int);CHKERRQ(ierr);
      if(rank_prec_comm == master_proc) {
        for(i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
        /*printf("This is the face connectivity (actual ranks)\n");
        for(i=0;i<n_subdomains;i++){
          printf("proc %d is connected with \n",i);
          for(j=faces_xadj[i];j<faces_xadj[i+1];j++)
            printf("%d ",faces_adjncy[j]);
          printf("\n");
        }*/
        ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
        ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
        ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
      }

      if( rank_prec_comm == master_proc ) {

        PetscInt heuristic_for_metis=3;   /* Kway only pays off when n_subdomains >> n_parts */

        ncon=1;
        faces_nvtxs=n_subdomains;
        /* partition graoh induced by face connectivity */
        ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
        ierr = METIS_SetDefaultOptions(options);
        /* we need a contiguous partition of the coarse mesh */
        options[METIS_OPTION_CONTIG]=1;
        options[METIS_OPTION_DBGLVL]=1;
        options[METIS_OPTION_NITER]=30;
        if(n_subdomains>n_parts*heuristic_for_metis) {
          options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
          options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
          ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
        } else {
          ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
        }
        if(ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetupCoarseEnvironment\n",ierr);
        ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
        ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
        /* NOTE(review): raw calloc/free here (not PetscMalloc/PetscFree) — intentional for contiguity
           per the comment, but it bypasses PETSc memory logging; verify */
        coarse_subdivision = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt)); /* calloc for contiguous memory since we need to scatter these values later */
        /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
        for(i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
        for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
        ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
      }

      /* Create new communicator for coarse problem splitting the old one */
      if( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
        coarse_color=0;              /* for communicator splitting */
        active_rank=rank_prec_comm;  /* for insertion of matrix values */
      }
      /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
         key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
      ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);

      if( coarse_color == 0 ) {
        ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
        ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
        /*printf("Details of coarse comm\n");
        printf("size = %d, myrank = %d\n",size_coarse_comm,rank_coarse_comm);
        printf("jumps = %d, coarse_color = %d, n_parts = %d\n",procs_jumps_coarse_comm,coarse_color,n_parts);*/
      } else {
        rank_coarse_comm = MPI_PROC_NULL;
      }

      /* master proc take care of arranging and distributing coarse informations */
      if(rank_coarse_comm == master_proc) {
        ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
        /*ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
        ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);*/
        total_count_recv = (PetscMPIInt*)calloc(size_prec_comm,sizeof(PetscMPIInt));
        total_ranks_recv = (PetscMPIInt*)calloc(n_subdomains,sizeof(PetscMPIInt));
        /* some initializations */
        displacements_recv[0]=0;
        /* PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt)); not needed -> calloc initializes to zero */
        /* count from how many processes the j-th process of the coarse decomposition will receive data */
        for(j=0;j<size_coarse_comm;j++)
          for(i=0;i<size_prec_comm;i++)
            if(coarse_subdivision[i]==j)
              total_count_recv[j]++;
        /* displacements needed for scatterv of total_ranks_recv */
        for(i=1;i<size_coarse_comm;i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
        /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
        ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
        for(j=0;j<size_coarse_comm;j++) {
          for(i=0;i<size_prec_comm;i++) {
            if(coarse_subdivision[i]==j) {
              total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
              total_count_recv[j]+=1;
            }
          }
        }
        /*for(j=0;j<size_coarse_comm;j++) {
          printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
          for(i=0;i<total_count_recv[j];i++) {
            printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
          }
          printf("\n");
        }*/

        /* identify new decomposition in terms of ranks in the old communicator */
        for(i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
        /*printf("coarse_subdivision in old end new ranks\n");
        for(i=0;i<size_prec_comm;i++)
          if(coarse_subdivision[i]!=MPI_PROC_NULL) {
            printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
          } else {
            printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
          }
        printf("\n");*/
      }

      /* Scatter new decomposition for send details */
      ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
      /* Scatter receiving details to members of coarse decomposition */
      if( coarse_color == 0) {
        ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
        ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
        ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
      }

      /*printf("I will send my matrix data to proc %d\n",rank_coarse_proc_send_to);
      if(coarse_color == 0) {
        printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
        for(i=0;i<count_recv;i++)
          printf("%d ",ranks_recv[i]);
        printf("\n");
      }*/

      if(rank_prec_comm == master_proc) {
        /*ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
        ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
        ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);*/
        /* these were allocated with calloc, so plain free (not PetscFree) is required */
        free(coarse_subdivision);
        free(total_count_recv);
        free(total_ranks_recv);
        ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
      }
      break;
    }

  case(REPLICATED_BDDC):

    /* every process solves its own full copy of the coarse problem with a direct solver */
    pcbddc->coarse_communications_type = GATHERS_BDDC;
    coarse_mat_type = MATSEQAIJ;
    coarse_pc_type  = PCLU;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = PETSC_COMM_SELF;
    active_rank = rank_prec_comm;
    break;

  case(PARALLEL_BDDC):

    /* distributed coarse matrix, solved redundantly */
    pcbddc->coarse_communications_type = SCATTERS_BDDC;
    coarse_mat_type = MATMPIAIJ;
    coarse_pc_type  = PCREDUNDANT;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = prec_comm;
    active_rank = rank_prec_comm;
    break;

  case(SEQUENTIAL_BDDC):
    /* only the master process assembles and solves the coarse problem */
    pcbddc->coarse_communications_type = GATHERS_BDDC;
    coarse_mat_type = MATSEQAIJ;
    coarse_pc_type = PCLU;
    coarse_ksp_type = KSPPREONLY;
    coarse_comm = PETSC_COMM_SELF;
    active_rank = master_proc;
    break;
  }

  /* move local coarse submatrices to the processes that will assemble the coarse matrix */
  switch(pcbddc->coarse_communications_type){

  case(SCATTERS_BDDC):
    {
      if(pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {

        PetscMPIInt send_size;
        PetscInt    *aux_ins_indices;
        PetscInt    ii,jj;
        MPI_Request *requests;

        /* allocate auxiliary space */
        ierr = PetscMalloc (pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
        ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],pcbddc->local_primal_size,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
        ierr = PetscMalloc ( pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
        /* allocate stuffs for message massing */
        /* one request per incoming submatrix plus one for our own send */
        ierr = PetscMalloc ( (count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
        for(i=0;i<count_recv+1;i++) requests[i]=MPI_REQUEST_NULL;
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* fill up quantities */
        j=0;
        for(i=0;i<count_recv;i++){
          ii = ranks_recv[i];
          localsizes2[i]=pcbddc->local_primal_sizes[ii]*pcbddc->local_primal_sizes[ii];
          localdispl2[i]=j;
          j+=localsizes2[i];
          jj = pcbddc->local_primal_displacements[ii];
          for(k=0;k<pcbddc->local_primal_sizes[ii];k++) aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]]+=1;  /* it counts the coarse subdomains sharing the coarse node */
        }
        /*printf("aux_ins_indices 1\n");
        for(i=0;i<pcbddc->coarse_size;i++)
          printf("%d ",aux_ins_indices[i]);
        printf("\n");*/
        /* temp_coarse_mat_vals used to store temporarly received matrix values */
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
        /* evaluate how many values I will insert in coarse mat */
        ins_local_primal_size=0;
        for(i=0;i<pcbddc->coarse_size;i++){
          if(aux_ins_indices[i]){
            ins_local_primal_size++;
          }
        }
        /* evaluate indices I will insert in coarse mat */
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++){
          if(aux_ins_indices[i]){
            ins_local_primal_indices[j++]=i;
          }
        }
        /* use aux_ins_indices to realize a global to local mapping */
        j=0;
        for(i=0;i<pcbddc->coarse_size;i++){
          if(aux_ins_indices[i]==0){
            aux_ins_indices[i]=-1;
          } else {
            aux_ins_indices[i]=j;
            j++;
          }
        }

        /*printf("New details localsizes2 localdispl2\n");
        for(i=0;i<count_recv;i++)
          printf("(%d %d) ",localsizes2[i],localdispl2[i]);
        printf("\n");
        printf("aux_ins_indices 2\n");
        for(i=0;i<pcbddc->coarse_size;i++)
          printf("%d ",aux_ins_indices[i]);
        printf("\n");
        printf("ins_local_primal_indices\n");
        for(i=0;i<ins_local_primal_size;i++)
          printf("%d ",ins_local_primal_indices[i]);
        printf("\n");
        printf("coarse_submat_vals\n");
        for(i=0;i<pcbddc->local_primal_size;i++)
          for(j=0;j<pcbddc->local_primal_size;j++)
            printf("(%lf %d %d)\n",coarse_submat_vals[j*pcbddc->local_primal_size+i],pcbddc->local_primal_indices[i],pcbddc->local_primal_indices[j]);
        printf("\n");*/

        /* processes partecipating in coarse problem receive matrix data from their friends */
        /* NOTE(review): CHKERRQ here is OUTSIDE the loop body (the for controls only the
           MPI_Irecv statement), so only the last ierr is checked — verify intent */
        for(i=0;i<count_recv;i++) ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
        if(rank_coarse_proc_send_to != MPI_PROC_NULL ) {
          send_size=pcbddc->local_primal_size*pcbddc->local_primal_size;
          ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
        }
        ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

        /*if(coarse_color == 0) {
          printf("temp_coarse_mat_vals\n");
          for(k=0;k<count_recv;k++){
            printf("---- %d ----\n",ranks_recv[k]);
            for(i=0;i<pcbddc->local_primal_sizes[ranks_recv[k]];i++)
              for(j=0;j<pcbddc->local_primal_sizes[ranks_recv[k]];j++)
                printf("(%lf %d %d)\n",temp_coarse_mat_vals[localdispl2[k]+j*pcbddc->local_primal_sizes[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+i],pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[ranks_recv[k]]+j]);
            printf("\n");
          }
        }*/
        /* calculate data to insert in coarse mat */
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        PetscMemzero(ins_coarse_mat_vals,ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar));

        /* NOTE(review): declarations after statements (C99); PETSc otherwise follows C89-style
           declarations at block top — verify against project build requirements */
        PetscMPIInt rr,kk,lps,lpd;
        PetscInt row_ind,col_ind;
        /* accumulate each received column-major submatrix into the local insertion buffer,
           mapping global coarse indices to local positions via aux_ins_indices */
        for(k=0;k<count_recv;k++){
          rr = ranks_recv[k];
          kk = localdispl2[k];
          lps = pcbddc->local_primal_sizes[rr];
          lpd = pcbddc->local_primal_displacements[rr];
          /*printf("Inserting the following indices (received from %d)\n",rr);*/
          for(j=0;j<lps;j++){
            col_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+j]];
            for(i=0;i<lps;i++){
              row_ind=aux_ins_indices[pcbddc->replicated_local_primal_indices[lpd+i]];
              /*printf("%d %d\n",row_ind,col_ind);*/
              ins_coarse_mat_vals[col_ind*ins_local_primal_size+row_ind]+=temp_coarse_mat_vals[kk+j*lps+i];
            }
          }
        }
        ierr = PetscFree(requests);CHKERRQ(ierr);
        ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
        ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);
        if(coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }

        /* create local to global mapping needed by coarse MATIS */
        {
          IS coarse_IS;
          /* the split communicator is no longer needed; the coarse MATIS lives on prec_comm */
          if(coarse_comm != MPI_COMM_NULL ) ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);
          coarse_comm = prec_comm;
          active_rank=rank_prec_comm;
          ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
          ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
          ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
        }
      }
      if(pcbddc->coarse_problem_type==PARALLEL_BDDC) {
        /* arrays for values insertion */
        ins_local_primal_size = pcbddc->local_primal_size;
        ierr = PetscMalloc ( ins_local_primal_size*sizeof(PetscMPIInt),&ins_local_primal_indices);CHKERRQ(ierr);
        ierr = PetscMalloc ( ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
        for(j=0;j<ins_local_primal_size;j++){
          ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
          for(i=0;i<ins_local_primal_size;i++) ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
        }
      }
      break;

    }

  case(GATHERS_BDDC):
    {

      PetscMPIInt mysize,mysize2;

      if(rank_prec_comm==active_rank) {
        ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
        /* NOTE(review): calloc here (vs PetscMalloc elsewhere for pcbddc members) — the matching
           release must use free(); verify the destroy routine does so */
        pcbddc->replicated_local_primal_values = (PetscScalar*)calloc(pcbddc->replicated_primal_size,sizeof(PetscScalar));
        ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
        ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
        /* arrays for values insertion */
        for(i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
        localdispl2[0]=0;
        for(i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
        j=0;
        for(i=0;i<size_prec_comm;i++) j+=localsizes2[i];
        ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
      }

      mysize=pcbddc->local_primal_size;
      mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
      if(pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
        ierr = MPI_Gatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
        ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
      } else {
        ierr = MPI_Allgatherv(&pcbddc->local_primal_indices[0],mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
        ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
      }
      break;
    }/* switch on coarse problem and communications associated with finished */
  }

  /* Now create and fill up coarse matrix */
  if( rank_prec_comm == active_rank ) {
    if(pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
      ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr);
      ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
      ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);  /* local values stored in column major */
      ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
    } else {
      Mat matis_coarse_local_mat;
      /* remind bs */
      ierr = MatCreateIS(coarse_comm,bs,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
      ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
      ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);  /* local values stored in column major */
      ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = MatSetOption(pcbddc->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    if(pcbddc->coarse_communications_type != GATHERS_BDDC) {
      ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
    } else {
      /* insert every subdomain's gathered submatrix; the pointers are re-aimed into the
         replicated arrays here, so they are reset to 0 afterwards to avoid freeing borrowed memory */
      for(k=0;k<size_prec_comm;k++){
        offset=pcbddc->local_primal_displacements[k];
        offset2=localdispl2[k];
        ins_local_primal_size = pcbddc->local_primal_sizes[k];
        ins_local_primal_indices = &pcbddc->replicated_local_primal_indices[offset];
        ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
        ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
      }
      ins_local_primal_indices = 0;
      ins_coarse_mat_vals = 0;
    }
    ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* PetscViewer view_out;
    ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,"coarsematfull.m",&view_out);CHKERRQ(ierr);
    ierr = PetscViewerSetFormat(view_out,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
    ierr = MatView(pcbddc->coarse_mat,view_out);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&view_out);CHKERRQ(ierr);*/

    ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
    /* Preconditioner for coarse problem */
    ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
    ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
    /* Allow user's customization */
    ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    /* Set Up PC for coarse problem BDDC */
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      if(dbg_flag) {
        ierr = PetscViewerASCIIPrintf(viewer,"----------------Setting up a new level---------------\n");CHKERRQ(ierr);
        ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      }
      /* recursion: the coarse PC is itself a BDDC, giving the multilevel hierarchy */
      ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
    }
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
    if(pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
      if(dbg_flag) {
        ierr = PetscViewerASCIIPrintf(viewer,"----------------New level set------------------------\n");CHKERRQ(ierr);
        ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      }
    }
  }
  /* scatter mapping local primal vector entries into the distributed coarse vector */
  if(pcbddc->coarse_communications_type == SCATTERS_BDDC) {
    IS local_IS,global_IS;
    ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
    ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
    ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
    ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
  }


  /* Evaluate condition number of coarse problem for cheby (and verbose output if requested) */
  if( pcbddc->coarse_problem_type == MULTILEVEL_BDDC && rank_prec_comm == active_rank ) {
    PetscScalar m_one=-1.0;
    PetscReal   infty_error,lambda_min,lambda_max,kappa_2;
    const KSPType check_ksp_type=KSPGMRES;

    /* change coarse ksp object to an iterative method suitable for extreme eigenvalues' estimation */
    ierr = KSPSetType(pcbddc->coarse_ksp,check_ksp_type);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,1.e-8,1.e-8,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
    /* solve A*x = A*A*r for a random r: known solution A*r allows an infinity-norm error check */
    ierr = VecSetRandom(pcbddc->coarse_rhs,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
    ierr = MatMult(pcbddc->coarse_mat,pcbddc->coarse_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
    ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_rhs);CHKERRQ(ierr);
    ierr = KSPComputeExtremeSingularValues(pcbddc->coarse_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
    if(dbg_flag) {
      kappa_2=lambda_max/lambda_min;
      ierr = KSPGetIterationNumber(pcbddc->coarse_ksp,&k);CHKERRQ(ierr);
      ierr = VecAXPY(pcbddc->coarse_rhs,m_one,pcbddc->coarse_vec);CHKERRQ(ierr);
      ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem condition number estimated with %d iterations of %s is: % 1.14e\n",k,check_ksp_type,kappa_2);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem infty_error: %1.14e\n",infty_error);CHKERRQ(ierr);
    }
    /* restore coarse ksp to default values */
    /* feed the estimated spectrum bounds to Chebyshev and reset to the single-iteration solve */
    ierr = KSPSetComputeSingularValues(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr);
    ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
    ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
    ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
    ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
  }

  /* free data structures no longer needed */
  if(coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
  if(ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
  if(ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr);}
  if(localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr);}
  if(localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr);}
  if(temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr);}

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCBDDCManageLocalBoundaries"
static PetscErrorCode PCBDDCManageLocalBoundaries(PC pc)
{

  PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
  PC_IS       *pcis = (PC_IS*)pc->data;
  Mat_IS      *matis = (Mat_IS*)pc->pmat->data;
  PCBDDCGraph mat_graph=pcbddc->mat_graph;
  PetscInt    *queue_in_global_numbering,*is_indices,*auxis;
  PetscInt    bs,ierr,i,j,s,k,iindex,neumann_bsize,dirichlet_bsize;
  PetscInt    total_counts,nodes_touched,where_values=1,vertex_size;
  PetscMPIInt adapt_interface=0,adapt_interface_reduced=0,NEUMANNCNT=0;
  PetscBool   same_set;
  MPI_Comm    interface_comm=((PetscObject)pc)->comm;
  PetscBool   use_faces=PETSC_FALSE,use_edges=PETSC_FALSE;
  const PetscInt *neumann_nodes;
  const PetscInt *dirichlet_nodes;
  IS          used_IS,*custom_ISForDofs;
  PetscScalar *array;
  PetscScalar *array2;
PetscViewer viewer=pcbddc->dbg_viewer; 3991 3992 PetscFunctionBegin; 3993 /* Setup local adjacency graph */ 3994 mat_graph->nvtxs=pcis->n; 3995 if(!mat_graph->xadj) { NEUMANNCNT = 1; } 3996 ierr = PCBDDCSetupLocalAdjacencyGraph(pc);CHKERRQ(ierr); 3997 i = mat_graph->nvtxs; 3998 ierr = PetscMalloc4(i,PetscInt,&mat_graph->where,i,PetscInt,&mat_graph->count,i+1,PetscInt,&mat_graph->cptr,i,PetscInt,&mat_graph->queue);CHKERRQ(ierr); 3999 ierr = PetscMalloc2(i,PetscInt,&mat_graph->which_dof,i,PetscBool,&mat_graph->touched);CHKERRQ(ierr); 4000 ierr = PetscMalloc(i*sizeof(PetscInt),&queue_in_global_numbering);CHKERRQ(ierr); 4001 ierr = PetscMemzero(mat_graph->where,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4002 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4003 ierr = PetscMemzero(mat_graph->which_dof,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4004 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4005 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4006 4007 /* Setting dofs splitting in mat_graph->which_dof 4008 Get information about dofs' splitting if provided by the user 4009 Otherwise it assumes a constant block size */ 4010 vertex_size=0; 4011 if(!pcbddc->n_ISForDofs) { 4012 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 4013 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 4014 for(i=0;i<bs;i++) { 4015 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 4016 } 4017 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 4018 vertex_size=1; 4019 /* remove my references to IS objects */ 4020 for(i=0;i<bs;i++) { 4021 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 4022 } 4023 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 4024 } 4025 for(i=0;i<pcbddc->n_ISForDofs;i++) { 4026 ierr = ISGetSize(pcbddc->ISForDofs[i],&k);CHKERRQ(ierr); 4027 ierr = 
ISGetIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4028 for(j=0;j<k;j++) { 4029 mat_graph->which_dof[is_indices[j]]=i; 4030 } 4031 ierr = ISRestoreIndices(pcbddc->ISForDofs[i],(const PetscInt**)&is_indices);CHKERRQ(ierr); 4032 } 4033 /* use mat block size as vertex size if it has not yet set */ 4034 if(!vertex_size) { 4035 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 4036 } 4037 4038 /* count number of neigh per node */ 4039 total_counts=0; 4040 for(i=1;i<pcis->n_neigh;i++){ 4041 s=pcis->n_shared[i]; 4042 total_counts+=s; 4043 for(j=0;j<s;j++){ 4044 mat_graph->count[pcis->shared[i][j]] += 1; 4045 } 4046 } 4047 /* Take into account Neumann data -> it increments number of sharing subdomains for nodes lying on the interface */ 4048 ierr = PCBDDCGetNeumannBoundaries(pc,&used_IS);CHKERRQ(ierr); 4049 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4050 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4051 if(used_IS) { 4052 ierr = ISGetSize(used_IS,&neumann_bsize);CHKERRQ(ierr); 4053 ierr = ISGetIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4054 for(i=0;i<neumann_bsize;i++){ 4055 iindex = neumann_nodes[i]; 4056 if(mat_graph->count[iindex] > NEUMANNCNT && array[iindex]==0.0){ 4057 mat_graph->count[iindex]+=1; 4058 total_counts++; 4059 array[iindex]=array[iindex]+1.0; 4060 } else if(array[iindex]>0.0) { 4061 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Error for neumann nodes provided to BDDC! They must be uniquely listed! 
Found duplicate node %d\n",iindex); 4062 } 4063 } 4064 } 4065 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4066 /* allocate space for storing the set of neighbours for each node */ 4067 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt*),&mat_graph->neighbours_set);CHKERRQ(ierr); 4068 if(mat_graph->nvtxs) { ierr = PetscMalloc(total_counts*sizeof(PetscInt),&mat_graph->neighbours_set[0]);CHKERRQ(ierr); } 4069 for(i=1;i<mat_graph->nvtxs;i++) mat_graph->neighbours_set[i]=mat_graph->neighbours_set[i-1]+mat_graph->count[i-1]; 4070 ierr = PetscMemzero(mat_graph->count,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4071 for(i=1;i<pcis->n_neigh;i++){ 4072 s=pcis->n_shared[i]; 4073 for(j=0;j<s;j++) { 4074 k=pcis->shared[i][j]; 4075 mat_graph->neighbours_set[k][mat_graph->count[k]] = pcis->neigh[i]; 4076 mat_graph->count[k]+=1; 4077 } 4078 } 4079 /* Check consistency of Neumann nodes */ 4080 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4081 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4082 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4083 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4084 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4085 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4086 /* set -1 fake neighbour to mimic Neumann boundary */ 4087 if(used_IS) { 4088 for(i=0;i<neumann_bsize;i++){ 4089 iindex = neumann_nodes[i]; 4090 if(mat_graph->count[iindex] > NEUMANNCNT){ 4091 if(mat_graph->count[iindex]+1 != (PetscInt)array[iindex]) { 4092 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Neumann nodes provided to BDDC must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node 
%d\n",iindex,mat_graph->count[iindex]+1,(PetscInt)array[iindex]); 4093 } 4094 mat_graph->neighbours_set[iindex][mat_graph->count[iindex]] = -1; 4095 mat_graph->count[iindex]+=1; 4096 } 4097 } 4098 ierr = ISRestoreIndices(used_IS,&neumann_nodes);CHKERRQ(ierr); 4099 } 4100 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4101 /* sort set of sharing subdomains */ 4102 for(i=0;i<mat_graph->nvtxs;i++) { ierr = PetscSortInt(mat_graph->count[i],mat_graph->neighbours_set[i]);CHKERRQ(ierr); } 4103 /* remove interior nodes and dirichlet boundary nodes from the next search into the graph */ 4104 for(i=0;i<mat_graph->nvtxs;i++){mat_graph->touched[i]=PETSC_FALSE;} 4105 nodes_touched=0; 4106 ierr = PCBDDCGetDirichletBoundaries(pc,&used_IS);CHKERRQ(ierr); 4107 ierr = VecSet(pcis->vec2_N,0.0);CHKERRQ(ierr); 4108 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4109 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4110 if(used_IS) { 4111 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4112 if(dirichlet_bsize && matis->pure_neumann) { 4113 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet boundaries are intended to be used with matrices with zeroed rows!\n"); 4114 } 4115 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4116 for(i=0;i<dirichlet_bsize;i++){ 4117 iindex=dirichlet_nodes[i]; 4118 if(mat_graph->count[iindex] && !mat_graph->touched[iindex]) { 4119 if(array[iindex]>0.0) { 4120 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"BDDC cannot have nodes which are marked as Neumann and Dirichlet at the same time! 
Wrong node %d\n",iindex); 4121 } 4122 mat_graph->touched[iindex]=PETSC_TRUE; 4123 mat_graph->where[iindex]=0; 4124 nodes_touched++; 4125 array2[iindex]=array2[iindex]+1.0; 4126 } 4127 } 4128 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4129 } 4130 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4131 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4132 /* Check consistency of Dirichlet nodes */ 4133 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4134 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4135 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4136 ierr = VecScatterEnd (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4137 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4138 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4139 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4140 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4141 ierr = VecScatterEnd (matis->ctx,pcis->vec2_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4142 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4143 ierr = VecScatterEnd (matis->ctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4144 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4145 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4146 if(used_IS) { 4147 ierr = ISGetSize(used_IS,&dirichlet_bsize);CHKERRQ(ierr); 4148 ierr = ISGetIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4149 for(i=0;i<dirichlet_bsize;i++){ 4150 iindex=dirichlet_nodes[i]; 4151 if(array[iindex]>1.0 && array[iindex]!=array2[iindex] ) { 4152 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_USER,"Dirichlet nodes provided to BDDC 
must be consistent among neighbours!\nNode %d: number of sharing subdomains %d != number of subdomains for which it is a neumann node %d\n",iindex,(PetscInt)array[iindex],(PetscInt)array2[iindex]); 4153 } 4154 } 4155 ierr = ISRestoreIndices(used_IS,&dirichlet_nodes);CHKERRQ(ierr); 4156 } 4157 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4158 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4159 4160 for(i=0;i<mat_graph->nvtxs;i++){ 4161 if(!mat_graph->count[i]){ /* interior nodes */ 4162 mat_graph->touched[i]=PETSC_TRUE; 4163 mat_graph->where[i]=0; 4164 nodes_touched++; 4165 } 4166 } 4167 mat_graph->ncmps = 0; 4168 i=0; 4169 while(nodes_touched<mat_graph->nvtxs) { 4170 /* find first untouched node in local ordering */ 4171 while(mat_graph->touched[i]) i++; 4172 mat_graph->touched[i]=PETSC_TRUE; 4173 mat_graph->where[i]=where_values; 4174 nodes_touched++; 4175 /* now find all other nodes having the same set of sharing subdomains */ 4176 for(j=i+1;j<mat_graph->nvtxs;j++){ 4177 /* check for same number of sharing subdomains and dof number */ 4178 if(!mat_graph->touched[j] && mat_graph->count[i]==mat_graph->count[j] && mat_graph->which_dof[i] == mat_graph->which_dof[j] ){ 4179 /* check for same set of sharing subdomains */ 4180 same_set=PETSC_TRUE; 4181 for(k=0;k<mat_graph->count[j];k++){ 4182 if(mat_graph->neighbours_set[i][k]!=mat_graph->neighbours_set[j][k]) { 4183 same_set=PETSC_FALSE; 4184 } 4185 } 4186 /* I found a friend of mine */ 4187 if(same_set) { 4188 mat_graph->where[j]=where_values; 4189 mat_graph->touched[j]=PETSC_TRUE; 4190 nodes_touched++; 4191 } 4192 } 4193 } 4194 where_values++; 4195 } 4196 where_values--; if(where_values<0) where_values=0; 4197 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4198 /* Find connected components defined on the shared interface */ 4199 if(where_values) { 4200 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4201 /* For consistency among 
neughbouring procs, I need to sort (by global ordering) each connected component */ 4202 for(i=0;i<mat_graph->ncmps;i++) { 4203 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4204 ierr = PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4205 } 4206 } 4207 /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */ 4208 for(i=0;i<where_values;i++) { 4209 /* We are not sure that two connected components will be the same among subdomains sharing a subset of local interface */ 4210 if(mat_graph->where_ncmps[i]>1) { 4211 adapt_interface=1; 4212 break; 4213 } 4214 } 4215 ierr = MPI_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_INT,MPI_LOR,interface_comm);CHKERRQ(ierr); 4216 if(pcbddc->dbg_flag && adapt_interface_reduced) { 4217 ierr = PetscViewerASCIIPrintf(viewer,"Interface adapted\n");CHKERRQ(ierr); 4218 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4219 } 4220 if(where_values && adapt_interface_reduced) { 4221 4222 PetscInt sum_requests=0,my_rank; 4223 PetscInt buffer_size,start_of_recv,size_of_recv,start_of_send; 4224 PetscInt temp_buffer_size,ins_val,global_where_counter; 4225 PetscInt *cum_recv_counts; 4226 PetscInt *where_to_nodes_indices; 4227 PetscInt *petsc_buffer; 4228 PetscMPIInt *recv_buffer; 4229 PetscMPIInt *recv_buffer_where; 4230 PetscMPIInt *send_buffer; 4231 PetscMPIInt size_of_send; 4232 PetscInt *sizes_of_sends; 4233 MPI_Request *send_requests; 4234 MPI_Request *recv_requests; 4235 PetscInt *where_cc_adapt; 4236 PetscInt **temp_buffer; 4237 PetscInt *nodes_to_temp_buffer_indices; 4238 PetscInt *add_to_where; 4239 4240 ierr = MPI_Comm_rank(interface_comm,&my_rank);CHKERRQ(ierr); 4241 ierr = 
PetscMalloc((where_values+1)*sizeof(PetscInt),&cum_recv_counts);CHKERRQ(ierr); 4242 ierr = PetscMemzero(cum_recv_counts,(where_values+1)*sizeof(PetscInt));CHKERRQ(ierr); 4243 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_to_nodes_indices);CHKERRQ(ierr); 4244 /* first count how many neighbours per connected component I will receive from */ 4245 cum_recv_counts[0]=0; 4246 for(i=1;i<where_values+1;i++){ 4247 j=0; 4248 while(mat_graph->where[j] != i) j++; 4249 where_to_nodes_indices[i-1]=j; 4250 if(mat_graph->neighbours_set[j][0]!=-1) { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]; } /* We don't want sends/recvs_to/from_self -> here I don't count myself */ 4251 else { cum_recv_counts[i]=cum_recv_counts[i-1]+mat_graph->count[j]-1; } 4252 } 4253 buffer_size=2*cum_recv_counts[where_values]+mat_graph->nvtxs; 4254 ierr = PetscMalloc(2*cum_recv_counts[where_values]*sizeof(PetscMPIInt),&recv_buffer_where);CHKERRQ(ierr); 4255 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr); 4256 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&send_requests);CHKERRQ(ierr); 4257 ierr = PetscMalloc(cum_recv_counts[where_values]*sizeof(MPI_Request),&recv_requests);CHKERRQ(ierr); 4258 for(i=0;i<cum_recv_counts[where_values];i++) { 4259 send_requests[i]=MPI_REQUEST_NULL; 4260 recv_requests[i]=MPI_REQUEST_NULL; 4261 } 4262 /* exchange with my neighbours the number of my connected components on the shared interface */ 4263 for(i=0;i<where_values;i++){ 4264 j=where_to_nodes_indices[i]; 4265 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 4266 for(;k<mat_graph->count[j];k++){ 4267 ierr = MPI_Isend(&mat_graph->where_ncmps[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4268 ierr = MPI_Irecv(&recv_buffer_where[sum_requests],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4269 sum_requests++; 4270 } 4271 } 4272 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4273 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4274 /* determine the connected component I need to adapt */ 4275 ierr = PetscMalloc(where_values*sizeof(PetscInt),&where_cc_adapt);CHKERRQ(ierr); 4276 ierr = PetscMemzero(where_cc_adapt,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4277 for(i=0;i<where_values;i++){ 4278 for(j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){ 4279 /* The first condition is natural (i.e someone has a different number of cc than me), the second one is just to be safe */ 4280 if( mat_graph->where_ncmps[i]!=recv_buffer_where[j] || mat_graph->where_ncmps[i] > 1 ) { 4281 where_cc_adapt[i]=PETSC_TRUE; 4282 break; 4283 } 4284 } 4285 } 4286 /* now get from neighbours their ccs (in global numbering) and adapt them (in case it is needed) */ 4287 /* first determine how much data to send (size of each queue plus the global indices) and communicate it to neighbours */ 4288 ierr = PetscMalloc(where_values*sizeof(PetscInt),&sizes_of_sends);CHKERRQ(ierr); 4289 ierr = PetscMemzero(sizes_of_sends,where_values*sizeof(PetscInt));CHKERRQ(ierr); 4290 sum_requests=0; 4291 start_of_send=0; 4292 start_of_recv=cum_recv_counts[where_values]; 4293 for(i=0;i<where_values;i++) { 4294 if(where_cc_adapt[i]) { 4295 size_of_send=0; 4296 for(j=i;j<mat_graph->ncmps;j++) { 4297 if(mat_graph->where[mat_graph->queue[mat_graph->cptr[j]]] == i+1) { /* WARNING -> where values goes from 
1 to where_values included */ 4298 send_buffer[start_of_send+size_of_send]=mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4299 size_of_send+=1; 4300 for(k=0;k<mat_graph->cptr[j+1]-mat_graph->cptr[j];k++) { 4301 send_buffer[start_of_send+size_of_send+k]=queue_in_global_numbering[mat_graph->cptr[j]+k]; 4302 } 4303 size_of_send=size_of_send+mat_graph->cptr[j+1]-mat_graph->cptr[j]; 4304 } 4305 } 4306 j = where_to_nodes_indices[i]; 4307 k = (mat_graph->neighbours_set[j][0] == -1 ? 1 : 0); 4308 sizes_of_sends[i]=size_of_send; 4309 for(;k<mat_graph->count[j];k++){ 4310 ierr = MPI_Isend(&sizes_of_sends[i],1,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4311 ierr = MPI_Irecv(&recv_buffer_where[sum_requests+start_of_recv],1,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4312 sum_requests++; 4313 } 4314 start_of_send+=size_of_send; 4315 } 4316 } 4317 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4318 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4319 buffer_size=0; 4320 for(k=0;k<sum_requests;k++) { buffer_size+=recv_buffer_where[start_of_recv+k]; } 4321 ierr = PetscMalloc(buffer_size*sizeof(PetscMPIInt),&recv_buffer);CHKERRQ(ierr); 4322 /* now exchange the data */ 4323 start_of_recv=0; 4324 start_of_send=0; 4325 sum_requests=0; 4326 for(i=0;i<where_values;i++) { 4327 if(where_cc_adapt[i]) { 4328 size_of_send = sizes_of_sends[i]; 4329 j = where_to_nodes_indices[i]; 4330 k = (mat_graph->neighbours_set[j][0] == -1 ? 
1 : 0); 4331 for(;k<mat_graph->count[j];k++){ 4332 ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,mat_graph->neighbours_set[j][k],(my_rank+1)*mat_graph->count[j],interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr); 4333 size_of_recv=recv_buffer_where[cum_recv_counts[where_values]+sum_requests]; 4334 ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_recv,MPIU_INT,mat_graph->neighbours_set[j][k],(mat_graph->neighbours_set[j][k]+1)*mat_graph->count[j],interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr); 4335 start_of_recv+=size_of_recv; 4336 sum_requests++; 4337 } 4338 start_of_send+=size_of_send; 4339 } 4340 } 4341 ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4342 ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4343 ierr = PetscMalloc(buffer_size*sizeof(PetscInt),&petsc_buffer);CHKERRQ(ierr); 4344 for(k=0;k<start_of_recv;k++) { petsc_buffer[k]=(PetscInt)recv_buffer[k]; } 4345 for(j=0;j<buffer_size;) { 4346 ierr = ISGlobalToLocalMappingApply(matis->mapping,IS_GTOLM_MASK,petsc_buffer[j],&petsc_buffer[j+1],&petsc_buffer[j],&petsc_buffer[j+1]);CHKERRQ(ierr); 4347 k=petsc_buffer[j]+1; 4348 j+=k; 4349 } 4350 sum_requests=cum_recv_counts[where_values]; 4351 start_of_recv=0; 4352 ierr = PetscMalloc(mat_graph->nvtxs*sizeof(PetscInt),&nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4353 global_where_counter=0; 4354 for(i=0;i<where_values;i++){ 4355 if(where_cc_adapt[i]){ 4356 temp_buffer_size=0; 4357 /* find nodes on the shared interface we need to adapt */ 4358 for(j=0;j<mat_graph->nvtxs;j++){ 4359 if(mat_graph->where[j]==i+1) { 4360 nodes_to_temp_buffer_indices[j]=temp_buffer_size; 4361 temp_buffer_size++; 4362 } else { 4363 nodes_to_temp_buffer_indices[j]=-1; 4364 } 4365 } 4366 /* allocate some temporary space */ 4367 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt*),&temp_buffer);CHKERRQ(ierr); 4368 ierr = 
PetscMalloc(temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt),&temp_buffer[0]);CHKERRQ(ierr); 4369 ierr = PetscMemzero(temp_buffer[0],temp_buffer_size*(cum_recv_counts[i+1]-cum_recv_counts[i])*sizeof(PetscInt));CHKERRQ(ierr); 4370 for(j=1;j<temp_buffer_size;j++){ 4371 temp_buffer[j]=temp_buffer[j-1]+cum_recv_counts[i+1]-cum_recv_counts[i]; 4372 } 4373 /* analyze contributions from neighbouring subdomains for i-th conn comp 4374 temp buffer structure: 4375 supposing part of the interface has dimension 5 (global nodes 0,1,2,3,4) 4376 3 neighs procs with structured connected components: 4377 neigh 0: [0 1 4], [2 3]; (2 connected components) 4378 neigh 1: [0 1], [2 3 4]; (2 connected components) 4379 neigh 2: [0 4], [1], [2 3]; (3 connected components) 4380 tempbuffer (row-oriented) should be filled as: 4381 [ 0, 0, 0; 4382 0, 0, 1; 4383 1, 1, 2; 4384 1, 1, 2; 4385 0, 1, 0; ]; 4386 This way we can simply recover the resulting structure account for possible intersections of ccs among neighs. 
4387 The mat_graph->where array will be modified to reproduce the following 4 connected components [0], [1], [2 3], [4]; 4388 */ 4389 for(j=0;j<cum_recv_counts[i+1]-cum_recv_counts[i];j++) { 4390 ins_val=0; 4391 size_of_recv=recv_buffer_where[sum_requests]; /* total size of recv from neighs */ 4392 for(buffer_size=0;buffer_size<size_of_recv;) { /* loop until all data from neighs has been taken into account */ 4393 for(k=1;k<petsc_buffer[buffer_size+start_of_recv]+1;k++) { /* filling properly temp_buffer using data from a single recv */ 4394 temp_buffer[ nodes_to_temp_buffer_indices[ petsc_buffer[ start_of_recv+buffer_size+k ] ] ][j]=ins_val; 4395 } 4396 buffer_size+=k; 4397 ins_val++; 4398 } 4399 start_of_recv+=size_of_recv; 4400 sum_requests++; 4401 } 4402 ierr = PetscMalloc(temp_buffer_size*sizeof(PetscInt),&add_to_where);CHKERRQ(ierr); 4403 ierr = PetscMemzero(add_to_where,temp_buffer_size*sizeof(PetscInt));CHKERRQ(ierr); 4404 for(j=0;j<temp_buffer_size;j++){ 4405 if(!add_to_where[j]){ /* found a new cc */ 4406 global_where_counter++; 4407 add_to_where[j]=global_where_counter; 4408 for(k=j+1;k<temp_buffer_size;k++){ /* check for other nodes in new cc */ 4409 same_set=PETSC_TRUE; 4410 for(s=0;s<cum_recv_counts[i+1]-cum_recv_counts[i];s++){ 4411 if(temp_buffer[j][s]!=temp_buffer[k][s]) { 4412 same_set=PETSC_FALSE; 4413 break; 4414 } 4415 } 4416 if(same_set) add_to_where[k]=global_where_counter; 4417 } 4418 } 4419 } 4420 /* insert new data in where array */ 4421 temp_buffer_size=0; 4422 for(j=0;j<mat_graph->nvtxs;j++){ 4423 if(mat_graph->where[j]==i+1) { 4424 mat_graph->where[j]=where_values+add_to_where[temp_buffer_size]; 4425 temp_buffer_size++; 4426 } 4427 } 4428 ierr = PetscFree(temp_buffer[0]);CHKERRQ(ierr); 4429 ierr = PetscFree(temp_buffer);CHKERRQ(ierr); 4430 ierr = PetscFree(add_to_where);CHKERRQ(ierr); 4431 } 4432 } 4433 ierr = PetscFree(nodes_to_temp_buffer_indices);CHKERRQ(ierr); 4434 ierr = PetscFree(sizes_of_sends);CHKERRQ(ierr); 4435 ierr = 
PetscFree(send_requests);CHKERRQ(ierr); 4436 ierr = PetscFree(recv_requests);CHKERRQ(ierr); 4437 ierr = PetscFree(petsc_buffer);CHKERRQ(ierr); 4438 ierr = PetscFree(recv_buffer);CHKERRQ(ierr); 4439 ierr = PetscFree(recv_buffer_where);CHKERRQ(ierr); 4440 ierr = PetscFree(send_buffer);CHKERRQ(ierr); 4441 ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr); 4442 ierr = PetscFree(where_to_nodes_indices);CHKERRQ(ierr); 4443 ierr = PetscFree(where_cc_adapt);CHKERRQ(ierr); 4444 /* We are ready to evaluate consistent connected components on each part of the shared interface */ 4445 if(global_where_counter) { 4446 for(i=0;i<mat_graph->nvtxs;i++){ mat_graph->touched[i]=PETSC_FALSE; } 4447 global_where_counter=0; 4448 for(i=0;i<mat_graph->nvtxs;i++){ 4449 if(mat_graph->where[i] && !mat_graph->touched[i]) { 4450 global_where_counter++; 4451 for(j=i+1;j<mat_graph->nvtxs;j++){ 4452 if(!mat_graph->touched[j] && mat_graph->where[j]==mat_graph->where[i]) { 4453 mat_graph->where[j]=global_where_counter; 4454 mat_graph->touched[j]=PETSC_TRUE; 4455 } 4456 } 4457 mat_graph->where[i]=global_where_counter; 4458 mat_graph->touched[i]=PETSC_TRUE; 4459 } 4460 } 4461 where_values=global_where_counter; 4462 } 4463 if(global_where_counter) { 4464 ierr = PetscMemzero(mat_graph->cptr,(mat_graph->nvtxs+1)*sizeof(PetscInt));CHKERRQ(ierr); 4465 ierr = PetscMemzero(mat_graph->queue,mat_graph->nvtxs*sizeof(PetscInt));CHKERRQ(ierr); 4466 ierr = PetscFree(mat_graph->where_ncmps);CHKERRQ(ierr); 4467 ierr = PetscMalloc(where_values*sizeof(PetscMPIInt),&mat_graph->where_ncmps);CHKERRQ(ierr); 4468 ierr = PCBDDCFindConnectedComponents(mat_graph, where_values); 4469 for(i=0;i<mat_graph->ncmps;i++) { 4470 ierr = ISLocalToGlobalMappingApply(matis->mapping,mat_graph->cptr[i+1]-mat_graph->cptr[i],&mat_graph->queue[mat_graph->cptr[i]],&queue_in_global_numbering[mat_graph->cptr[i]]);CHKERRQ(ierr); 4471 ierr = 
PetscSortIntWithArray(mat_graph->cptr[i+1]-mat_graph->cptr[i],&queue_in_global_numbering[mat_graph->cptr[i]],&mat_graph->queue[mat_graph->cptr[i]]);CHKERRQ(ierr); 4472 } 4473 } 4474 } /* Finished adapting interface */ 4475 PetscInt nfc=0; 4476 PetscInt nec=0; 4477 PetscInt nvc=0; 4478 PetscBool twodim_flag=PETSC_FALSE; 4479 for (i=0; i<mat_graph->ncmps; i++) { 4480 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4481 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ /* 1 neigh Neumann fake included */ 4482 nfc++; 4483 } else { /* note that nec will be zero in 2d */ 4484 nec++; 4485 } 4486 } else { 4487 nvc+=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4488 } 4489 } 4490 4491 if(!nec) { /* we are in a 2d case -> no faces, only edges */ 4492 nec = nfc; 4493 nfc = 0; 4494 twodim_flag = PETSC_TRUE; 4495 } 4496 /* allocate IS arrays for faces, edges. Vertices need a single index set. */ 4497 k=0; 4498 for (i=0; i<mat_graph->ncmps; i++) { 4499 j=mat_graph->cptr[i+1]-mat_graph->cptr[i]; 4500 if( j > k) { 4501 k=j; 4502 } 4503 if(j<=vertex_size) { 4504 k+=vertex_size; 4505 } 4506 } 4507 ierr = PetscMalloc(k*sizeof(PetscInt),&auxis);CHKERRQ(ierr); 4508 4509 if(!pcbddc->vertices_flag && !pcbddc->edges_flag) { 4510 ierr = PetscMalloc(nfc*sizeof(IS),&pcbddc->ISForFaces);CHKERRQ(ierr); 4511 use_faces=PETSC_TRUE; 4512 } 4513 if(!pcbddc->vertices_flag && !pcbddc->faces_flag) { 4514 ierr = PetscMalloc(nec*sizeof(IS),&pcbddc->ISForEdges);CHKERRQ(ierr); 4515 use_edges=PETSC_TRUE; 4516 } 4517 nfc=0; 4518 nec=0; 4519 for (i=0; i<mat_graph->ncmps; i++) { 4520 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] > vertex_size ){ 4521 for(j=0;j<mat_graph->cptr[i+1]-mat_graph->cptr[i];j++) { 4522 auxis[j]=mat_graph->queue[mat_graph->cptr[i]+j]; 4523 } 4524 if(mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]==1){ 4525 if(twodim_flag) { 4526 if(use_edges) { 4527 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4528 
nec++; 4529 } 4530 } else { 4531 if(use_faces) { 4532 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForFaces[nfc]);CHKERRQ(ierr); 4533 nfc++; 4534 } 4535 } 4536 } else { 4537 if(use_edges) { 4538 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,auxis,PETSC_COPY_VALUES,&pcbddc->ISForEdges[nec]);CHKERRQ(ierr); 4539 nec++; 4540 } 4541 } 4542 } 4543 } 4544 pcbddc->n_ISForFaces=nfc; 4545 pcbddc->n_ISForEdges=nec; 4546 nvc=0; 4547 if( !pcbddc->constraints_flag ) { 4548 for (i=0; i<mat_graph->ncmps; i++) { 4549 if( mat_graph->cptr[i+1]-mat_graph->cptr[i] <= vertex_size ){ 4550 for( j=mat_graph->cptr[i];j<mat_graph->cptr[i+1];j++) { 4551 auxis[nvc]=mat_graph->queue[j]; 4552 nvc++; 4553 } 4554 } 4555 } 4556 } 4557 /* sort vertex set (by local ordering) */ 4558 ierr = PetscSortInt(nvc,auxis);CHKERRQ(ierr); 4559 ierr = ISCreateGeneral(PETSC_COMM_SELF,nvc,auxis,PETSC_COPY_VALUES,&pcbddc->ISForVertices);CHKERRQ(ierr); 4560 4561 if(pcbddc->dbg_flag) { 4562 4563 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4564 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Details from PCBDDCManageLocalBoundaries for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4565 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4566 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Graph (adjacency structure) of local Neumann mat\n");CHKERRQ(ierr); 4567 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4568 for(i=0;i<mat_graph->nvtxs;i++) { 4569 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Nodes connected to node number %d are %d\n",i,mat_graph->xadj[i+1]-mat_graph->xadj[i]);CHKERRQ(ierr); 4570 for(j=mat_graph->xadj[i];j<mat_graph->xadj[i+1];j++){ 4571 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d 
",mat_graph->adjncy[j]);CHKERRQ(ierr); 4572 } 4573 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4574 }*/ 4575 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Matrix graph has %d connected components", mat_graph->ncmps);CHKERRQ(ierr); 4576 for(i=0;i<mat_graph->ncmps;i++) { 4577 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\nDetails for connected component number %02d: size %04d, count %01d. Nodes follow.\n", 4578 i,mat_graph->cptr[i+1]-mat_graph->cptr[i],mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]);CHKERRQ(ierr); 4579 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"subdomains: "); 4580 for (j=0;j<mat_graph->count[mat_graph->queue[mat_graph->cptr[i]]]; j++) { 4581 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d ",mat_graph->neighbours_set[mat_graph->queue[mat_graph->cptr[i]]][j]); 4582 } 4583 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n"); 4584 for (j=mat_graph->cptr[i]; j<mat_graph->cptr[i+1]; j++){ 4585 /* ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d (%d), ",queue_in_global_numbering[j],mat_graph->queue[j]);CHKERRQ(ierr); */ 4586 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%d, ",mat_graph->queue[j]);CHKERRQ(ierr); 4587 } 4588 } 4589 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n--------------------------------------------------------------\n");CHKERRQ(ierr); 4590 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local vertices\n",PetscGlobalRank,nvc);CHKERRQ(ierr); 4591 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local faces\n",PetscGlobalRank,nfc);CHKERRQ(ierr); 4592 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d detected %02d local edges\n",PetscGlobalRank,nec);CHKERRQ(ierr); 4593 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 4594 } 4595 4596 ierr = PetscFree(queue_in_global_numbering);CHKERRQ(ierr); 4597 ierr = 
PetscFree(auxis);CHKERRQ(ierr); 4598 PetscFunctionReturn(0); 4599 4600 } 4601 4602 /* -------------------------------------------------------------------------- */ 4603 4604 /* The following code has been adapted from function IsConnectedSubdomain contained 4605 in source file contig.c of METIS library (version 5.0.1) 4606 It finds connected components of each partition labeled from 1 to n_dist */ 4607 4608 #undef __FUNCT__ 4609 #define __FUNCT__ "PCBDDCFindConnectedComponents" 4610 static PetscErrorCode PCBDDCFindConnectedComponents(PCBDDCGraph graph, PetscInt n_dist ) 4611 { 4612 PetscInt i, j, k, nvtxs, first, last, nleft, ncmps,pid,cum_queue,n,ncmps_pid; 4613 PetscInt *xadj, *adjncy, *where, *queue; 4614 PetscInt *cptr; 4615 PetscBool *touched; 4616 4617 PetscFunctionBegin; 4618 4619 nvtxs = graph->nvtxs; 4620 xadj = graph->xadj; 4621 adjncy = graph->adjncy; 4622 where = graph->where; 4623 touched = graph->touched; 4624 queue = graph->queue; 4625 cptr = graph->cptr; 4626 4627 for (i=0; i<nvtxs; i++) 4628 touched[i] = PETSC_FALSE; 4629 4630 cum_queue=0; 4631 ncmps=0; 4632 4633 for(n=0; n<n_dist; n++) { 4634 pid = n+1; /* partition labeled by 0 is discarded */ 4635 nleft = 0; 4636 for (i=0; i<nvtxs; i++) { 4637 if (where[i] == pid) 4638 nleft++; 4639 } 4640 for (i=0; i<nvtxs; i++) { 4641 if (where[i] == pid) 4642 break; 4643 } 4644 touched[i] = PETSC_TRUE; 4645 queue[cum_queue] = i; 4646 first = 0; last = 1; 4647 cptr[ncmps] = cum_queue; /* This actually points to queue */ 4648 ncmps_pid = 0; 4649 while (first != nleft) { 4650 if (first == last) { /* Find another starting vertex */ 4651 cptr[++ncmps] = first+cum_queue; 4652 ncmps_pid++; 4653 for (i=0; i<nvtxs; i++) { 4654 if (where[i] == pid && !touched[i]) 4655 break; 4656 } 4657 queue[cum_queue+last] = i; 4658 last++; 4659 touched[i] = PETSC_TRUE; 4660 } 4661 i = queue[cum_queue+first]; 4662 first++; 4663 for (j=xadj[i]; j<xadj[i+1]; j++) { 4664 k = adjncy[j]; 4665 if (where[k] == pid && !touched[k]) { 4666 
queue[cum_queue+last] = k; 4667 last++; 4668 touched[k] = PETSC_TRUE; 4669 } 4670 } 4671 } 4672 cptr[++ncmps] = first+cum_queue; 4673 ncmps_pid++; 4674 cum_queue=cptr[ncmps]; 4675 graph->where_ncmps[n] = ncmps_pid; 4676 } 4677 graph->ncmps = ncmps; 4678 4679 PetscFunctionReturn(0); 4680 } 4681