1 /* 2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 3 In this version each processor may intersect multiple subdomains and any subdomain may 4 intersect multiple processors. Intersections of subdomains with processors are called *local 5 subdomains*. 6 7 N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM()) 8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains()) 9 nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM()) 10 */ 11 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 12 #include <petscdm.h> 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 PCGASMType type; /* use reduced interpolation, restriction or both */ 18 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 19 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */ 20 PetscBool sort_indices; /* flag to sort subdomain indices */ 21 PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */ 22 PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */ 23 PetscBool hierarchicalpartitioning; 24 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */ 25 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */ 26 KSP *ksp; /* linear solvers for each subdomain */ 27 Mat *pmat; /* subdomain block matrices */ 28 Vec gx,gy; /* Merged work vectors */ 29 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. 
*/ 30 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */ 31 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */ 32 VecScatter pctoouter; 33 IS permutationIS; 34 Mat permutationP; 35 Mat pcmat; 36 Vec pcx,pcy; 37 } PC_GASM; 38 39 static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation) 40 { 41 PC_GASM *osm = (PC_GASM*)pc->data; 42 PetscInt i; 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */ 47 ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr); 48 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr); 49 for (i = 0; i < osm->n; ++i) (*permutation)[i] = i; 50 ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer) 55 { 56 PC_GASM *osm = (PC_GASM*)pc->data; 57 PetscInt j,nidx; 58 const PetscInt *idx; 59 PetscViewer sviewer; 60 char *cidx; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n); 65 /* Inner subdomains. */ 66 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr); 67 /* 68 No more than 15 characters per index plus a space. 69 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 70 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 71 For nidx == 0, the whole string 16 '\0'. 
72 */ 73 #define len 16*(nidx+1)+1 74 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 75 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 76 #undef len 77 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr); 78 for (j = 0; j < nidx; ++j) { 79 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 80 } 81 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr); 82 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 83 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr); 84 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 87 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 88 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 90 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 91 ierr = PetscFree(cidx);CHKERRQ(ierr); 92 /* Outer subdomains. */ 93 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr); 94 /* 95 No more than 15 characters per index plus a space. 96 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 97 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 98 For nidx == 0, the whole string 16 '\0'. 
99 */ 100 #define len 16*(nidx+1)+1 101 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 102 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 103 #undef len 104 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr); 105 for (j = 0; j < nidx; ++j) { 106 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr); 107 } 108 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 109 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr); 110 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr); 111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 115 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 116 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 117 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 118 ierr = PetscFree(cidx);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 123 { 124 PC_GASM *osm = (PC_GASM*)pc->data; 125 const char *prefix; 126 char fname[PETSC_MAX_PATH_LEN+1]; 127 PetscInt l, d, count; 128 PetscBool doprint,found; 129 PetscViewer viewer, sviewer = NULL; 130 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */ 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 135 doprint = PETSC_FALSE; 136 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr); 137 if (!doprint) PetscFunctionReturn(0); 138 ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr); 139 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 140 ierr = 
PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr); 141 /* 142 Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer: 143 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm. 144 */ 145 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 146 l = 0; 147 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 148 for (count = 0; count < osm->N; ++count) { 149 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */ 150 if (l<osm->n) { 151 d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 152 if (numbering[d] == count) { 153 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 154 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 155 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 156 ++l; 157 } 158 } 159 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 160 } 161 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 162 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 167 { 168 PC_GASM *osm = (PC_GASM*)pc->data; 169 const char *prefix; 170 PetscErrorCode ierr; 171 PetscMPIInt rank, size; 172 PetscInt bsz; 173 PetscBool iascii,view_subdomains=PETSC_FALSE; 174 PetscViewer sviewer; 175 PetscInt count, l; 176 char overlap[256] = "user-defined overlap"; 177 char gsubdomains[256] = "unknown total number of subdomains"; 178 char msubdomains[256] = "unknown max number of local subdomains"; 179 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the 
permutation from the local ordering */ 180 181 PetscFunctionBegin; 182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr); 183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr); 184 185 if (osm->overlap >= 0) { 186 ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr); 187 } 188 if (osm->N != PETSC_DETERMINE) { 189 ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr); 190 } 191 if (osm->nmax != PETSC_DETERMINE) { 192 ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr); 193 } 194 195 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 196 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr); 197 198 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 199 if (iascii) { 200 /* 201 Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer: 202 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed 203 collectively on the comm. 
204 */ 205 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 206 ierr = PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 207 ierr = PetscViewerASCIIPrintf(viewer," %s\n",overlap);CHKERRQ(ierr); 208 ierr = PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);CHKERRQ(ierr); 209 ierr = PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);CHKERRQ(ierr); 210 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr); 212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 213 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 214 /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */ 215 ierr = PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");CHKERRQ(ierr); 216 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 217 ierr = PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 218 /* Make sure that everybody waits for the banner to be printed. */ 219 ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 220 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. 
*/ 221 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 222 l = 0; 223 for (count = 0; count < osm->N; ++count) { 224 PetscMPIInt srank, ssize; 225 if (l<osm->n) { 226 PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 227 if (numbering[d] == count) { 228 ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr); 229 ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr); 230 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 231 ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr); 232 ierr = PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr); 233 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 234 if (view_subdomains) { 235 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 236 } 237 if (!pc->setupcalled) { 238 ierr = PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr); 239 } else { 240 ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr); 241 } 242 ierr = PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 243 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 244 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 245 ++l; 246 } 247 } 248 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 249 } 250 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 252 } 253 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 254 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 255 PetscFunctionReturn(0); 256 } 257 258 PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]); 259 260 
/* Sets up a two-level (hierarchical) partitioning of pc->pmat when the requested total
   number of subdomains N is smaller than the number of MPI tasks and divides it evenly:
   the matrix rows are repartitioned into N coarse parts of size/N tasks each, and the PC
   operates on the permuted matrix (pc->pmat is temporarily replaced; see PCSetUp_GASM). */
PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  MatPartitioning part;
  MPI_Comm        comm;
  PetscMPIInt     size;
  PetscInt        nlocalsubdomains,fromrows_localsize;
  IS              partitioning,fromrows,isn;
  Vec             outervec;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* we do not need a hierarchical partitioning when
   * the total number of subdomains is consistent with
   * the number of MPI tasks.
   * For the following cases, we do not need to use HP
   * */
  if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
  /* Bug fix: raise the error on the PC's communicator, not PETSC_COMM_WORLD -- the PC
     may live on a subcommunicator, and the file reports all other errors on its comm. */
  if (size%osm->N != 0) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
  nlocalsubdomains = size/osm->N;
  osm->n           = 1;
  ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
  ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
  ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
  ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
  /* get new processor owner number of each vertex */
  ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
  ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
  ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
  ierr = ISDestroy(&isn);CHKERRQ(ierr);
  ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
  ierr =
VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr); 298 ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr); 299 ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr); 300 ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr); 301 ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr); 302 osm->permutationIS = fromrows; 303 osm->pcmat = pc->pmat; 304 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 305 pc->pmat = osm->permutationP; 306 ierr = VecDestroy(&outervec);CHKERRQ(ierr); 307 ierr = ISDestroy(&fromrows);CHKERRQ(ierr); 308 ierr = ISDestroy(&partitioning);CHKERRQ(ierr); 309 osm->n = PETSC_DETERMINE; 310 PetscFunctionReturn(0); 311 } 312 313 static PetscErrorCode PCSetUp_GASM(PC pc) 314 { 315 PC_GASM *osm = (PC_GASM*)pc->data; 316 PetscErrorCode ierr; 317 PetscInt i,nInnerIndices,nTotalInnerIndices; 318 PetscMPIInt rank, size; 319 MatReuse scall = MAT_REUSE_MATRIX; 320 KSP ksp; 321 PC subpc; 322 const char *prefix,*pprefix; 323 Vec x,y; 324 PetscInt oni; /* Number of indices in the i-th local outer subdomain. */ 325 const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain. */ 326 PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */ 327 PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */ 328 IS gois; /* Disjoint union the global indices of outer subdomains. */ 329 IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */ 330 PetscScalar *gxarray, *gyarray; 331 PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. 
*/ 332 PetscInt num_subdomains = 0; 333 DM *subdomain_dm = NULL; 334 char **subdomain_names = NULL; 335 PetscInt *numbering; 336 337 338 PetscFunctionBegin; 339 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 340 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 341 if (!pc->setupcalled) { 342 /* use a hierarchical partitioning */ 343 if(osm->hierarchicalpartitioning){ 344 ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr); 345 } 346 if (osm->n == PETSC_DETERMINE) { 347 if (osm->N != PETSC_DETERMINE) { 348 /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */ 349 ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr); 350 } else if (osm->dm_subdomains && pc->dm) { 351 /* try pc->dm next, if allowed */ 352 PetscInt d; 353 IS *inner_subdomain_is, *outer_subdomain_is; 354 ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr); 355 if (num_subdomains) { 356 ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr); 357 } 358 for (d = 0; d < num_subdomains; ++d) { 359 if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);} 360 if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);} 361 } 362 ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr); 363 ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr); 364 } else { 365 /* still no subdomains; use one per processor */ 366 osm->nmax = osm->n = 1; 367 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 368 osm->N = size; 369 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 370 } 371 } 372 if (!osm->iis) { 373 /* 374 osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied. 
375 We create the requisite number of local inner subdomains and then expand them into 376 out subdomains, if necessary. 377 */ 378 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 379 } 380 if (!osm->ois) { 381 /* 382 Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap 383 has been requested, copy the inner subdomains over so they can be modified. 384 */ 385 ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr); 386 for (i=0; i<osm->n; ++i) { 387 if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */ 388 ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr); 389 ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr); 390 } else { 391 ierr = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr); 392 osm->ois[i] = osm->iis[i]; 393 } 394 } 395 if (osm->overlap>0 && osm->N>1) { 396 /* Extend the "overlapping" regions by a number of steps */ 397 ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr); 398 } 399 } 400 401 /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */ 402 if (osm->nmax == PETSC_DETERMINE) { 403 PetscMPIInt inwork,outwork; 404 /* determine global number of subdomains and the max number of local subdomains */ 405 inwork = osm->n; 406 ierr = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 407 osm->nmax = outwork; 408 } 409 if (osm->N == PETSC_DETERMINE) { 410 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. 
*/ 411 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr); 412 } 413 414 415 if (osm->sort_indices) { 416 for (i=0; i<osm->n; i++) { 417 ierr = ISSort(osm->ois[i]);CHKERRQ(ierr); 418 ierr = ISSort(osm->iis[i]);CHKERRQ(ierr); 419 } 420 } 421 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 422 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 423 424 /* 425 Merge the ISs, create merged vectors and restrictions. 426 */ 427 /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */ 428 on = 0; 429 for (i=0; i<osm->n; i++) { 430 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 431 on += oni; 432 } 433 ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr); 434 on = 0; 435 /* Merge local indices together */ 436 for (i=0; i<osm->n; i++) { 437 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 438 ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 439 ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr); 440 ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 441 on += oni; 442 } 443 ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr); 444 nTotalInnerIndices = 0; 445 for(i=0; i<osm->n; i++){ 446 ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr); 447 nTotalInnerIndices += nInnerIndices; 448 } 449 ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr); 450 ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 451 452 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr); 453 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 454 ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr); 455 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr); 456 /* gois might indices not on local */ 457 ierr = VecScatterCreate(x,gois,osm->gx,goid, 
&(osm->gorestriction));CHKERRQ(ierr); 458 ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr); 459 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr); 460 ierr = VecDestroy(&x);CHKERRQ(ierr); 461 ierr = ISDestroy(&gois);CHKERRQ(ierr); 462 463 /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */ 464 { 465 PetscInt ini; /* Number of indices the i-th a local inner subdomain. */ 466 PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */ 467 PetscInt *iidx; /* Global indices in the merged local inner subdomain. */ 468 PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 469 IS giis; /* IS for the disjoint union of inner subdomains. */ 470 IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 471 PetscScalar *array; 472 const PetscInt *indices; 473 PetscInt k; 474 on = 0; 475 for (i=0; i<osm->n; i++) { 476 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 477 on += oni; 478 } 479 ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr); 480 ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr); 481 ierr = VecGetArray(y,&array);CHKERRQ(ierr); 482 /* set communicator id to determine where overlap is */ 483 in = 0; 484 for (i=0; i<osm->n; i++) { 485 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr); 486 for (k = 0; k < ini; ++k){ 487 array[in+k] = numbering[i]; 488 } 489 in += ini; 490 } 491 ierr = VecRestoreArray(y,&array);CHKERRQ(ierr); 492 ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 494 ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr); 495 ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr); 496 on = 0; 497 in = 0; 498 
for (i=0; i<osm->n; i++) { 499 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 500 ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr); 501 for (k=0; k<oni; k++) { 502 /* skip overlapping indices to get inner domain */ 503 if(PetscRealPart(array[on+k]) != numbering[i]) continue; 504 iidx[in] = indices[k]; 505 ioidx[in++] = gostart+on+k; 506 } 507 ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr); 508 on += oni; 509 } 510 ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr); 511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr); 512 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr); 513 ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr); 514 ierr = VecDestroy(&y);CHKERRQ(ierr); 515 ierr = ISDestroy(&giis);CHKERRQ(ierr); 516 ierr = ISDestroy(&giois);CHKERRQ(ierr); 517 } 518 ierr = ISDestroy(&goid);CHKERRQ(ierr); 519 ierr = PetscFree(numbering);CHKERRQ(ierr); 520 521 /* Create the subdomain work vectors. 
*/ 522 ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr); 523 ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr); 524 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 525 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 526 for (i=0, on=0; i<osm->n; ++i, on += oni) { 527 PetscInt oNi; 528 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 529 /* on a sub communicator */ 530 ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr); 531 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr); 532 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr); 533 } 534 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 535 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 536 /* Create the subdomain solvers */ 537 ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr); 538 for (i=0; i<osm->n; i++) { 539 char subprefix[PETSC_MAX_PATH_LEN+1]; 540 ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr); 541 ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr); 542 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr); 543 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 544 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 545 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? 
*/ 546 if (subdomain_dm) { 547 ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr); 548 ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr); 549 } 550 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 551 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 552 if (subdomain_names && subdomain_names[i]) { 553 ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr); 554 ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr); 555 ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr); 556 } 557 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 558 osm->ksp[i] = ksp; 559 } 560 ierr = PetscFree(subdomain_dm);CHKERRQ(ierr); 561 ierr = PetscFree(subdomain_names);CHKERRQ(ierr); 562 scall = MAT_INITIAL_MATRIX; 563 564 } else { /* if (pc->setupcalled) */ 565 /* 566 Destroy the submatrices from the previous iteration 567 */ 568 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 569 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 570 scall = MAT_INITIAL_MATRIX; 571 } 572 if(osm->permutationIS){ 573 ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr); 574 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 575 osm->pcmat = pc->pmat; 576 pc->pmat = osm->permutationP; 577 } 578 579 } 580 581 582 /* 583 Extract out the submatrices. 
584 */ 585 if (size > 1) { 586 ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 587 } else { 588 ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 589 } 590 if (scall == MAT_INITIAL_MATRIX) { 591 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 592 for (i=0; i<osm->n; i++) { 593 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr); 594 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 595 } 596 } 597 598 /* Return control to the user so that the submatrices can be modified (e.g., to apply 599 different boundary conditions for the submatrices than for the global problem) */ 600 ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 601 602 /* 603 Loop over submatrices putting them into local ksps 604 */ 605 for (i=0; i<osm->n; i++) { 606 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr); 607 if (!pc->setupcalled) { 608 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 609 } 610 } 611 if(osm->pcmat){ 612 ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr); 613 pc->pmat = osm->pcmat; 614 osm->pcmat = NULL; 615 } 616 PetscFunctionReturn(0); 617 } 618 619 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 620 { 621 PC_GASM *osm = (PC_GASM*)pc->data; 622 PetscErrorCode ierr; 623 PetscInt i; 624 625 PetscFunctionBegin; 626 for (i=0; i<osm->n; i++) { 627 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout) 633 { 634 PC_GASM *osm = (PC_GASM*)pc->data; 635 PetscErrorCode ierr; 636 PetscInt i; 637 Vec x,y; 638 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 639 640 PetscFunctionBegin; 641 if (osm->pctoouter) { 642 ierr = 
VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 644 x = osm->pcx; 645 y = osm->pcy; 646 } else { 647 x = xin; 648 y = yout; 649 } 650 /* 651 support for limiting the restriction or interpolation only to the inner 652 subdomain values (leaving the other values 0). 653 */ 654 if (!(osm->type & PC_GASM_RESTRICT)) { 655 /* have to zero the work RHS since scatter may leave some slots empty */ 656 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 657 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 658 } else { 659 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 660 } 661 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 662 if (!(osm->type & PC_GASM_RESTRICT)) { 663 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 664 } else { 665 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 666 } 667 /* do the subdomain solves */ 668 for (i=0; i<osm->n; ++i) { 669 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 670 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr); 671 } 672 /* do we need to zero y? 
*/ 673 ierr = VecZeroEntries(y);CHKERRQ(ierr); 674 if (!(osm->type & PC_GASM_INTERPOLATE)) { 675 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 676 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 677 } else { 678 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 679 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 680 } 681 if (osm->pctoouter) { 682 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 683 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 684 } 685 PetscFunctionReturn(0); 686 } 687 688 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout) 689 { 690 PC_GASM *osm = (PC_GASM*)pc->data; 691 Mat X,Y,O=NULL,Z,W; 692 Vec x,y; 693 PetscInt i,m,M,N; 694 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 699 ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr); 700 if (osm->pctoouter) { 701 ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr); 702 ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr); 703 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr); 704 for (i = 0; i < N; ++i) { 705 ierr = MatDenseGetColumnVecRead(Xin,i,&x); 706 ierr = MatDenseGetColumnVecWrite(O,i,&y); 707 ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 708 ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 709 ierr = MatDenseRestoreColumnVecWrite(O,i,&y); 710 ierr = MatDenseRestoreColumnVecRead(Xin,i,&x); 711 } 712 X = Y = O; 713 } else { 714 X = Xin; 715 Y = Yout; 716 } 717 /* 718 support for limiting the restriction or interpolation only to the inner 719 subdomain values (leaving 
the other values 0). 720 */ 721 ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr); 722 ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr); 723 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr); 724 for (i = 0; i < N; ++i) { 725 ierr = MatDenseGetColumnVecRead(X,i,&x); 726 ierr = MatDenseGetColumnVecWrite(Z,i,&y); 727 if (!(osm->type & PC_GASM_RESTRICT)) { 728 /* have to zero the work RHS since scatter may leave some slots empty */ 729 ierr = VecZeroEntries(y);CHKERRQ(ierr); 730 ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 731 ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 732 } else { 733 ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 734 ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 735 } 736 ierr = MatDenseRestoreColumnVecWrite(Z,i,&y); 737 ierr = MatDenseRestoreColumnVecRead(X,i,&x); 738 } 739 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr); 740 ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 741 ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 742 ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 743 /* do the subdomain solve */ 744 ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr); 745 ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr); 746 ierr = MatDestroy(&Z);CHKERRQ(ierr); 747 /* do we need to zero y? 
*/ 748 ierr = MatZeroEntries(Y);CHKERRQ(ierr); 749 for (i = 0; i < N; ++i) { 750 ierr = MatDenseGetColumnVecWrite(Y,i,&y); 751 ierr = MatDenseGetColumnVecRead(W,i,&x); 752 if (!(osm->type & PC_GASM_INTERPOLATE)) { 753 ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 754 ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 755 } else { 756 ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 757 ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 758 } 759 ierr = MatDenseRestoreColumnVecRead(W,i,&x); 760 if (osm->pctoouter) { 761 ierr = MatDenseGetColumnVecWrite(Yout,i,&x); 762 ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 764 ierr = MatDenseRestoreColumnVecRead(Yout,i,&x); 765 } 766 ierr = MatDenseRestoreColumnVecWrite(Y,i,&y); 767 } 768 ierr = MatDestroy(&W);CHKERRQ(ierr); 769 ierr = MatDestroy(&O);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } 772 773 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout) 774 { 775 PC_GASM *osm = (PC_GASM*)pc->data; 776 PetscErrorCode ierr; 777 PetscInt i; 778 Vec x,y; 779 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 780 781 PetscFunctionBegin; 782 if(osm->pctoouter){ 783 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 785 x = osm->pcx; 786 y = osm->pcy; 787 }else{ 788 x = xin; 789 y = yout; 790 } 791 /* 792 Support for limiting the restriction or interpolation to only local 793 subdomain values (leaving the other values 0). 
794 795 Note: these are reversed from the PCApply_GASM() because we are applying the 796 transpose of the three terms 797 */ 798 if (!(osm->type & PC_GASM_INTERPOLATE)) { 799 /* have to zero the work RHS since scatter may leave some slots empty */ 800 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 801 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 802 } else { 803 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 804 } 805 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 806 if (!(osm->type & PC_GASM_INTERPOLATE)) { 807 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 808 } else { 809 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 810 } 811 /* do the local solves */ 812 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 813 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 814 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr); 815 } 816 ierr = VecZeroEntries(y);CHKERRQ(ierr); 817 if (!(osm->type & PC_GASM_RESTRICT)) { 818 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 819 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 820 } else { 821 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 822 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 823 } 824 if(osm->pctoouter){ 825 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 } 828 PetscFunctionReturn(0); 829 } 830 831 static PetscErrorCode PCReset_GASM(PC pc) 832 { 833 PC_GASM *osm = (PC_GASM*)pc->data; 834 PetscErrorCode ierr; 835 PetscInt i; 836 837 PetscFunctionBegin; 838 if (osm->ksp) 
{ 839 for (i=0; i<osm->n; i++) { 840 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 841 } 842 } 843 if (osm->pmat) { 844 if (osm->n > 0) { 845 PetscMPIInt size; 846 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 847 if (size > 1) { 848 /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */ 849 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 850 } else { 851 ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 852 } 853 } 854 } 855 if (osm->x) { 856 for (i=0; i<osm->n; i++) { 857 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr); 858 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr); 859 } 860 } 861 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr); 862 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr); 863 864 ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr); 865 ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr); 866 if (!osm->user_subdomains) { 867 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr); 868 osm->N = PETSC_DETERMINE; 869 osm->nmax = PETSC_DETERMINE; 870 } 871 if(osm->pctoouter){ 872 ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr); 873 } 874 if(osm->permutationIS){ 875 ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr); 876 } 877 if(osm->pcx){ 878 ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr); 879 } 880 if(osm->pcy){ 881 ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr); 882 } 883 if(osm->permutationP){ 884 ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr); 885 } 886 if(osm->pcmat){ 887 ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr); 888 } 889 PetscFunctionReturn(0); 890 } 891 892 static PetscErrorCode PCDestroy_GASM(PC pc) 893 { 894 PC_GASM *osm = (PC_GASM*)pc->data; 895 PetscErrorCode ierr; 896 PetscInt i; 897 898 PetscFunctionBegin; 899 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 900 /* PCReset will not destroy subdomains, if user_subdomains is true. 
*/ 901 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr); 902 if (osm->ksp) { 903 for (i=0; i<osm->n; i++) { 904 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 905 } 906 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 907 } 908 ierr = PetscFree(osm->x);CHKERRQ(ierr); 909 ierr = PetscFree(osm->y);CHKERRQ(ierr); 910 ierr = PetscFree(pc->data);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc) 915 { 916 PC_GASM *osm = (PC_GASM*)pc->data; 917 PetscErrorCode ierr; 918 PetscInt blocks,ovl; 919 PetscBool flg; 920 PCGASMType gasmtype; 921 922 PetscFunctionBegin; 923 ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr); 924 ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr); 925 ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr); 926 if (flg) { 927 ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); 928 } 929 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 930 if (flg) { 931 ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); 932 osm->dm_subdomains = PETSC_FALSE; 933 } 934 flg = PETSC_FALSE; 935 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 936 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);} 937 ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr); 938 ierr = PetscOptionsTail();CHKERRQ(ierr); 939 
PetscFunctionReturn(0); 940 } 941 942 /*------------------------------------------------------------------------------------*/ 943 944 /*@ 945 PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the 946 communicator. 947 Logically collective on pc 948 949 Input Parameters: 950 + pc - the preconditioner 951 - N - total number of subdomains 952 953 954 Level: beginner 955 956 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap() 957 PCGASMCreateSubdomains2D() 958 @*/ 959 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 960 { 961 PC_GASM *osm = (PC_GASM*)pc->data; 962 PetscMPIInt size,rank; 963 PetscErrorCode ierr; 964 965 PetscFunctionBegin; 966 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N); 967 if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp()."); 968 969 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 970 osm->ois = osm->iis = NULL; 971 972 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 973 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 974 osm->N = N; 975 osm->n = PETSC_DETERMINE; 976 osm->nmax = PETSC_DETERMINE; 977 osm->dm_subdomains = PETSC_FALSE; 978 PetscFunctionReturn(0); 979 } 980 981 static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[]) 982 { 983 PC_GASM *osm = (PC_GASM*)pc->data; 984 PetscErrorCode ierr; 985 PetscInt i; 986 987 PetscFunctionBegin; 988 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n); 989 if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp()."); 990 991 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 992 osm->iis = osm->ois 
= NULL; 993 osm->n = n; 994 osm->N = PETSC_DETERMINE; 995 osm->nmax = PETSC_DETERMINE; 996 if (ois) { 997 ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr); 998 for (i=0; i<n; i++) { 999 ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr); 1000 osm->ois[i] = ois[i]; 1001 } 1002 /* 1003 Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(), 1004 it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1. 1005 */ 1006 osm->overlap = -1; 1007 /* inner subdomains must be provided */ 1008 if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n"); 1009 }/* end if */ 1010 if (iis) { 1011 ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr); 1012 for (i=0; i<n; i++) { 1013 ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr); 1014 osm->iis[i] = iis[i]; 1015 } 1016 if (!ois) { 1017 osm->ois = NULL; 1018 /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */ 1019 } 1020 } 1021 if (PetscDefined(USE_DEBUG)) { 1022 PetscInt j,rstart,rend,*covered,lsize; 1023 const PetscInt *indices; 1024 /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */ 1025 ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr); 1026 ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr); 1027 /* check if the current processor owns indices from others */ 1028 for (i=0; i<n; i++) { 1029 ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr); 1030 ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr); 1031 for (j=0; j<lsize; j++) { 1032 if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]); 1033 else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have 
an overlapping index %d ",indices[j]); 1034 else covered[indices[j]-rstart] = 1; 1035 } 1036 ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr); 1037 } 1038 /* check if we miss any indices */ 1039 for (i=rstart; i<rend; i++) { 1040 if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i); 1041 } 1042 ierr = PetscFree(covered);CHKERRQ(ierr); 1043 } 1044 if (iis) osm->user_subdomains = PETSC_TRUE; 1045 PetscFunctionReturn(0); 1046 } 1047 1048 static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 1049 { 1050 PC_GASM *osm = (PC_GASM*)pc->data; 1051 1052 PetscFunctionBegin; 1053 if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 1054 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 1055 if (!pc->setupcalled) osm->overlap = ovl; 1056 PetscFunctionReturn(0); 1057 } 1058 1059 static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 1060 { 1061 PC_GASM *osm = (PC_GASM*)pc->data; 1062 1063 PetscFunctionBegin; 1064 osm->type = type; 1065 osm->type_set = PETSC_TRUE; 1066 PetscFunctionReturn(0); 1067 } 1068 1069 static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 1070 { 1071 PC_GASM *osm = (PC_GASM*)pc->data; 1072 1073 PetscFunctionBegin; 1074 osm->sort_indices = doSort; 1075 PetscFunctionReturn(0); 1076 } 1077 1078 /* 1079 FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed. 1080 In particular, it would upset the global subdomain number calculation. 
1081 */ 1082 static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 1083 { 1084 PC_GASM *osm = (PC_GASM*)pc->data; 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 1089 1090 if (n) *n = osm->n; 1091 if (first) { 1092 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1093 *first -= osm->n; 1094 } 1095 if (ksp) { 1096 /* Assume that local solves are now different; not necessarily 1097 true, though! This flag is used only for PCView_GASM() */ 1098 *ksp = osm->ksp; 1099 osm->same_subdomain_solvers = PETSC_FALSE; 1100 } 1101 PetscFunctionReturn(0); 1102 } /* PCGASMGetSubKSP_GASM() */ 1103 1104 /*@C 1105 PCGASMSetSubdomains - Sets the subdomains for this processor 1106 for the additive Schwarz preconditioner. 1107 1108 Collective on pc 1109 1110 Input Parameters: 1111 + pc - the preconditioner object 1112 . n - the number of subdomains for this processor 1113 . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains) 1114 - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap) 1115 1116 Notes: 1117 The IS indices use the parallel, global numbering of the vector entries. 1118 Inner subdomains are those where the correction is applied. 1119 Outer subdomains are those where the residual necessary to obtain the 1120 corrections is obtained (see PCGASMType for the use of inner/outer subdomains). 1121 Both inner and outer subdomains can extend over several processors. 1122 This processor's portion of a subdomain is known as a local subdomain. 
1123 1124 Inner subdomains can not overlap with each other, do not have any entities from remote processors, 1125 and have to cover the entire local subdomain owned by the current processor. The index sets on each 1126 process should be ordered such that the ith local subdomain is connected to the ith remote subdomain 1127 on another MPI process. 1128 1129 By default the GASM preconditioner uses 1 (local) subdomain per processor. 1130 1131 1132 Level: advanced 1133 1134 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1135 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 1136 @*/ 1137 PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[]) 1138 { 1139 PC_GASM *osm = (PC_GASM*)pc->data; 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1144 ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr); 1145 osm->dm_subdomains = PETSC_FALSE; 1146 PetscFunctionReturn(0); 1147 } 1148 1149 /*@ 1150 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 1151 additive Schwarz preconditioner. Either all or no processors in the 1152 pc communicator must call this routine. 1153 1154 Logically Collective on pc 1155 1156 Input Parameters: 1157 + pc - the preconditioner context 1158 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0) 1159 1160 Options Database Key: 1161 . -pc_gasm_overlap <overlap> - Sets overlap 1162 1163 Notes: 1164 By default the GASM preconditioner uses 1 subdomain per processor. To use 1165 multiple subdomain per perocessor or "straddling" subdomains that intersect 1166 multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>). 1167 1168 The overlap defaults to 0, so if one desires that no additional 1169 overlap be computed beyond what may have been set with a call to 1170 PCGASMSetSubdomains(), then ovl must be set to be 0. 
In particular, if one does 1171 not explicitly set the subdomains in application code, then all overlap would be computed 1172 internally by PETSc, and using an overlap of 0 would result in an GASM 1173 variant that is equivalent to the block Jacobi preconditioner. 1174 1175 Note that one can define initial index sets with any overlap via 1176 PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows 1177 PETSc to extend that overlap further, if desired. 1178 1179 Level: intermediate 1180 1181 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1182 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 1183 @*/ 1184 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 1185 { 1186 PetscErrorCode ierr; 1187 PC_GASM *osm = (PC_GASM*)pc->data; 1188 1189 PetscFunctionBegin; 1190 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1191 PetscValidLogicalCollectiveInt(pc,ovl,2); 1192 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 1193 osm->dm_subdomains = PETSC_FALSE; 1194 PetscFunctionReturn(0); 1195 } 1196 1197 /*@ 1198 PCGASMSetType - Sets the type of restriction and interpolation used 1199 for local problems in the additive Schwarz method. 1200 1201 Logically Collective on PC 1202 1203 Input Parameters: 1204 + pc - the preconditioner context 1205 - type - variant of GASM, one of 1206 .vb 1207 PC_GASM_BASIC - full interpolation and restriction 1208 PC_GASM_RESTRICT - full restriction, local processor interpolation 1209 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 1210 PC_GASM_NONE - local processor restriction and interpolation 1211 .ve 1212 1213 Options Database Key: 1214 . 
-pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1215 1216 Level: intermediate 1217 1218 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1219 PCGASMCreateSubdomains2D() 1220 @*/ 1221 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 1222 { 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1227 PetscValidLogicalCollectiveEnum(pc,type,2); 1228 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 /*@ 1233 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 1234 1235 Logically Collective on PC 1236 1237 Input Parameters: 1238 + pc - the preconditioner context 1239 - doSort - sort the subdomain indices 1240 1241 Level: intermediate 1242 1243 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1244 PCGASMCreateSubdomains2D() 1245 @*/ 1246 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 1247 { 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1252 PetscValidLogicalCollectiveBool(pc,doSort,2); 1253 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 /*@C 1258 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 1259 this processor. 1260 1261 Collective on PC iff first_local is requested 1262 1263 Input Parameter: 1264 . pc - the preconditioner context 1265 1266 Output Parameters: 1267 + n_local - the number of blocks on this processor or NULL 1268 . first_local - the global number of the first block on this processor or NULL, 1269 all processors must request or all must pass NULL 1270 - ksp - the array of KSP contexts 1271 1272 Note: 1273 After PCGASMGetSubKSP() the array of KSPes is not to be freed 1274 1275 Currently for some matrix implementations only 1 block per processor 1276 is supported. 
1277 1278 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 1279 1280 Level: advanced 1281 1282 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(), 1283 PCGASMCreateSubdomains2D(), 1284 @*/ 1285 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 1286 { 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1291 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 /* -------------------------------------------------------------------------------------*/ 1296 /*MC 1297 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 1298 its own KSP object. 1299 1300 Options Database Keys: 1301 + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors 1302 . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view 1303 . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp() 1304 . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains 1305 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1306 1307 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 1308 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 1309 -pc_gasm_type basic to use the standard GASM. 1310 1311 Notes: 1312 Blocks can be shared by multiple processes. 1313 1314 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1315 options database keys. 
For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 1316 1317 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 1318 and set the options directly on the resulting KSP object (you can access its PC 1319 with KSPGetPC()) 1320 1321 1322 Level: beginner 1323 1324 References: 1325 + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions 1326 Courant Institute, New York University Technical report 1327 - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1328 Cambridge University Press. 1329 1330 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1331 PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(), 1332 PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType() 1333 1334 M*/ 1335 1336 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc) 1337 { 1338 PetscErrorCode ierr; 1339 PC_GASM *osm; 1340 1341 PetscFunctionBegin; 1342 ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr); 1343 1344 osm->N = PETSC_DETERMINE; 1345 osm->n = PETSC_DECIDE; 1346 osm->nmax = PETSC_DETERMINE; 1347 osm->overlap = 0; 1348 osm->ksp = NULL; 1349 osm->gorestriction = NULL; 1350 osm->girestriction = NULL; 1351 osm->pctoouter = NULL; 1352 osm->gx = NULL; 1353 osm->gy = NULL; 1354 osm->x = NULL; 1355 osm->y = NULL; 1356 osm->pcx = NULL; 1357 osm->pcy = NULL; 1358 osm->permutationIS = NULL; 1359 osm->permutationP = NULL; 1360 osm->pcmat = NULL; 1361 osm->ois = NULL; 1362 osm->iis = NULL; 1363 osm->pmat = NULL; 1364 osm->type = PC_GASM_RESTRICT; 1365 osm->same_subdomain_solvers = PETSC_TRUE; 1366 osm->sort_indices = PETSC_TRUE; 1367 osm->dm_subdomains = PETSC_FALSE; 1368 osm->hierarchicalpartitioning = PETSC_FALSE; 1369 1370 pc->data = (void*)osm; 1371 pc->ops->apply = PCApply_GASM; 1372 pc->ops->matapply = PCMatApply_GASM; 1373 pc->ops->applytranspose = PCApplyTranspose_GASM; 
1374 pc->ops->setup = PCSetUp_GASM; 1375 pc->ops->reset = PCReset_GASM; 1376 pc->ops->destroy = PCDestroy_GASM; 1377 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1378 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1379 pc->ops->view = PCView_GASM; 1380 pc->ops->applyrichardson = NULL; 1381 1382 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr); 1383 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1384 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr); 1385 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1386 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1387 PetscFunctionReturn(0); 1388 } 1389 1390 PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]) 1391 { 1392 MatPartitioning mpart; 1393 const char *prefix; 1394 PetscInt i,j,rstart,rend,bs; 1395 PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1396 Mat Ad = NULL, adj; 1397 IS ispart,isnumb,*is; 1398 PetscErrorCode ierr; 1399 1400 PetscFunctionBegin; 1401 if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc); 1402 1403 /* Get prefix, row distribution, and block size */ 1404 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1405 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1406 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1407 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1408 1409 /* Get diagonal block from matrix if possible */ 1410 ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1411 if (hasop) { 1412 ierr = 
MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr); 1413 } 1414 if (Ad) { 1415 ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1416 if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1417 } 1418 if (Ad && nloc > 1) { 1419 PetscBool match,done; 1420 /* Try to setup a good matrix partitioning if available */ 1421 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1422 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1423 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1424 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1425 if (!match) { 1426 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1427 } 1428 if (!match) { /* assume a "good" partitioner is available */ 1429 PetscInt na; 1430 const PetscInt *ia,*ja; 1431 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1432 if (done) { 1433 /* Build adjacency matrix by hand. 
Unfortunately a call to 1434 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1435 remove the block-aij structure and we cannot expect 1436 MatPartitioning to split vertices as we need */ 1437 PetscInt i,j,len,nnz,cnt,*iia=NULL,*jja=NULL; 1438 const PetscInt *row; 1439 nnz = 0; 1440 for (i=0; i<na; i++) { /* count number of nonzeros */ 1441 len = ia[i+1] - ia[i]; 1442 row = ja + ia[i]; 1443 for (j=0; j<len; j++) { 1444 if (row[j] == i) { /* don't count diagonal */ 1445 len--; break; 1446 } 1447 } 1448 nnz += len; 1449 } 1450 ierr = PetscMalloc1(na+1,&iia);CHKERRQ(ierr); 1451 ierr = PetscMalloc1(nnz,&jja);CHKERRQ(ierr); 1452 nnz = 0; 1453 iia[0] = 0; 1454 for (i=0; i<na; i++) { /* fill adjacency */ 1455 cnt = 0; 1456 len = ia[i+1] - ia[i]; 1457 row = ja + ia[i]; 1458 for (j=0; j<len; j++) { 1459 if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */ 1460 } 1461 nnz += cnt; 1462 iia[i+1] = nnz; 1463 } 1464 /* Partitioning of the adjacency matrix */ 1465 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr); 1466 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1467 ierr = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr); 1468 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1469 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1470 ierr = MatDestroy(&adj);CHKERRQ(ierr); 1471 foundpart = PETSC_TRUE; 1472 } 1473 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1474 } 1475 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr); 1476 } 1477 ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr); 1478 if (!foundpart) { 1479 1480 /* Partitioning by contiguous chunks of rows */ 1481 1482 PetscInt mbs = (rend-rstart)/bs; 1483 PetscInt start = rstart; 1484 for (i=0; i<nloc; i++) { 1485 PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs; 1486 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1487 start += count; 1488 } 1489 1490 } else { 1491 1492 /* 
Partitioning by adjacency of diagonal block */ 1493 1494 const PetscInt *numbering; 1495 PetscInt *count,nidx,*indices,*newidx,start=0; 1496 /* Get node count in each partition */ 1497 ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr); 1498 ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr); 1499 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1500 for (i=0; i<nloc; i++) count[i] *= bs; 1501 } 1502 /* Build indices from node numbering */ 1503 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1504 ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr); 1505 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1506 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1507 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1508 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1509 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1510 ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr); 1511 for (i=0; i<nidx; i++) { 1512 for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j; 1513 } 1514 ierr = PetscFree(indices);CHKERRQ(ierr); 1515 nidx *= bs; 1516 indices = newidx; 1517 } 1518 /* Shift to get global indices */ 1519 for (i=0; i<nidx; i++) indices[i] += rstart; 1520 1521 /* Build the index sets for each block */ 1522 for (i=0; i<nloc; i++) { 1523 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1524 ierr = ISSort(is[i]);CHKERRQ(ierr); 1525 start += count[i]; 1526 } 1527 1528 ierr = PetscFree(count);CHKERRQ(ierr); 1529 ierr = PetscFree(indices);CHKERRQ(ierr); 1530 ierr = ISDestroy(&isnumb);CHKERRQ(ierr); 1531 ierr = ISDestroy(&ispart);CHKERRQ(ierr); 1532 } 1533 *iis = is; 1534 PetscFunctionReturn(0); 1535 } 1536 1537 PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[]) 1538 { 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr); 
1543 PetscFunctionReturn(0); 1544 } 1545 1546 /*@C 1547 PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive 1548 Schwarz preconditioner for a any problem based on its matrix. 1549 1550 Collective 1551 1552 Input Parameters: 1553 + A - The global matrix operator 1554 - N - the number of global subdomains requested 1555 1556 Output Parameters: 1557 + n - the number of subdomains created on this processor 1558 - iis - the array of index sets defining the local inner subdomains (on which the correction is applied) 1559 1560 Level: advanced 1561 1562 Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor. 1563 When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local. 1564 The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping 1565 outer subdomains will be automatically generated from these according to the requested amount of 1566 overlap; this is currently supported only with local subdomains. 1567 1568 1569 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains() 1570 @*/ 1571 PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[]) 1572 { 1573 PetscMPIInt size; 1574 PetscErrorCode ierr; 1575 1576 PetscFunctionBegin; 1577 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1578 PetscValidPointer(iis,4); 1579 1580 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N); 1581 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1582 if (N >= size) { 1583 *n = N/size + (N%size); 1584 ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr); 1585 } else { 1586 ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 /*@C 1592 PCGASMDestroySubdomains - Destroys the index sets created with 1593 PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. 
   Should be
   called after setting subdomains with PCGASMSetSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  iis - the array of inner subdomains,
-  ois - the array of outer subdomains, can be NULL

   Level: intermediate

   Notes:
   this is merely a convenience subroutine that walks each list,
   destroys each IS on the list, and then frees the list. At the end the
   list pointers are set to NULL.

.seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
@*/
PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Nothing to destroy for an empty (or negative) subdomain count. */
  if (n <= 0) PetscFunctionReturn(0);
  /* Outer (overlapping) subdomains -- argument 3 of the public signature; either the
     array pointer or the array itself may be NULL, in which case it is skipped. */
  if (ois) {
    PetscValidPointer(ois,3);
    if (*ois) {
      PetscValidPointer(*ois,3);
      for (i=0; i<n; i++) {
        ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
      }
      /* PetscFree() also zeroes *ois, giving the NULL list pointer promised in the Notes. */
      ierr = PetscFree((*ois));CHKERRQ(ierr);
    }
  }
  /* Inner (nonoverlapping) subdomains -- argument 2; same NULL-tolerant treatment. */
  if (iis) {
    PetscValidPointer(iis,2);
    if (*iis) {
      PetscValidPointer(*iis,2);
      for (i=0; i<n; i++) {
        ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree((*iis));CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
   PCGASMLocalSubdomainBounds2D - intersects one rectangular subdomain (xleft,ylow)x(xright,yhigh)
   of a row-major M-by-N grid with this process's contiguous matrix-row ownership range [first,last),
   returning the bounding box of the intersection (*xleft_loc,*ylow_loc)x(*xright_loc,*yhigh_loc)
   and the number *n of locally owned grid points inside the subdomain.
   NOTE(review): function-like macro -- several arguments (M, first, last, xleft, xright, yhigh)
   are evaluated more than once, so pass only side-effect-free expressions.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{ \
  PetscInt first_row = first/M, last_row = last/M+1; \
  /* \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
   of the bounding box of the intersection of the subdomain with the local ownership range (local \
   subdomain). \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
   of the intersection. \
  */ \
  /* ylow_loc is the grid row containing the first element of the local subdomain */ \
  *ylow_loc = PetscMax(first_row,ylow); \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
  /* yhigh_loc is the grid row above the last local subdomain element */ \
  *yhigh_loc = PetscMin(last_row,yhigh); \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
  /* Now compute the size of the local subdomain n: full rows strictly between the first and \
     last local rows, plus the partial first/last rows (clamped into [0,width]). */ \
  *n = 0; \
  if (*ylow_loc < *yhigh_loc) { \
    PetscInt width = xright-xleft; \
    *n += width*(*yhigh_loc-*ylow_loc-1); \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  } \
}

/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  pc - the preconditioner context
.  M, N - the global number of grid points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  nsub - the number of local subdomains created
.
  iis - array of index sets defining inner (nonoverlapping) subdomains
-  ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains

   Level: advanced

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
@*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];   /* index [0]: limits/counts WITH overlap, [1]: WITHOUT overlap */
  PetscInt       first, last;              /* this process's ownership range of pc->pmat rows */
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **xis = NULL, **is = ois, **is_local = iis;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  /* Every grid point owns dof consecutive matrix rows, so the ownership range must be a multiple of dof. */
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* First pass: determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
        Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */

  /* Now we can allocate the necessary number of ISs. */
  *nsub = s;
  ierr  = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
  ierr  = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
  /* Second pass: repeat the sweep, now actually building the index sets. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
        Determine whether this domain intersects this processor's ownership range of pc->pmat.
        Do this twice: first for the domains with overlaps, and once without.
        During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for (q = 0; q < 2; ++q) {
        PetscBool split = PETSC_FALSE;  /* did this q-iteration create subcomm via MPI_Comm_split? */
        /*
          domain limits, (xleft, xright) and (ylow, yhigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;  /* convert grid-point count to matrix-row count */
        n[q]  = nidx;
        /*
          Based on the counted number of indices in the local domain *with an overlap*,
          construct a subcommunicator of all the processors supporting this domain.
          Observe that a domain with an overlap might have nontrivial local support,
          while the domain without an overlap might not.  Hence, the decision to participate
          in the subcommunicator must be based on the domain with an overlap.
        */
        if (q == 0) {
          if (nidx) color = 1;
          else color = MPI_UNDEFINED;   /* this rank does not join the subcommunicator */
          ierr  = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
          split = PETSC_TRUE;
        }
        /*
          Proceed only if the number of local indices *with an overlap* is nonzero.
        */
        if (n[0]) {
          if (q == 0) xis = is;
          if (q == 1) {
            /*
              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
              Moreover, if the overlap is zero, the two ISs are identical.
            */
            if (overlap == 0) {
              /* Reuse the outer IS for the inner one; bump its reference count instead of rebuilding it. */
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              xis     = is_local;
              /* Reuse the communicator the q==0 pass attached to the outer IS. */
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          } /* if (q == 1) */
          idx  = NULL;
          ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
          if (nidx) {
            k = 0;
            /* Enumerate the locally owned rows of this subdomain, row by grid row; the first and
               last grid rows may be partial, bounded by xleft_loc/xright_loc. */
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
              PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for (d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          /* The IS takes ownership of idx (PETSC_OWN_POINTER); do not free it here. */
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
          if (split) {
            ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
          }
        }/* if (n[0]) */
      }/* for (q = 0; q < 2; ++q) */
      if (n[0]) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}

/*@C
    PCGASMGetSubdomains - Gets the subdomains supported on this processor
    for the additive Schwarz preconditioner.

    Not Collective

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of subdomains for this processor (default value = 1)
.
iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL) 1857 - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL) 1858 1859 1860 Notes: 1861 The user is responsible for destroying the ISs and freeing the returned arrays. 1862 The IS numbering is in the parallel, global numbering of the vector. 1863 1864 Level: advanced 1865 1866 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(), 1867 PCGASMSetSubdomains(), PCGASMGetSubmatrices() 1868 @*/ 1869 PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[]) 1870 { 1871 PC_GASM *osm; 1872 PetscErrorCode ierr; 1873 PetscBool match; 1874 PetscInt i; 1875 1876 PetscFunctionBegin; 1877 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1878 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1879 if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name); 1880 osm = (PC_GASM*)pc->data; 1881 if (n) *n = osm->n; 1882 if (iis) { 1883 ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr); 1884 } 1885 if (ois) { 1886 ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr); 1887 } 1888 if (iis || ois) { 1889 for (i = 0; i < osm->n; ++i) { 1890 if (iis) (*iis)[i] = osm->iis[i]; 1891 if (ois) (*ois)[i] = osm->ois[i]; 1892 } 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 /*@C 1898 PCGASMGetSubmatrices - Gets the local submatrices (for this processor 1899 only) for the additive Schwarz preconditioner. 1900 1901 Not Collective 1902 1903 Input Parameter: 1904 . 
pc - the preconditioner context 1905 1906 Output Parameters: 1907 + n - the number of matrices for this processor (default value = 1) 1908 - mat - the matrices 1909 1910 Notes: 1911 matrices returned by this routine have the same communicators as the index sets (IS) 1912 used to define subdomains in PCGASMSetSubdomains() 1913 Level: advanced 1914 1915 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), 1916 PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains() 1917 @*/ 1918 PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1919 { 1920 PC_GASM *osm; 1921 PetscErrorCode ierr; 1922 PetscBool match; 1923 1924 PetscFunctionBegin; 1925 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1926 PetscValidIntPointer(n,2); 1927 if (mat) PetscValidPointer(mat,3); 1928 if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp()."); 1929 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1930 if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name); 1931 osm = (PC_GASM*)pc->data; 1932 if (n) *n = osm->n; 1933 if (mat) *mat = osm->pmat; 1934 PetscFunctionReturn(0); 1935 } 1936 1937 /*@ 1938 PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible. 1939 Logically Collective 1940 1941 Input Parameter: 1942 + pc - the preconditioner 1943 - flg - boolean indicating whether to use subdomains defined by the DM 1944 1945 Options Database Key: 1946 . 
-pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains 1947 1948 Level: intermediate 1949 1950 Notes: 1951 PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(), 1952 so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap() 1953 automatically turns the latter off. 1954 1955 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap() 1956 PCGASMCreateSubdomains2D() 1957 @*/ 1958 PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg) 1959 { 1960 PC_GASM *osm = (PC_GASM*)pc->data; 1961 PetscErrorCode ierr; 1962 PetscBool match; 1963 1964 PetscFunctionBegin; 1965 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1966 PetscValidLogicalCollectiveBool(pc,flg,2); 1967 if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC."); 1968 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1969 if (match) { 1970 if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) { 1971 osm->dm_subdomains = flg; 1972 } 1973 } 1974 PetscFunctionReturn(0); 1975 } 1976 1977 /*@ 1978 PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible. 1979 Not Collective 1980 1981 Input Parameter: 1982 . pc - the preconditioner 1983 1984 Output Parameter: 1985 . 
flg - boolean indicating whether to use subdomains defined by the DM 1986 1987 Level: intermediate 1988 1989 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap() 1990 PCGASMCreateSubdomains2D() 1991 @*/ 1992 PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg) 1993 { 1994 PC_GASM *osm = (PC_GASM*)pc->data; 1995 PetscErrorCode ierr; 1996 PetscBool match; 1997 1998 PetscFunctionBegin; 1999 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 2000 PetscValidBoolPointer(flg,2); 2001 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 2002 if (match) { 2003 if (flg) *flg = osm->dm_subdomains; 2004 } 2005 PetscFunctionReturn(0); 2006 } 2007