1 /* 2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 3 In this version each processor may intersect multiple subdomains and any subdomain may 4 intersect multiple processors. Intersections of subdomains with processors are called *local 5 subdomains*. 6 7 N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM()) 8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains()) 9 nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM()) 10 */ 11 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 12 #include <petscdm.h> 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 PCGASMType type; /* use reduced interpolation, restriction or both */ 18 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 19 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */ 20 PetscBool sort_indices; /* flag to sort subdomain indices */ 21 PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */ 22 PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */ 23 PetscBool hierarchicalpartitioning; 24 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */ 25 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */ 26 KSP *ksp; /* linear solvers for each subdomain */ 27 Mat *pmat; /* subdomain block matrices */ 28 Vec gx,gy; /* Merged work vectors */ 29 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. 
*/ 30 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */ 31 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */ 32 VecScatter pctoouter; 33 IS permutationIS; 34 Mat permutationP; 35 Mat pcmat; 36 Vec pcx,pcy; 37 } PC_GASM; 38 39 static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation) 40 { 41 PC_GASM *osm = (PC_GASM*)pc->data; 42 PetscInt i; 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */ 47 ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr); 48 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr); 49 for (i = 0; i < osm->n; ++i) (*permutation)[i] = i; 50 ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer) 55 { 56 PC_GASM *osm = (PC_GASM*)pc->data; 57 PetscInt j,nidx; 58 const PetscInt *idx; 59 PetscViewer sviewer; 60 char *cidx; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 PetscCheckFalse(i < 0 || i > osm->n,PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n); 65 /* Inner subdomains. */ 66 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr); 67 /* 68 No more than 15 characters per index plus a space. 69 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 70 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 71 For nidx == 0, the whole string 16 '\0'. 
72 */ 73 #define len 16*(nidx+1)+1 74 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 75 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 76 #undef len 77 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr); 78 for (j = 0; j < nidx; ++j) { 79 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 80 } 81 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr); 82 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 83 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr); 84 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 87 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 88 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 90 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 91 ierr = PetscFree(cidx);CHKERRQ(ierr); 92 /* Outer subdomains. */ 93 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr); 94 /* 95 No more than 15 characters per index plus a space. 96 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 97 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 98 For nidx == 0, the whole string 16 '\0'. 
99 */ 100 #define len 16*(nidx+1)+1 101 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 102 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 103 #undef len 104 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr); 105 for (j = 0; j < nidx; ++j) { 106 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr); 107 } 108 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 109 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr); 110 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr); 111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 115 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 116 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 117 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 118 ierr = PetscFree(cidx);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 123 { 124 PC_GASM *osm = (PC_GASM*)pc->data; 125 const char *prefix; 126 char fname[PETSC_MAX_PATH_LEN+1]; 127 PetscInt l, d, count; 128 PetscBool found; 129 PetscViewer viewer, sviewer = NULL; 130 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */ 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 135 ierr = PetscOptionsHasName(NULL,prefix,"-pc_gasm_print_subdomains",&found);CHKERRQ(ierr); 136 if (!found) PetscFunctionReturn(0); 137 ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr); 138 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 139 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr); 140 /* 141 Make sure the 
viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
   */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    /* Every rank participates in every iteration so the sub-viewer hand-off stays collective. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   PCView() implementation for PCGASM: prints the restriction/interpolation type, overlap,
   subdomain counts, and (optionally, with -pc_gasm_view_subdomains) each subdomain's index
   sets and subsolver, walking the subdomains in global numbering order.
*/
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr);

  /* The default strings above are overwritten only when the corresponding value has been determined. */
  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr);
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);

/*
   Replace pc->pmat with a permuted matrix in which each subdomain's rows live on a consecutive
   group of ranks, computed by a two-level (hierarchical) partitioning. A no-op unless
   1 < osm->N < size; requires size to be divisible by osm->N.
*/
PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  MatPartitioning part;
  MPI_Comm        comm;
  PetscMPIInt     size;
  PetscInt        nlocalsubdomains,fromrows_localsize;
  IS              partitioning,fromrows,isn;
  Vec             outervec;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  /* we do not need a hierarchical partitioning when
   * the total number of subdomains is consistent with
   * the number of MPI tasks.
   * For the following cases, we do not need to use HP
   * */
  if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
  /* NOTE(review): error is raised on PETSC_COMM_WORLD rather than comm -- verify that is intended. */
  PetscCheckFalse(size%osm->N != 0,PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d ",osm->N,size);
  nlocalsubdomains = size/osm->N;
  osm->n           = 1;
  ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
  ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
  ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
  ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
  /* get new processor owner number of each vertex */
  ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
  ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
  ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
  ierr = ISDestroy(&isn);CHKERRQ(ierr);
  ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  ierr =
MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
  ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
  ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
  ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
  ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
  osm->permutationIS = fromrows;
  /* Stash the original pc->pmat and substitute the permuted matrix; PCSetUp_GASM() restores it at the end. */
  osm->pcmat = pc->pmat;
  ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
  pc->pmat = osm->permutationP;
  ierr = VecDestroy(&outervec);CHKERRQ(ierr);
  ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
  ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
  osm->n = PETSC_DETERMINE;
  PetscFunctionReturn(0);
}

/*
   PCSetUp() implementation for PCGASM: builds the subdomains (from the user, the DM, or one per
   rank), extends them by the requested overlap, creates the merged work vectors and restriction
   scatters, extracts the subdomain matrices, and creates/updates the subdomain solvers.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,nInnerIndices,nTotalInnerIndices;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            x,y;
  PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
  const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
  PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
  PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains.           */
  IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
  IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
  PetscInt       num_subdomains    = 0;
  DM             *subdomain_dm     = NULL;
  char           **subdomain_names = NULL;
  PetscInt       *numbering;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
  if (!pc->setupcalled) {
    /* use a hierarchical partitioning */
    if (osm->hierarchicalpartitioning) {
      ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
    }
    if (osm->n == PETSC_DETERMINE) {
      if (osm->N != PETSC_DETERMINE) {
        /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
        ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
      } else if (osm->dm_subdomains && pc->dm) {
        /* try pc->dm next, if allowed */
        PetscInt d;
        IS       *inner_subdomain_is, *outer_subdomain_is;
        ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
        if (num_subdomains) {
          ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
        }
        /* PCGASMSetSubdomains() took its own references, so the DM's index sets can be released here. */
        for (d = 0; d < num_subdomains; ++d) {
          if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
          if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
        }
        ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
        ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
      } else {
        /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
        osm->N    = size;
        ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
      }
    }
    if (!osm->iis) {
      /*
       osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
       We create the requisite number of local inner subdomains and then expand them into
       outer subdomains, if necessary.
      */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
    }
    if (!osm->ois) {
      /*
       Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
       has been requested, copy the inner subdomains over so they can be modified.
      */
      ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
      for (i=0; i<osm->n; ++i) {
        if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
          ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
          ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
          osm->ois[i] = osm->iis[i];
        }
      }
      if (osm->overlap>0 && osm->N>1) {
        /* Extend the "overlapping" regions by a number of steps */
        ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
      }
    }

    /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
    if (osm->nmax == PETSC_DETERMINE) {
      PetscMPIInt inwork,outwork;
      /* determine global number of subdomains and the max number of local subdomains */
      inwork = osm->n;
      ierr = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
      osm->nmax = outwork;
    }
    if (osm->N == PETSC_DETERMINE) {
      /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
      ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
    }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);

    /*
       Merge the ISs, create merged vectors and restrictions.
    */
    /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
    on = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
    on   = 0;
    /* Merge local indices together */
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
    nTotalInnerIndices = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
      nTotalInnerIndices += nInnerIndices;
    }
    ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&y);CHKERRQ(ierr);

    ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
    /* gois may contain indices that are not locally owned */
    ierr = VecScatterCreate(x,gois,osm->gx,goid,&(osm->gorestriction));CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
    ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = ISDestroy(&gois);CHKERRQ(ierr);

    /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
    {
      PetscInt       ini;    /* Number of indices the i-th a local inner subdomain. */
      PetscInt       in;     /* Number of indices in the disjoint union of local inner subdomains. */
      PetscInt       *iidx;  /* Global indices in the merged local inner subdomain. */
      PetscInt       *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      IS             giis;   /* IS for the disjoint union of inner subdomains. */
      IS             giois;  /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      PetscScalar    *array;
      const PetscInt *indices;
      PetscInt       k;
      on = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
      ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
      ierr = VecGetArray(y,&array);CHKERRQ(ierr);
      /* set communicator id to determine where overlap is */
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
        for (k = 0; k < ini; ++k) {
          array[in+k] = numbering[i];
        }
        in += ini;
      }
      ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
      ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
      on  = 0;
      in  = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
        for (k=0; k<oni; k++) {
          /* skip overlapping indices to get inner domain */
          if (PetscRealPart(array[on+k]) != numbering[i]) continue;
          iidx[in]    = indices[k];
          ioidx[in++] = gostart+on+k;
        }
        ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&giis);CHKERRQ(ierr);
      ierr = ISDestroy(&giois);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&goid);CHKERRQ(ierr);
    ierr = PetscFree(numbering);CHKERRQ(ierr);

    /* Create the subdomain work vectors; they alias consecutive slices of the merged vectors' storage. */
    ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, on=0; i<osm->n; ++i, on += oni) {
      PetscInt oNi;
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      /* on a sub communicator */
      ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* Create the subdomain solvers */
    ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      char subprefix[PETSC_MAX_PATH_LEN+1];
      ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
      if (subdomain_dm) {
        ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
        ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
      }
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      /* DM-named subdomains get an extra "sub_<name>_" prefix so each can be configured individually. */
      if (subdomain_names && subdomain_names[i]) {
        ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
        ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
        ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
      }
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    ierr  = PetscFree(subdomain_dm);CHKERRQ(ierr);
    ierr  = PetscFree(subdomain_names);CHKERRQ(ierr);
    scall = MAT_INITIAL_MATRIX;
  } else { /* if (pc->setupcalled) */
    /*
       Destroy the submatrices from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
    if (osm->permutationIS) {
      ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
      /* Temporarily swap in the permuted matrix; restored from osm->pcmat at the end of this function. */
      osm->pcmat = pc->pmat;
      pc->pmat   = osm->permutationP;
    }
  }

  /*
     Extract the submatrices.
  */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    ierr = KSPGetOptionsPrefix(osm->ksp[i],&prefix);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(osm->pmat[i],prefix);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  /* Undo the hierarchical-partitioning swap: put the user's original pmat back on the PC. */
  if (osm->pcmat) {
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}

/* PCSetUpOnBlocks() implementation for PCGASM: set up each subdomain solver. */
static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* PCApply() implementation for PCGASM: restrict the input, solve on each subdomain, add the results back. */
static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

PetscFunctionBegin;
  /* With hierarchical partitioning in place, first pull xin into the permuted ordering. */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves; osm->x[i]/osm->y[i] alias slices of osm->gx/osm->gy */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* do we need to zero y? */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* Map the result back from the permuted ordering, if one is in use. */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   PCMatApply() implementation for PCGASM (only supported with a single local subdomain):
   the same restriction/solve/interpolation pipeline as PCApply_GASM, applied column-by-column
   to a dense block of right-hand sides and solved with a single KSPMatSolve().
*/
static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  Mat            X,Y,O=NULL,Z,W;
  Vec            x,y;
  PetscInt       i,m,M,N;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(osm->n != 1,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
  ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
  if (osm->pctoouter) {
    /* Permute every column of Xin into the hierarchical-partitioning ordering. */
    ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
    ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
    ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
    for (i = 0; i < N; ++i) {
      ierr = MatDenseGetColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
      ierr = MatDenseGetColumnVecWrite(O,i,&y);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = MatDenseRestoreColumnVecWrite(O,i,&y);CHKERRQ(ierr);
      ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
    }
    X = Y = O;
  } else {
    X = Xin;
    Y = Yout;
  }
  /*
     support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
  ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
  ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
  for (i = 0; i < N; ++i) {
    ierr = MatDenseGetColumnVecRead(X,i,&x);CHKERRQ(ierr);
    ierr = MatDenseGetColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
    if (!(osm->type & PC_GASM_RESTRICT)) {
      /* have to zero the work RHS since scatter may leave some slots empty */
      ierr = VecZeroEntries(y);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
    } else {
      ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
    ierr = MatDenseRestoreColumnVecRead(X,i,&x);CHKERRQ(ierr);
  }
  ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
  ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* do the subdomain solve */
  ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
  ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
  ierr = MatDestroy(&Z);CHKERRQ(ierr);
  /* do we need to zero y?
*/ 745 ierr = MatZeroEntries(Y);CHKERRQ(ierr); 746 for (i = 0; i < N; ++i) { 747 ierr = MatDenseGetColumnVecWrite(Y,i,&y);CHKERRQ(ierr); 748 ierr = MatDenseGetColumnVecRead(W,i,&x);CHKERRQ(ierr); 749 if (!(osm->type & PC_GASM_INTERPOLATE)) { 750 ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 751 ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 752 } else { 753 ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 754 ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 755 } 756 ierr = MatDenseRestoreColumnVecRead(W,i,&x);CHKERRQ(ierr); 757 if (osm->pctoouter) { 758 ierr = MatDenseGetColumnVecWrite(Yout,i,&x);CHKERRQ(ierr); 759 ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);CHKERRQ(ierr); 762 } 763 ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);CHKERRQ(ierr); 764 } 765 ierr = MatDestroy(&W);CHKERRQ(ierr); 766 ierr = MatDestroy(&O);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout) 771 { 772 PC_GASM *osm = (PC_GASM*)pc->data; 773 PetscErrorCode ierr; 774 PetscInt i; 775 Vec x,y; 776 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 777 778 PetscFunctionBegin; 779 if (osm->pctoouter) { 780 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 781 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 x = osm->pcx; 783 y = osm->pcy; 784 }else{ 785 x = xin; 786 y = yout; 787 } 788 /* 789 Support for limiting the restriction or interpolation to only local 790 subdomain values (leaving the other values 0). 
  /*
     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Release everything built during PCSetUp_GASM(); user-supplied subdomain index
   sets are kept when osm->user_subdomains is true. */
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      PetscMPIInt size;
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
      if (size > 1) {
        /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
        ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      } else {
        ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      }
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
  if (!osm->user_subdomains) {
    ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
    osm->N = PETSC_DETERMINE;
    osm->nmax = PETSC_DETERMINE;
  }
  if (osm->pctoouter) {
    ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
  }
  if (osm->permutationIS) {
    ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
  }
  if (osm->pcx) {
    ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
  }
  if (osm->pcy) {
    ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
  }
  if (osm->permutationP) {
    ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Destroy the PC context: reset, then free subdomains, subdomain solvers and work arrays. */
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  /* PCReset will not destroy subdomains, if user_subdomains is true. */
*/ 898 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr); 899 if (osm->ksp) { 900 for (i=0; i<osm->n; i++) { 901 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 902 } 903 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 904 } 905 ierr = PetscFree(osm->x);CHKERRQ(ierr); 906 ierr = PetscFree(osm->y);CHKERRQ(ierr); 907 ierr = PetscFree(pc->data);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc) 912 { 913 PC_GASM *osm = (PC_GASM*)pc->data; 914 PetscErrorCode ierr; 915 PetscInt blocks,ovl; 916 PetscBool flg; 917 PCGASMType gasmtype; 918 919 PetscFunctionBegin; 920 ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr); 921 ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr); 922 ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr); 923 if (flg) { 924 ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); 925 } 926 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 927 if (flg) { 928 ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); 929 osm->dm_subdomains = PETSC_FALSE; 930 } 931 flg = PETSC_FALSE; 932 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 933 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);} 934 ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr); 935 ierr = PetscOptionsTail();CHKERRQ(ierr); 936 
PetscFunctionReturn(0); 937 } 938 939 /*------------------------------------------------------------------------------------*/ 940 941 /*@ 942 PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the 943 communicator. 944 Logically collective on pc 945 946 Input Parameters: 947 + pc - the preconditioner 948 - N - total number of subdomains 949 950 Level: beginner 951 952 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap() 953 PCGASMCreateSubdomains2D() 954 @*/ 955 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 956 { 957 PC_GASM *osm = (PC_GASM*)pc->data; 958 PetscMPIInt size,rank; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 PetscCheckFalse(N < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N); 963 PetscCheckFalse(pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp()."); 964 965 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 966 osm->ois = osm->iis = NULL; 967 968 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 969 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 970 osm->N = N; 971 osm->n = PETSC_DETERMINE; 972 osm->nmax = PETSC_DETERMINE; 973 osm->dm_subdomains = PETSC_FALSE; 974 PetscFunctionReturn(0); 975 } 976 977 static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[]) 978 { 979 PC_GASM *osm = (PC_GASM*)pc->data; 980 PetscErrorCode ierr; 981 PetscInt i; 982 983 PetscFunctionBegin; 984 PetscCheckFalse(n < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n); 985 PetscCheckFalse(pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp()."); 986 987 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 988 osm->iis 
= osm->ois = NULL; 989 osm->n = n; 990 osm->N = PETSC_DETERMINE; 991 osm->nmax = PETSC_DETERMINE; 992 if (ois) { 993 ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr); 994 for (i=0; i<n; i++) { 995 ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr); 996 osm->ois[i] = ois[i]; 997 } 998 /* 999 Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(), 1000 it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1. 1001 */ 1002 osm->overlap = -1; 1003 /* inner subdomains must be provided */ 1004 PetscCheckFalse(!iis,PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided "); 1005 }/* end if */ 1006 if (iis) { 1007 ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr); 1008 for (i=0; i<n; i++) { 1009 ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr); 1010 osm->iis[i] = iis[i]; 1011 } 1012 if (!ois) { 1013 osm->ois = NULL; 1014 /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */ 1015 } 1016 } 1017 if (PetscDefined(USE_DEBUG)) { 1018 PetscInt j,rstart,rend,*covered,lsize; 1019 const PetscInt *indices; 1020 /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */ 1021 ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr); 1022 ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr); 1023 /* check if the current processor owns indices from others */ 1024 for (i=0; i<n; i++) { 1025 ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr); 1026 ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr); 1027 for (j=0; j<lsize; j++) { 1028 PetscCheckFalse(indices[j]<rstart || indices[j]>=rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]); 1029 else PetscCheckFalse(covered[indices[j]-rstart]==1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains 
can not have an overlapping index %d ",indices[j]); 1030 else covered[indices[j]-rstart] = 1; 1031 } 1032 ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr); 1033 } 1034 /* check if we miss any indices */ 1035 for (i=rstart; i<rend; i++) { 1036 PetscCheckFalse(!covered[i-rstart],PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i); 1037 } 1038 ierr = PetscFree(covered);CHKERRQ(ierr); 1039 } 1040 if (iis) osm->user_subdomains = PETSC_TRUE; 1041 PetscFunctionReturn(0); 1042 } 1043 1044 static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 1045 { 1046 PC_GASM *osm = (PC_GASM*)pc->data; 1047 1048 PetscFunctionBegin; 1049 PetscCheckFalse(ovl < 0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 1050 PetscCheckFalse(pc->setupcalled && ovl != osm->overlap,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 1051 if (!pc->setupcalled) osm->overlap = ovl; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 1056 { 1057 PC_GASM *osm = (PC_GASM*)pc->data; 1058 1059 PetscFunctionBegin; 1060 osm->type = type; 1061 osm->type_set = PETSC_TRUE; 1062 PetscFunctionReturn(0); 1063 } 1064 1065 static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 1066 { 1067 PC_GASM *osm = (PC_GASM*)pc->data; 1068 1069 PetscFunctionBegin; 1070 osm->sort_indices = doSort; 1071 PetscFunctionReturn(0); 1072 } 1073 1074 /* 1075 FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed. 1076 In particular, it would upset the global subdomain number calculation. 
static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscCheckFalse(osm->n < 1,PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) *n = osm->n;
  if (first) {
    /* global number of the first subdomain on this process = prefix sum of local counts */
    ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp = osm->ksp;
    osm->same_subdomain_solvers = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
} /* PCGASMGetSubKSP_GASM() */

/*@C
    PCGASMSetSubdomains - Sets the subdomains for this processor
    for the additive Schwarz preconditioner.

    Collective on pc

    Input Parameters:
+   pc  - the preconditioner object
.   n   - the number of subdomains for this processor
.   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
-   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

    Notes:
    The IS indices use the parallel, global numbering of the vector entries.
    Inner subdomains are those where the correction is applied.
    Outer subdomains are those where the residual necessary to obtain the
    corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
    Both inner and outer subdomains can extend over several processors.
    This processor's portion of a subdomain is known as a local subdomain.

    Inner subdomains can not overlap with each other, do not have any entities from remote processors,
    and have to cover the entire local subdomain owned by the current processor. The index sets on each
    process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
    on another MPI process.

    By default the GASM preconditioner uses 1 (local) subdomain per processor.

    Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    pc communicator must call this routine.

    Logically Collective on pc

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

    Options Database Key:
.   -pc_gasm_overlap <overlap> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomain per perocessor or "straddling" subdomains that intersect
    multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).

    The overlap defaults to 0, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
    not explicitly set the subdomains in application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
    PETSc to extend that overlap further, if desired.

    Level: intermediate

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;
  PC_GASM        *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc     - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local     - the number of blocks on this processor or NULL
.  first_local - the global number of the first block on this processor or NULL,
                 all processors must request or all must pass NULL
-  ksp         - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSPes is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D(),
@*/
PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
.  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
.  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
.  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_gasm_type restrict.  Use
      -pc_gasm_type basic to use the standard GASM.

   Notes:
    Blocks can be shared by multiple processes.

     To set options on the solvers for each block append -sub_ to all the KSP, and PC
        options database keys.
/* (PCGASM man page, continued)
     For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separate for each block call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())

   Level: beginner

    References:
+   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
     Courant Institute, New York University Technical report
-   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Cambridge University Press.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
           PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/

PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);

  /* Initialize the context to its "not yet set up" state. */
  osm->N                        = PETSC_DETERMINE;
  osm->n                        = PETSC_DECIDE;
  osm->nmax                     = PETSC_DETERMINE;
  osm->overlap                  = 0;
  osm->ksp                      = NULL;
  osm->gorestriction            = NULL;
  osm->girestriction            = NULL;
  osm->pctoouter                = NULL;
  osm->gx                       = NULL;
  osm->gy                       = NULL;
  osm->x                        = NULL;
  osm->y                        = NULL;
  osm->pcx                      = NULL;
  osm->pcy                      = NULL;
  osm->permutationIS            = NULL;
  osm->permutationP             = NULL;
  osm->pcmat                    = NULL;
  osm->ois                      = NULL;
  osm->iis                      = NULL;
  osm->pmat                     = NULL;
  osm->type                     = PC_GASM_RESTRICT;
  osm->same_subdomain_solvers   = PETSC_TRUE;
  osm->sort_indices             = PETSC_TRUE;
  osm->dm_subdomains            = PETSC_FALSE;
  osm->hierarchicalpartitioning = PETSC_FALSE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->matapply        = PCMatApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Split the locally owned rows of A into nloc index sets, one per local subdomain,
   using a matrix partitioner on the diagonal block when one is available and
   contiguous chunks of (block) rows otherwise. */
PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscCheckFalse(nloc < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  PetscCheckFalse(rstart/bs*bs != rstart || rend/bs*bs != rend,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (hasop) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  }
  if (Ad) {
    ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && nloc > 1) {
    PetscBool match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt       na;
      const PetscInt *ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
        const PetscInt *row;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
        ierr = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
        nnz  = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix; MatCreateMPIAdj() takes ownership of iia/jja. */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<nloc; i++) {
      /* spread the remainder (mbs % nloc) one extra block-row at a time over the first subdomains */
      PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block  */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<nloc; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
      }
      ierr = PetscFree(indices);CHKERRQ(ierr);
      nidx *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<nloc; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }
  *iis = is;
  PetscFunctionReturn(0);
}

/* Create N straddling subdomains across the communicator by coalescing
   the rows of A (delegates to MatSubdomainsCreateCoalesce()). */
PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
   Schwarz preconditioner for any problem based on its matrix.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  N - the number of global subdomains requested

   Output Parameters:
+  n - the number of subdomains created on this processor
-  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)

   Level: advanced

   Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
         When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
         The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping
         outer subdomains will be automatically generated from these according to the requested amount of
         overlap; this is currently supported only with local subdomains.

.seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
@*/
PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(iis,4);

  PetscCheckFalse(N < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (N >= size) {
    /* N >= size: every subdomain fits on one rank; split the request evenly across ranks.
       NOTE(review): every rank gets the same count N/size + (N%size), so the global total is
       size*(N/size + N%size), which exceeds N whenever size does not divide N -- confirm this
       is the intended rounding rather than N/size + ((N%size) > rank). */
    *n = N/size + (N%size);
    ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
  } else {
    /* Fewer subdomains than ranks: subdomains must straddle processor boundaries. */
    ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   PCGASMDestroySubdomains - Destroys the index sets created with
   PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
   called after setting subdomains with PCGASMSetSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  iis - the array of inner subdomains
-  ois - the array of outer subdomains, can be NULL

   Level: intermediate

   Notes:
   this is merely a convenience subroutine that walks each list,
   destroys each IS on the list, and then frees the list. At the end the
   list pointers are set to NULL (PetscFree() nulls the freed pointer).

.seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
@*/
PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n <= 0) PetscFunctionReturn(0);
  if (ois) {
    PetscValidPointer(ois,3);
    if (*ois) {
      PetscValidPointer(*ois,3);
      for (i=0; i<n; i++) {
        ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree((*ois));CHKERRQ(ierr);
    }
  }
  if (iis) {
    PetscValidPointer(iis,2);
    if (*iis) {
      PetscValidPointer(*iis,2);
      for (i=0; i<n; i++) {
        ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree((*iis));CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/* Clip one rectangular 2D subdomain [xleft,xright) x [ylow,yhigh) against this rank's contiguous
   ownership range [first,last) of grid points (dof not yet applied), producing the bounding box of
   the intersection and its point count n. Used by PCGASMCreateSubdomains2D(). */
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                                      \
  PetscInt first_row = first/M, last_row = last/M+1;                                                                   \
  /*                                                                                                                   \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners                      \
   of the bounding box of the intersection of the subdomain with the local ownership range (local                      \
   subdomain).                                                                                                         \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows                      \
   of the intersection.                                                                                                \
  */                                                                                                                   \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                                   \
  *ylow_loc = PetscMax(first_row,ylow);                                                                                \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                 \
  /* yhigh_loc is the grid row above the last local subdomain element */                                               \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                               \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                               \
  /* Now compute the size of the local subdomain n: full interior rows plus the partial first/last rows. */            \
  *n = 0;                                                                                                              \
  if (*ylow_loc < *yhigh_loc) {                                                                                        \
    PetscInt width = xright-xleft;                                                                                     \
    *n += width*(*yhigh_loc-*ylow_loc-1);                                                                              \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width);                                                               \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width);                                                               \
  }                                                                                                                    \
}

/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  pc - the preconditioner context
.  M - the global number of grid points in the x direction
.  N - the global number of grid points in the y direction
.  Mdomains - the global number of subdomains in the x direction
.  Ndomains - the global number of subdomains in the y direction
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  nsub - the number of local subdomains created
.
iis - array of index sets defining inner (nonoverlapping) subdomains 1681 - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains 1682 1683 Level: advanced 1684 1685 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap() 1686 @*/ 1687 PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois) 1688 { 1689 PetscErrorCode ierr; 1690 PetscMPIInt size, rank; 1691 PetscInt i, j; 1692 PetscInt maxheight, maxwidth; 1693 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1694 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1695 PetscInt x[2][2], y[2][2], n[2]; 1696 PetscInt first, last; 1697 PetscInt nidx, *idx; 1698 PetscInt ii,jj,s,q,d; 1699 PetscInt k,kk; 1700 PetscMPIInt color; 1701 MPI_Comm comm, subcomm; 1702 IS **xis = NULL, **is = ois, **is_local = iis; 1703 1704 PetscFunctionBegin; 1705 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1706 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 1707 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 1708 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1709 PetscCheckFalse(first%dof || last%dof,PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1710 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1711 1712 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1713 s = 0; 1714 ystart = 0; 1715 for (j=0; j<Ndomains; ++j) { 1716 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1717 PetscCheckFalse(maxheight < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1718 /* Vertical domain limits with an overlap. 
*/ 1719 ylow = PetscMax(ystart - overlap,0); 1720 yhigh = PetscMin(ystart + maxheight + overlap,N); 1721 xstart = 0; 1722 for (i=0; i<Mdomains; ++i) { 1723 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1724 PetscCheckFalse(maxwidth < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1725 /* Horizontal domain limits with an overlap. */ 1726 xleft = PetscMax(xstart - overlap,0); 1727 xright = PetscMin(xstart + maxwidth + overlap,M); 1728 /* 1729 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1730 */ 1731 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1732 if (nidx) ++s; 1733 xstart += maxwidth; 1734 } /* for (i = 0; i < Mdomains; ++i) */ 1735 ystart += maxheight; 1736 } /* for (j = 0; j < Ndomains; ++j) */ 1737 1738 /* Now we can allocate the necessary number of ISs. */ 1739 *nsub = s; 1740 ierr = PetscMalloc1(*nsub,is);CHKERRQ(ierr); 1741 ierr = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr); 1742 s = 0; 1743 ystart = 0; 1744 for (j=0; j<Ndomains; ++j) { 1745 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1746 PetscCheckFalse(maxheight < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1747 /* Vertical domain limits with an overlap. */ 1748 y[0][0] = PetscMax(ystart - overlap,0); 1749 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1750 /* Vertical domain limits without an overlap. 
*/ 1751 y[1][0] = ystart; 1752 y[1][1] = PetscMin(ystart + maxheight,N); 1753 xstart = 0; 1754 for (i=0; i<Mdomains; ++i) { 1755 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1756 PetscCheckFalse(maxwidth < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1757 /* Horizontal domain limits with an overlap. */ 1758 x[0][0] = PetscMax(xstart - overlap,0); 1759 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1760 /* Horizontal domain limits without an overlap. */ 1761 x[1][0] = xstart; 1762 x[1][1] = PetscMin(xstart+maxwidth,M); 1763 /* 1764 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1765 Do this twice: first for the domains with overlaps, and once without. 1766 During the first pass create the subcommunicators, and use them on the second pass as well. 1767 */ 1768 for (q = 0; q < 2; ++q) { 1769 PetscBool split = PETSC_FALSE; 1770 /* 1771 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1772 according to whether the domain with an overlap or without is considered. 1773 */ 1774 xleft = x[q][0]; xright = x[q][1]; 1775 ylow = y[q][0]; yhigh = y[q][1]; 1776 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1777 nidx *= dof; 1778 n[q] = nidx; 1779 /* 1780 Based on the counted number of indices in the local domain *with an overlap*, 1781 construct a subcommunicator of all the processors supporting this domain. 1782 Observe that a domain with an overlap might have nontrivial local support, 1783 while the domain without an overlap might not. Hence, the decision to participate 1784 in the subcommunicator must be based on the domain with an overlap. 
1785 */ 1786 if (q == 0) { 1787 if (nidx) color = 1; 1788 else color = MPI_UNDEFINED; 1789 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr); 1790 split = PETSC_TRUE; 1791 } 1792 /* 1793 Proceed only if the number of local indices *with an overlap* is nonzero. 1794 */ 1795 if (n[0]) { 1796 if (q == 0) xis = is; 1797 if (q == 1) { 1798 /* 1799 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1800 Moreover, if the overlap is zero, the two ISs are identical. 1801 */ 1802 if (overlap == 0) { 1803 (*is_local)[s] = (*is)[s]; 1804 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1805 continue; 1806 } else { 1807 xis = is_local; 1808 subcomm = ((PetscObject)(*is)[s])->comm; 1809 } 1810 } /* if (q == 1) */ 1811 idx = NULL; 1812 ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr); 1813 if (nidx) { 1814 k = 0; 1815 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1816 PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft; 1817 PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright; 1818 kk = dof*(M*jj + x0); 1819 for (ii=x0; ii<x1; ++ii) { 1820 for (d = 0; d < dof; ++d) { 1821 idx[k++] = kk++; 1822 } 1823 } 1824 } 1825 } 1826 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr); 1827 if (split) { 1828 ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr); 1829 } 1830 }/* if (n[0]) */ 1831 }/* for (q = 0; q < 2; ++q) */ 1832 if (n[0]) ++s; 1833 xstart += maxwidth; 1834 } /* for (i = 0; i < Mdomains; ++i) */ 1835 ystart += maxheight; 1836 } /* for (j = 0; j < Ndomains; ++j) */ 1837 PetscFunctionReturn(0); 1838 } 1839 1840 /*@C 1841 PCGASMGetSubdomains - Gets the subdomains supported on this processor 1842 for the additive Schwarz preconditioner. 1843 1844 Not Collective 1845 1846 Input Parameter: 1847 . pc - the preconditioner context 1848 1849 Output Parameters: 1850 + n - the number of subdomains for this processor (default value = 1) 1851 . 
iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
-   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)

    Notes:
    The user is responsible for destroying the ISs and freeing the returned arrays.
    The IS numbering is in the parallel, global numbering of the vector.

    Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
          PCGASMSetSubdomains(), PCGASMGetSubmatrices()
@*/
PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
{
  PC_GASM        *gasm;
  PetscBool      isgasm;
  PetscInt       b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&isgasm);CHKERRQ(ierr);
  PetscCheckFalse(!isgasm,PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  gasm = (PC_GASM*)pc->data;
  if (n) *n = gasm->n;
  /* Allocate fresh arrays for the caller (who owns them) and fill them with the stored IS handles. */
  if (iis) {ierr = PetscMalloc1(gasm->n, iis);CHKERRQ(ierr);}
  if (ois) {ierr = PetscMalloc1(gasm->n, ois);CHKERRQ(ierr);}
  for (b = 0; b < gasm->n; ++b) {
    if (iis) (*iis)[b] = gasm->iis[b];
    if (ois) (*ois)[b] = gasm->ois[b];
  }
  PetscFunctionReturn(0);
}

/*@C
    PCGASMGetSubmatrices - Gets the local submatrices (for this processor
    only) for the additive Schwarz preconditioner.

    Not Collective

    Input Parameter:
.
pc - the preconditioner context

    Output Parameters:
+   n - the number of matrices for this processor (default value = 1)
-   mat - the matrices

    Notes:
    matrices returned by this routine have the same communicators as the index sets (IS)
    used to define subdomains in PCGASMSetSubdomains()

    Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *gasm;
  PetscBool      isgasm;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The submatrices only exist once the preconditioner has been set up. */
  PetscCheckFalse(!pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&isgasm);CHKERRQ(ierr);
  PetscCheckFalse(!isgasm,PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  gasm = (PC_GASM*)pc->data;
  if (n)   *n   = gasm->n;
  if (mat) *mat = gasm->pmat;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
    Logically Collective

    Input Parameters:
+   pc  - the preconditioner
-   flg - boolean indicating whether to use subdomains defined by the DM

    Options Database Key:
.
-pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains

    Level: intermediate

    Notes:
    PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
    so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
    automatically turns the latter off.

.seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
{
  PC_GASM        *gasm = (PC_GASM*)pc->data;
  PetscBool      isgasm;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,flg,2);
  PetscCheckFalse(pc->setupcalled,((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&isgasm);CHKERRQ(ierr);
  /* Silently ignored unless the PC is GASM and the user has not already chosen subdomains,
     a total subdomain count, or an overlap, since those settings take precedence. */
  if (isgasm && !gasm->user_subdomains && gasm->N == PETSC_DETERMINE && gasm->overlap < 0) gasm->dm_subdomains = flg;
  PetscFunctionReturn(0);
}

/*@
    PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
    Not Collective

    Input Parameter:
.   pc  - the preconditioner

    Output Parameter:
.
flg - boolean indicating whether to use subdomains defined by the DM

    Level: intermediate

.seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
{
  PC_GASM        *gasm = (PC_GASM*)pc->data;
  PetscBool      isgasm;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidBoolPointer(flg,2);
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&isgasm);CHKERRQ(ierr);
  /* *flg is only written when the PC really is GASM; otherwise it is left untouched. */
  if (isgasm && flg) *flg = gasm->dm_subdomains;
  PetscFunctionReturn(0);
}