1 /* 2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 3 In this version each processor may intersect multiple subdomains and any subdomain may 4 intersect multiple processors. Intersections of subdomains with processors are called *local 5 subdomains*. 6 7 N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM()) 8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains()) 9 nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM()) 10 */ 11 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 12 #include <petscdm.h> 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 PCGASMType type; /* use reduced interpolation, restriction or both */ 18 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 19 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */ 20 PetscBool sort_indices; /* flag to sort subdomain indices */ 21 PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */ 22 PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */ 23 PetscBool hierarchicalpartitioning; 24 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */ 25 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */ 26 KSP *ksp; /* linear solvers for each subdomain */ 27 Mat *pmat; /* subdomain block matrices */ 28 Vec gx,gy; /* Merged work vectors */ 29 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 30 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */ 31 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */ 32 VecScatter pctoouter; 33 IS permutationIS; 34 Mat permutationP; 35 Mat pcmat; 36 Vec pcx,pcy; 37 } PC_GASM; 38 39 static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation) 40 { 41 PC_GASM *osm = (PC_GASM*)pc->data; 42 PetscInt i; 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */ 47 ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr); 48 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr); 49 for (i = 0; i < osm->n; ++i) (*permutation)[i] = i; 50 ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer) 55 { 56 PC_GASM *osm = (PC_GASM*)pc->data; 57 PetscInt j,nidx; 58 const PetscInt *idx; 59 PetscViewer sviewer; 60 char *cidx; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n); 65 /* Inner subdomains. */ 66 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr); 67 /* 68 No more than 15 characters per index plus a space. 69 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 70 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 71 For nidx == 0, the whole string 16 '\0'. 72 */ 73 #define len 16*(nidx+1)+1 74 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 75 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 76 #undef len 77 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr); 78 for (j = 0; j < nidx; ++j) { 79 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 80 } 81 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr); 82 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 83 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr); 84 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 87 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 88 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 90 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 91 ierr = PetscFree(cidx);CHKERRQ(ierr); 92 /* Outer subdomains. */ 93 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr); 94 /* 95 No more than 15 characters per index plus a space. 96 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 97 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 98 For nidx == 0, the whole string 16 '\0'. 99 */ 100 #define len 16*(nidx+1)+1 101 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 102 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 103 #undef len 104 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr); 105 for (j = 0; j < nidx; ++j) { 106 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr); 107 } 108 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 109 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr); 110 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr); 111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 115 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 116 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 117 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 118 ierr = PetscFree(cidx);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 123 { 124 PC_GASM *osm = (PC_GASM*)pc->data; 125 const char *prefix; 126 char fname[PETSC_MAX_PATH_LEN+1]; 127 PetscInt l, d, count; 128 PetscBool doprint,found; 129 PetscViewer viewer, sviewer = NULL; 130 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */ 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 135 doprint = PETSC_FALSE; 136 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr); 137 if (!doprint) PetscFunctionReturn(0); 138 ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr); 139 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 140 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr); 141 /* 142 Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer: 143 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm. 144 */ 145 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 146 l = 0; 147 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 148 for (count = 0; count < osm->N; ++count) { 149 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */ 150 if (l<osm->n) { 151 d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 152 if (numbering[d] == count) { 153 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 154 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 155 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 156 ++l; 157 } 158 } 159 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 160 } 161 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 162 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 167 { 168 PC_GASM *osm = (PC_GASM*)pc->data; 169 const char *prefix; 170 PetscErrorCode ierr; 171 PetscMPIInt rank, size; 172 PetscInt bsz; 173 PetscBool iascii,view_subdomains=PETSC_FALSE; 174 PetscViewer sviewer; 175 PetscInt count, l; 176 char overlap[256] = "user-defined overlap"; 177 char gsubdomains[256] = "unknown total number of subdomains"; 178 char msubdomains[256] = "unknown max number of local subdomains"; 179 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */ 180 181 PetscFunctionBegin; 182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr); 183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr); 184 185 if (osm->overlap >= 0) { 186 ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr); 187 } 188 if (osm->N != PETSC_DETERMINE) { 189 ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr); 190 } 191 if (osm->nmax != PETSC_DETERMINE) { 192 ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr); 193 } 194 195 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 196 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr); 197 198 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 199 if (iascii) { 200 /* 201 Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer: 202 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed 203 collectively on the comm. 204 */ 205 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 206 ierr = PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 207 ierr = PetscViewerASCIIPrintf(viewer," %s\n",overlap);CHKERRQ(ierr); 208 ierr = PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);CHKERRQ(ierr); 209 ierr = PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);CHKERRQ(ierr); 210 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr); 212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 213 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 214 /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */ 215 ierr = PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");CHKERRQ(ierr); 216 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 217 ierr = PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 218 /* Make sure that everybody waits for the banner to be printed. */ 219 ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 220 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */ 221 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 222 l = 0; 223 for (count = 0; count < osm->N; ++count) { 224 PetscMPIInt srank, ssize; 225 if (l<osm->n) { 226 PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 227 if (numbering[d] == count) { 228 ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr); 229 ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr); 230 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 231 ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr); 232 ierr = PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr); 233 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 234 if (view_subdomains) { 235 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 236 } 237 if (!pc->setupcalled) { 238 ierr = PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr); 239 } else { 240 ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr); 241 } 242 ierr = PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 243 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 244 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 245 ++l; 246 } 247 } 248 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 249 } 250 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 252 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 253 /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 254 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 255 } 256 PetscFunctionReturn(0); 257 } 258 259 PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]); 260 261 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc) 262 { 263 PC_GASM *osm = (PC_GASM*)pc->data; 264 MatPartitioning part; 265 MPI_Comm comm; 266 PetscMPIInt size; 267 PetscInt nlocalsubdomains,fromrows_localsize; 268 IS partitioning,fromrows,isn; 269 Vec outervec; 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 274 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 275 /* we do not need a hierarchical partitioning when 276 * the total number of subdomains is consistent with 277 * the number of MPI tasks. 278 * For the following cases, we do not need to use HP 279 * */ 280 if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0); 281 if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size); 282 nlocalsubdomains = size/osm->N; 283 osm->n = 1; 284 ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr); 285 ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr); 286 ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr); 287 ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr); 288 ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr); 289 ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr); 290 /* get new processor owner number of each vertex */ 291 ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr); 292 ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr); 293 ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr); 294 ierr = ISDestroy(&isn);CHKERRQ(ierr); 295 ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr); 296 ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr); 297 ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr); 298 ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr); 299 ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr); 300 ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr); 301 ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr); 302 ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr); 303 osm->permutationIS = fromrows; 304 osm->pcmat = pc->pmat; 305 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 306 pc->pmat = osm->permutationP; 307 ierr = VecDestroy(&outervec);CHKERRQ(ierr); 308 ierr = ISDestroy(&fromrows);CHKERRQ(ierr); 309 ierr = ISDestroy(&partitioning);CHKERRQ(ierr); 310 osm->n = PETSC_DETERMINE; 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode PCSetUp_GASM(PC pc) 315 { 316 PC_GASM *osm = (PC_GASM*)pc->data; 317 PetscErrorCode ierr; 318 PetscInt i,nInnerIndices,nTotalInnerIndices; 319 PetscMPIInt rank, size; 320 MatReuse scall = MAT_REUSE_MATRIX; 321 KSP ksp; 322 PC subpc; 323 const char *prefix,*pprefix; 324 Vec x,y; 325 PetscInt oni; /* Number of indices in the i-th local outer subdomain. */ 326 const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain. */ 327 PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */ 328 PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */ 329 IS gois; /* Disjoint union the global indices of outer subdomains. */ 330 IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */ 331 PetscScalar *gxarray, *gyarray; 332 PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */ 333 PetscInt num_subdomains = 0; 334 DM *subdomain_dm = NULL; 335 char **subdomain_names = NULL; 336 PetscInt *numbering; 337 338 339 PetscFunctionBegin; 340 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 342 if (!pc->setupcalled) { 343 /* use a hierarchical partitioning */ 344 if (osm->hierarchicalpartitioning){ 345 ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr); 346 } 347 if (osm->n == PETSC_DETERMINE) { 348 if (osm->N != PETSC_DETERMINE) { 349 /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */ 350 ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr); 351 } else if (osm->dm_subdomains && pc->dm) { 352 /* try pc->dm next, if allowed */ 353 PetscInt d; 354 IS *inner_subdomain_is, *outer_subdomain_is; 355 ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr); 356 if (num_subdomains) { 357 ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr); 358 } 359 for (d = 0; d < num_subdomains; ++d) { 360 if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);} 361 if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);} 362 } 363 ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr); 364 ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr); 365 } else { 366 /* still no subdomains; use one per processor */ 367 osm->nmax = osm->n = 1; 368 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 369 osm->N = size; 370 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 371 } 372 } 373 if (!osm->iis) { 374 /* 375 osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied. 376 We create the requisite number of local inner subdomains and then expand them into 377 out subdomains, if necessary. 378 */ 379 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 380 } 381 if (!osm->ois) { 382 /* 383 Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap 384 has been requested, copy the inner subdomains over so they can be modified. 385 */ 386 ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr); 387 for (i=0; i<osm->n; ++i) { 388 if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */ 389 ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr); 390 ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr); 391 } else { 392 ierr = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr); 393 osm->ois[i] = osm->iis[i]; 394 } 395 } 396 if (osm->overlap>0 && osm->N>1) { 397 /* Extend the "overlapping" regions by a number of steps */ 398 ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr); 399 } 400 } 401 402 /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */ 403 if (osm->nmax == PETSC_DETERMINE) { 404 PetscMPIInt inwork,outwork; 405 /* determine global number of subdomains and the max number of local subdomains */ 406 inwork = osm->n; 407 ierr = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 408 osm->nmax = outwork; 409 } 410 if (osm->N == PETSC_DETERMINE) { 411 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */ 412 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr); 413 } 414 415 if (osm->sort_indices) { 416 for (i=0; i<osm->n; i++) { 417 ierr = ISSort(osm->ois[i]);CHKERRQ(ierr); 418 ierr = ISSort(osm->iis[i]);CHKERRQ(ierr); 419 } 420 } 421 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 422 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 423 424 /* 425 Merge the ISs, create merged vectors and restrictions. 426 */ 427 /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */ 428 on = 0; 429 for (i=0; i<osm->n; i++) { 430 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 431 on += oni; 432 } 433 ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr); 434 on = 0; 435 /* Merge local indices together */ 436 for (i=0; i<osm->n; i++) { 437 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 438 ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 439 ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr); 440 ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 441 on += oni; 442 } 443 ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr); 444 nTotalInnerIndices = 0; 445 for (i=0; i<osm->n; i++){ 446 ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr); 447 nTotalInnerIndices += nInnerIndices; 448 } 449 ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr); 450 ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 451 452 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr); 453 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 454 ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr); 455 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr); 456 /* gois might indices not on local */ 457 ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr); 458 ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr); 459 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr); 460 ierr = VecDestroy(&x);CHKERRQ(ierr); 461 ierr = ISDestroy(&gois);CHKERRQ(ierr); 462 463 /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */ 464 { 465 PetscInt ini; /* Number of indices the i-th a local inner subdomain. */ 466 PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */ 467 PetscInt *iidx; /* Global indices in the merged local inner subdomain. */ 468 PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 469 IS giis; /* IS for the disjoint union of inner subdomains. */ 470 IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 471 PetscScalar *array; 472 const PetscInt *indices; 473 PetscInt k; 474 on = 0; 475 for (i=0; i<osm->n; i++) { 476 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 477 on += oni; 478 } 479 ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr); 480 ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr); 481 ierr = VecGetArray(y,&array);CHKERRQ(ierr); 482 /* set communicator id to determine where overlap is */ 483 in = 0; 484 for (i=0; i<osm->n; i++) { 485 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr); 486 for (k = 0; k < ini; ++k){ 487 array[in+k] = numbering[i]; 488 } 489 in += ini; 490 } 491 ierr = VecRestoreArray(y,&array);CHKERRQ(ierr); 492 ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 494 ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr); 495 ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr); 496 on = 0; 497 in = 0; 498 for (i=0; i<osm->n; i++) { 499 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 500 ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr); 501 for (k=0; k<oni; k++) { 502 /* skip overlapping indices to get inner domain */ 503 if (PetscRealPart(array[on+k]) != numbering[i]) continue; 504 iidx[in] = indices[k]; 505 ioidx[in++] = gostart+on+k; 506 } 507 ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr); 508 on += oni; 509 } 510 ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr); 511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr); 512 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr); 513 ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr); 514 ierr = VecDestroy(&y);CHKERRQ(ierr); 515 ierr = ISDestroy(&giis);CHKERRQ(ierr); 516 ierr = ISDestroy(&giois);CHKERRQ(ierr); 517 } 518 ierr = ISDestroy(&goid);CHKERRQ(ierr); 519 ierr = PetscFree(numbering);CHKERRQ(ierr); 520 521 /* Create the subdomain work vectors. */ 522 ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr); 523 ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr); 524 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 525 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 526 for (i=0, on=0; i<osm->n; ++i, on += oni) { 527 PetscInt oNi; 528 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 529 /* on a sub communicator */ 530 ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr); 531 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr); 532 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr); 533 } 534 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 535 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 536 /* Create the subdomain solvers */ 537 ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr); 538 for (i=0; i<osm->n; i++) { 539 char subprefix[PETSC_MAX_PATH_LEN+1]; 540 ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr); 541 ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr); 542 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr); 543 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 544 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 545 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */ 546 if (subdomain_dm) { 547 ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr); 548 ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr); 549 } 550 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 551 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 552 if (subdomain_names && subdomain_names[i]) { 553 ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr); 554 ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr); 555 ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr); 556 } 557 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 558 osm->ksp[i] = ksp; 559 } 560 ierr = PetscFree(subdomain_dm);CHKERRQ(ierr); 561 ierr = PetscFree(subdomain_names);CHKERRQ(ierr); 562 scall = MAT_INITIAL_MATRIX; 563 } else { /* if (pc->setupcalled) */ 564 /* 565 Destroy the submatrices from the previous iteration 566 */ 567 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 568 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 569 scall = MAT_INITIAL_MATRIX; 570 } 571 if (osm->permutationIS){ 572 ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr); 573 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 574 osm->pcmat = pc->pmat; 575 pc->pmat = osm->permutationP; 576 } 577 } 578 579 /* 580 Extract the submatrices. 581 */ 582 if (size > 1) { 583 ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 584 } else { 585 ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 586 } 587 if (scall == MAT_INITIAL_MATRIX) { 588 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 589 for (i=0; i<osm->n; i++) { 590 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr); 591 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 592 } 593 } 594 595 /* Return control to the user so that the submatrices can be modified (e.g., to apply 596 different boundary conditions for the submatrices than for the global problem) */ 597 ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 598 599 /* 600 Loop over submatrices putting them into local ksps 601 */ 602 for (i=0; i<osm->n; i++) { 603 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr); 604 ierr = KSPGetOptionsPrefix(osm->ksp[i],&prefix);CHKERRQ(ierr); 605 ierr = MatSetOptionsPrefix(osm->pmat[i],prefix);CHKERRQ(ierr); 606 if (!pc->setupcalled) { 607 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 608 } 609 } 610 if (osm->pcmat){ 611 ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr); 612 pc->pmat = osm->pcmat; 613 osm->pcmat = NULL; 614 } 615 PetscFunctionReturn(0); 616 } 617 618 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 619 { 620 PC_GASM *osm = (PC_GASM*)pc->data; 621 PetscErrorCode ierr; 622 PetscInt i; 623 624 PetscFunctionBegin; 625 for (i=0; i<osm->n; i++) { 626 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 627 } 628 PetscFunctionReturn(0); 629 } 630 631 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout) 632 { 633 PC_GASM *osm = (PC_GASM*)pc->data; 634 PetscErrorCode ierr; 635 PetscInt i; 636 Vec x,y; 637 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 638 639 PetscFunctionBegin; 640 if (osm->pctoouter) { 641 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 642 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 643 x = osm->pcx; 644 y = osm->pcy; 645 } else { 646 x = xin; 647 y = yout; 648 } 649 /* 650 support for limiting the restriction or interpolation only to the inner 651 subdomain values (leaving the other values 0). 652 */ 653 if (!(osm->type & PC_GASM_RESTRICT)) { 654 /* have to zero the work RHS since scatter may leave some slots empty */ 655 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 656 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 657 } else { 658 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 659 } 660 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 661 if (!(osm->type & PC_GASM_RESTRICT)) { 662 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 663 } else { 664 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 665 } 666 /* do the subdomain solves */ 667 for (i=0; i<osm->n; ++i) { 668 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 669 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr); 670 } 671 /* do we need to zero y? */ 672 ierr = VecZeroEntries(y);CHKERRQ(ierr); 673 if (!(osm->type & PC_GASM_INTERPOLATE)) { 674 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 675 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 676 } else { 677 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 678 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 679 } 680 if (osm->pctoouter) { 681 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 682 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 683 } 684 PetscFunctionReturn(0); 685 } 686 687 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout) 688 { 689 PC_GASM *osm = (PC_GASM*)pc->data; 690 Mat X,Y,O=NULL,Z,W; 691 Vec x,y; 692 PetscInt i,m,M,N; 693 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 694 PetscErrorCode ierr; 695 696 PetscFunctionBegin; 697 if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 698 ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr); 699 if (osm->pctoouter) { 700 ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr); 701 ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr); 702 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr); 703 for (i = 0; i < N; ++i) { 704 ierr = MatDenseGetColumnVecRead(Xin,i,&x); 705 ierr = MatDenseGetColumnVecWrite(O,i,&y); 706 ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 707 ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 708 ierr = MatDenseRestoreColumnVecWrite(O,i,&y); 709 ierr = MatDenseRestoreColumnVecRead(Xin,i,&x); 710 } 711 X = Y = O; 712 } else { 713 X = Xin; 714 Y = Yout; 715 } 716 /* 717 support for limiting the restriction or interpolation only to the inner 718 subdomain values (leaving the other values 0). 719 */ 720 ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr); 721 ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr); 722 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr); 723 for (i = 0; i < N; ++i) { 724 ierr = MatDenseGetColumnVecRead(X,i,&x); 725 ierr = MatDenseGetColumnVecWrite(Z,i,&y); 726 if (!(osm->type & PC_GASM_RESTRICT)) { 727 /* have to zero the work RHS since scatter may leave some slots empty */ 728 ierr = VecZeroEntries(y);CHKERRQ(ierr); 729 ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 730 ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 731 } else { 732 ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 733 ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 734 } 735 ierr = MatDenseRestoreColumnVecWrite(Z,i,&y); 736 ierr = MatDenseRestoreColumnVecRead(X,i,&x); 737 } 738 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr); 739 ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 740 ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 741 ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 742 /* do the subdomain solve */ 743 ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr); 744 ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr); 745 ierr = MatDestroy(&Z);CHKERRQ(ierr); 746 /* do we need to zero y? */ 747 ierr = MatZeroEntries(Y);CHKERRQ(ierr); 748 for (i = 0; i < N; ++i) { 749 ierr = MatDenseGetColumnVecWrite(Y,i,&y); 750 ierr = MatDenseGetColumnVecRead(W,i,&x); 751 if (!(osm->type & PC_GASM_INTERPOLATE)) { 752 ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 753 ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 754 } else { 755 ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 756 ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 757 } 758 ierr = MatDenseRestoreColumnVecRead(W,i,&x); 759 if (osm->pctoouter) { 760 ierr = MatDenseGetColumnVecWrite(Yout,i,&x); 761 ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = MatDenseRestoreColumnVecRead(Yout,i,&x); 764 } 765 ierr = MatDenseRestoreColumnVecWrite(Y,i,&y); 766 } 767 ierr = MatDestroy(&W);CHKERRQ(ierr); 768 ierr = MatDestroy(&O);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout) 773 { 774 PC_GASM *osm = (PC_GASM*)pc->data; 775 PetscErrorCode ierr; 776 PetscInt i; 777 Vec x,y; 778 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 779 780 PetscFunctionBegin; 781 if (osm->pctoouter){ 782 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 784 x = osm->pcx; 785 y = osm->pcy; 786 }else{ 787 x = xin; 788 y = yout; 789 } 790 /* 791 Support for limiting the restriction or interpolation to only local 792 subdomain values (leaving the other values 0). 793 794 Note: these are reversed from the PCApply_GASM() because we are applying the 795 transpose of the three terms 796 */ 797 if (!(osm->type & PC_GASM_INTERPOLATE)) { 798 /* have to zero the work RHS since scatter may leave some slots empty */ 799 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 800 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 801 } else { 802 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 803 } 804 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 805 if (!(osm->type & PC_GASM_INTERPOLATE)) { 806 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 807 } else { 808 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 809 } 810 /* do the local solves */ 811 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 812 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 813 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr); 814 } 815 ierr = VecZeroEntries(y);CHKERRQ(ierr); 816 if (!(osm->type & PC_GASM_RESTRICT)) { 817 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 818 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 819 } else { 820 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 821 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 822 } 823 if (osm->pctoouter){ 824 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 } 827 PetscFunctionReturn(0); 828 } 829 830 static PetscErrorCode PCReset_GASM(PC pc) 831 { 832 PC_GASM *osm = (PC_GASM*)pc->data; 833 PetscErrorCode ierr; 834 PetscInt i; 835 836 PetscFunctionBegin; 837 if (osm->ksp) { 838 for (i=0; i<osm->n; i++) { 839 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 840 } 841 } 842 if (osm->pmat) { 843 if (osm->n > 0) { 844 PetscMPIInt size; 845 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 846 if (size > 1) { 847 /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */ 848 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 849 } else { 850 ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 851 } 852 } 853 } 854 if (osm->x) { 855 for (i=0; i<osm->n; i++) { 856 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr); 857 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr); 858 } 859 } 860 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr); 861 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr); 862 863 ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr); 864 ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr); 865 if (!osm->user_subdomains) { 866 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr); 867 osm->N = PETSC_DETERMINE; 868 osm->nmax = PETSC_DETERMINE; 869 } 870 if (osm->pctoouter){ 871 ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr); 872 } 873 if (osm->permutationIS){ 874 ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr); 875 } 876 if (osm->pcx){ 877 ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr); 878 } 879 if (osm->pcy){ 880 ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr); 881 } 882 if (osm->permutationP){ 883 ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr); 884 } 885 if (osm->pcmat){ 886 ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr); 887 } 888 PetscFunctionReturn(0); 889 } 890 891 static PetscErrorCode PCDestroy_GASM(PC pc) 892 { 893 PC_GASM *osm = (PC_GASM*)pc->data; 894 PetscErrorCode ierr; 895 PetscInt i; 896 897 PetscFunctionBegin; 898 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 899 /* PCReset will not destroy subdomains, if user_subdomains is true. */ 900 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr); 901 if (osm->ksp) { 902 for (i=0; i<osm->n; i++) { 903 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 904 } 905 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 906 } 907 ierr = PetscFree(osm->x);CHKERRQ(ierr); 908 ierr = PetscFree(osm->y);CHKERRQ(ierr); 909 ierr = PetscFree(pc->data);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc) 914 { 915 PC_GASM *osm = (PC_GASM*)pc->data; 916 PetscErrorCode ierr; 917 PetscInt blocks,ovl; 918 PetscBool flg; 919 PCGASMType gasmtype; 920 921 PetscFunctionBegin; 922 ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr); 923 ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr); 924 ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr); 925 if (flg) { 926 ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); 927 } 928 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 929 if (flg) { 930 ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); 931 osm->dm_subdomains = PETSC_FALSE; 932 } 933 flg = PETSC_FALSE; 934 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 935 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);} 936 ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr); 937 ierr = PetscOptionsTail();CHKERRQ(ierr); 938 PetscFunctionReturn(0); 939 } 940 941 /*------------------------------------------------------------------------------------*/ 942 943 /*@ 944 PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the 945 communicator. 946 Logically collective on pc 947 948 Input Parameters: 949 + pc - the preconditioner 950 - N - total number of subdomains 951 952 953 Level: beginner 954 955 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap() 956 PCGASMCreateSubdomains2D() 957 @*/ 958 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 959 { 960 PC_GASM *osm = (PC_GASM*)pc->data; 961 PetscMPIInt size,rank; 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N); 966 if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp()."); 967 968 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 969 osm->ois = osm->iis = NULL; 970 971 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 972 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 973 osm->N = N; 974 osm->n = PETSC_DETERMINE; 975 osm->nmax = PETSC_DETERMINE; 976 osm->dm_subdomains = PETSC_FALSE; 977 PetscFunctionReturn(0); 978 } 979 980 static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[]) 981 { 982 PC_GASM *osm = (PC_GASM*)pc->data; 983 PetscErrorCode ierr; 984 PetscInt i; 985 986 PetscFunctionBegin; 987 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n); 988 if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp()."); 989 990 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr); 991 osm->iis = osm->ois = NULL; 992 osm->n = n; 993 osm->N = PETSC_DETERMINE; 994 osm->nmax = PETSC_DETERMINE; 995 if (ois) { 996 ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr); 997 for (i=0; i<n; i++) { 998 ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr); 999 osm->ois[i] = ois[i]; 1000 } 1001 /* 1002 Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(), 1003 it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1. 1004 */ 1005 osm->overlap = -1; 1006 /* inner subdomains must be provided */ 1007 if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n"); 1008 }/* end if */ 1009 if (iis) { 1010 ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr); 1011 for (i=0; i<n; i++) { 1012 ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr); 1013 osm->iis[i] = iis[i]; 1014 } 1015 if (!ois) { 1016 osm->ois = NULL; 1017 /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */ 1018 } 1019 } 1020 if (PetscDefined(USE_DEBUG)) { 1021 PetscInt j,rstart,rend,*covered,lsize; 1022 const PetscInt *indices; 1023 /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */ 1024 ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr); 1025 ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr); 1026 /* check if the current processor owns indices from others */ 1027 for (i=0; i<n; i++) { 1028 ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr); 1029 ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr); 1030 for (j=0; j<lsize; j++) { 1031 if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]); 1032 else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]); 1033 else covered[indices[j]-rstart] = 1; 1034 } 1035 ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr); 1036 } 1037 /* check if we miss any indices */ 1038 for (i=rstart; i<rend; i++) { 1039 if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i); 1040 } 1041 ierr = PetscFree(covered);CHKERRQ(ierr); 1042 } 1043 if (iis) osm->user_subdomains = PETSC_TRUE; 1044 PetscFunctionReturn(0); 1045 } 1046 1047 static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 1048 { 1049 PC_GASM *osm = (PC_GASM*)pc->data; 1050 1051 PetscFunctionBegin; 1052 if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 1053 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 1054 if (!pc->setupcalled) osm->overlap = ovl; 1055 PetscFunctionReturn(0); 1056 } 1057 1058 static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 1059 { 1060 PC_GASM *osm = (PC_GASM*)pc->data; 1061 1062 PetscFunctionBegin; 1063 osm->type = type; 1064 osm->type_set = PETSC_TRUE; 1065 PetscFunctionReturn(0); 1066 } 1067 1068 static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 1069 { 1070 PC_GASM *osm = (PC_GASM*)pc->data; 1071 1072 PetscFunctionBegin; 1073 osm->sort_indices = doSort; 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /* 1078 FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed. 1079 In particular, it would upset the global subdomain number calculation. 1080 */ 1081 static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 1082 { 1083 PC_GASM *osm = (PC_GASM*)pc->data; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 1088 1089 if (n) *n = osm->n; 1090 if (first) { 1091 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1092 *first -= osm->n; 1093 } 1094 if (ksp) { 1095 /* Assume that local solves are now different; not necessarily 1096 true, though! This flag is used only for PCView_GASM() */ 1097 *ksp = osm->ksp; 1098 osm->same_subdomain_solvers = PETSC_FALSE; 1099 } 1100 PetscFunctionReturn(0); 1101 } /* PCGASMGetSubKSP_GASM() */ 1102 1103 /*@C 1104 PCGASMSetSubdomains - Sets the subdomains for this processor 1105 for the additive Schwarz preconditioner. 1106 1107 Collective on pc 1108 1109 Input Parameters: 1110 + pc - the preconditioner object 1111 . n - the number of subdomains for this processor 1112 . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains) 1113 - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap) 1114 1115 Notes: 1116 The IS indices use the parallel, global numbering of the vector entries. 1117 Inner subdomains are those where the correction is applied. 1118 Outer subdomains are those where the residual necessary to obtain the 1119 corrections is obtained (see PCGASMType for the use of inner/outer subdomains). 1120 Both inner and outer subdomains can extend over several processors. 1121 This processor's portion of a subdomain is known as a local subdomain. 1122 1123 Inner subdomains can not overlap with each other, do not have any entities from remote processors, 1124 and have to cover the entire local subdomain owned by the current processor. The index sets on each 1125 process should be ordered such that the ith local subdomain is connected to the ith remote subdomain 1126 on another MPI process. 1127 1128 By default the GASM preconditioner uses 1 (local) subdomain per processor. 1129 1130 1131 Level: advanced 1132 1133 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1134 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 1135 @*/ 1136 PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[]) 1137 { 1138 PC_GASM *osm = (PC_GASM*)pc->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1143 ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr); 1144 osm->dm_subdomains = PETSC_FALSE; 1145 PetscFunctionReturn(0); 1146 } 1147 1148 /*@ 1149 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 1150 additive Schwarz preconditioner. Either all or no processors in the 1151 pc communicator must call this routine. 1152 1153 Logically Collective on pc 1154 1155 Input Parameters: 1156 + pc - the preconditioner context 1157 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0) 1158 1159 Options Database Key: 1160 . -pc_gasm_overlap <overlap> - Sets overlap 1161 1162 Notes: 1163 By default the GASM preconditioner uses 1 subdomain per processor. To use 1164 multiple subdomain per perocessor or "straddling" subdomains that intersect 1165 multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>). 1166 1167 The overlap defaults to 0, so if one desires that no additional 1168 overlap be computed beyond what may have been set with a call to 1169 PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does 1170 not explicitly set the subdomains in application code, then all overlap would be computed 1171 internally by PETSc, and using an overlap of 0 would result in an GASM 1172 variant that is equivalent to the block Jacobi preconditioner. 1173 1174 Note that one can define initial index sets with any overlap via 1175 PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows 1176 PETSc to extend that overlap further, if desired. 1177 1178 Level: intermediate 1179 1180 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1181 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 1182 @*/ 1183 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 1184 { 1185 PetscErrorCode ierr; 1186 PC_GASM *osm = (PC_GASM*)pc->data; 1187 1188 PetscFunctionBegin; 1189 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1190 PetscValidLogicalCollectiveInt(pc,ovl,2); 1191 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 1192 osm->dm_subdomains = PETSC_FALSE; 1193 PetscFunctionReturn(0); 1194 } 1195 1196 /*@ 1197 PCGASMSetType - Sets the type of restriction and interpolation used 1198 for local problems in the additive Schwarz method. 1199 1200 Logically Collective on PC 1201 1202 Input Parameters: 1203 + pc - the preconditioner context 1204 - type - variant of GASM, one of 1205 .vb 1206 PC_GASM_BASIC - full interpolation and restriction 1207 PC_GASM_RESTRICT - full restriction, local processor interpolation 1208 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 1209 PC_GASM_NONE - local processor restriction and interpolation 1210 .ve 1211 1212 Options Database Key: 1213 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1214 1215 Level: intermediate 1216 1217 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1218 PCGASMCreateSubdomains2D() 1219 @*/ 1220 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 1221 { 1222 PetscErrorCode ierr; 1223 1224 PetscFunctionBegin; 1225 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1226 PetscValidLogicalCollectiveEnum(pc,type,2); 1227 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 /*@ 1232 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 1233 1234 Logically Collective on PC 1235 1236 Input Parameters: 1237 + pc - the preconditioner context 1238 - doSort - sort the subdomain indices 1239 1240 Level: intermediate 1241 1242 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1243 PCGASMCreateSubdomains2D() 1244 @*/ 1245 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 1246 { 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1251 PetscValidLogicalCollectiveBool(pc,doSort,2); 1252 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 /*@C 1257 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 1258 this processor. 1259 1260 Collective on PC iff first_local is requested 1261 1262 Input Parameter: 1263 . pc - the preconditioner context 1264 1265 Output Parameters: 1266 + n_local - the number of blocks on this processor or NULL 1267 . first_local - the global number of the first block on this processor or NULL, 1268 all processors must request or all must pass NULL 1269 - ksp - the array of KSP contexts 1270 1271 Note: 1272 After PCGASMGetSubKSP() the array of KSPes is not to be freed 1273 1274 Currently for some matrix implementations only 1 block per processor 1275 is supported. 1276 1277 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 1278 1279 Level: advanced 1280 1281 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(), 1282 PCGASMCreateSubdomains2D(), 1283 @*/ 1284 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 1285 { 1286 PetscErrorCode ierr; 1287 1288 PetscFunctionBegin; 1289 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1290 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 /* -------------------------------------------------------------------------------------*/ 1295 /*MC 1296 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 1297 its own KSP object. 1298 1299 Options Database Keys: 1300 + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors 1301 . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view 1302 . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp() 1303 . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains 1304 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1305 1306 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 1307 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 1308 -pc_gasm_type basic to use the standard GASM. 1309 1310 Notes: 1311 Blocks can be shared by multiple processes. 1312 1313 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1314 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 1315 1316 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 1317 and set the options directly on the resulting KSP object (you can access its PC 1318 with KSPGetPC()) 1319 1320 1321 Level: beginner 1322 1323 References: 1324 + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions 1325 Courant Institute, New York University Technical report 1326 - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1327 Cambridge University Press. 1328 1329 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1330 PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(), 1331 PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType() 1332 1333 M*/ 1334 1335 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc) 1336 { 1337 PetscErrorCode ierr; 1338 PC_GASM *osm; 1339 1340 PetscFunctionBegin; 1341 ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr); 1342 1343 osm->N = PETSC_DETERMINE; 1344 osm->n = PETSC_DECIDE; 1345 osm->nmax = PETSC_DETERMINE; 1346 osm->overlap = 0; 1347 osm->ksp = NULL; 1348 osm->gorestriction = NULL; 1349 osm->girestriction = NULL; 1350 osm->pctoouter = NULL; 1351 osm->gx = NULL; 1352 osm->gy = NULL; 1353 osm->x = NULL; 1354 osm->y = NULL; 1355 osm->pcx = NULL; 1356 osm->pcy = NULL; 1357 osm->permutationIS = NULL; 1358 osm->permutationP = NULL; 1359 osm->pcmat = NULL; 1360 osm->ois = NULL; 1361 osm->iis = NULL; 1362 osm->pmat = NULL; 1363 osm->type = PC_GASM_RESTRICT; 1364 osm->same_subdomain_solvers = PETSC_TRUE; 1365 osm->sort_indices = PETSC_TRUE; 1366 osm->dm_subdomains = PETSC_FALSE; 1367 osm->hierarchicalpartitioning = PETSC_FALSE; 1368 1369 pc->data = (void*)osm; 1370 pc->ops->apply = PCApply_GASM; 1371 pc->ops->matapply = PCMatApply_GASM; 1372 pc->ops->applytranspose = PCApplyTranspose_GASM; 1373 pc->ops->setup = PCSetUp_GASM; 1374 pc->ops->reset = PCReset_GASM; 1375 pc->ops->destroy = PCDestroy_GASM; 1376 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1377 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1378 pc->ops->view = PCView_GASM; 1379 pc->ops->applyrichardson = NULL; 1380 1381 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr); 1382 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1383 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr); 1384 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1385 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1386 PetscFunctionReturn(0); 1387 } 1388 1389 PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]) 1390 { 1391 MatPartitioning mpart; 1392 const char *prefix; 1393 PetscInt i,j,rstart,rend,bs; 1394 PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1395 Mat Ad = NULL, adj; 1396 IS ispart,isnumb,*is; 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc); 1401 1402 /* Get prefix, row distribution, and block size */ 1403 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1404 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1405 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1406 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1407 1408 /* Get diagonal block from matrix if possible */ 1409 ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1410 if (hasop) { 1411 ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr); 1412 } 1413 if (Ad) { 1414 ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1415 if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1416 } 1417 if (Ad && nloc > 1) { 1418 PetscBool match,done; 1419 /* Try to setup a good matrix partitioning if available */ 1420 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1421 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1422 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1423 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1424 if (!match) { 1425 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1426 } 1427 if (!match) { /* assume a "good" partitioner is available */ 1428 PetscInt na; 1429 const PetscInt *ia,*ja; 1430 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1431 if (done) { 1432 /* Build adjacency matrix by hand. Unfortunately a call to 1433 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1434 remove the block-aij structure and we cannot expect 1435 MatPartitioning to split vertices as we need */ 1436 PetscInt i,j,len,nnz,cnt,*iia=NULL,*jja=NULL; 1437 const PetscInt *row; 1438 nnz = 0; 1439 for (i=0; i<na; i++) { /* count number of nonzeros */ 1440 len = ia[i+1] - ia[i]; 1441 row = ja + ia[i]; 1442 for (j=0; j<len; j++) { 1443 if (row[j] == i) { /* don't count diagonal */ 1444 len--; break; 1445 } 1446 } 1447 nnz += len; 1448 } 1449 ierr = PetscMalloc1(na+1,&iia);CHKERRQ(ierr); 1450 ierr = PetscMalloc1(nnz,&jja);CHKERRQ(ierr); 1451 nnz = 0; 1452 iia[0] = 0; 1453 for (i=0; i<na; i++) { /* fill adjacency */ 1454 cnt = 0; 1455 len = ia[i+1] - ia[i]; 1456 row = ja + ia[i]; 1457 for (j=0; j<len; j++) { 1458 if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */ 1459 } 1460 nnz += cnt; 1461 iia[i+1] = nnz; 1462 } 1463 /* Partitioning of the adjacency matrix */ 1464 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr); 1465 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1466 ierr = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr); 1467 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1468 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1469 ierr = MatDestroy(&adj);CHKERRQ(ierr); 1470 foundpart = PETSC_TRUE; 1471 } 1472 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1473 } 1474 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr); 1475 } 1476 ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr); 1477 if (!foundpart) { 1478 1479 /* Partitioning by contiguous chunks of rows */ 1480 1481 PetscInt mbs = (rend-rstart)/bs; 1482 PetscInt start = rstart; 1483 for (i=0; i<nloc; i++) { 1484 PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs; 1485 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1486 start += count; 1487 } 1488 1489 } else { 1490 1491 /* Partitioning by adjacency of diagonal block */ 1492 1493 const PetscInt *numbering; 1494 PetscInt *count,nidx,*indices,*newidx,start=0; 1495 /* Get node count in each partition */ 1496 ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr); 1497 ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr); 1498 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1499 for (i=0; i<nloc; i++) count[i] *= bs; 1500 } 1501 /* Build indices from node numbering */ 1502 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1503 ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr); 1504 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1505 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1506 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1507 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1508 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1509 ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr); 1510 for (i=0; i<nidx; i++) { 1511 for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j; 1512 } 1513 ierr = PetscFree(indices);CHKERRQ(ierr); 1514 nidx *= bs; 1515 indices = newidx; 1516 } 1517 /* Shift to get global indices */ 1518 for (i=0; i<nidx; i++) indices[i] += rstart; 1519 1520 /* Build the index sets for each block */ 1521 for (i=0; i<nloc; i++) { 1522 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1523 ierr = ISSort(is[i]);CHKERRQ(ierr); 1524 start += count[i]; 1525 } 1526 1527 ierr = PetscFree(count);CHKERRQ(ierr); 1528 ierr = PetscFree(indices);CHKERRQ(ierr); 1529 ierr = ISDestroy(&isnumb);CHKERRQ(ierr); 1530 ierr = ISDestroy(&ispart);CHKERRQ(ierr); 1531 } 1532 *iis = is; 1533 PetscFunctionReturn(0); 1534 } 1535 1536 PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[]) 1537 { 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 /*@C 1546 PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive 1547 Schwarz preconditioner for a any problem based on its matrix. 1548 1549 Collective 1550 1551 Input Parameters: 1552 + A - The global matrix operator 1553 - N - the number of global subdomains requested 1554 1555 Output Parameters: 1556 + n - the number of subdomains created on this processor 1557 - iis - the array of index sets defining the local inner subdomains (on which the correction is applied) 1558 1559 Level: advanced 1560 1561 Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor. 1562 When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local. 1563 The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping 1564 outer subdomains will be automatically generated from these according to the requested amount of 1565 overlap; this is currently supported only with local subdomains. 1566 1567 1568 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains() 1569 @*/ 1570 PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[]) 1571 { 1572 PetscMPIInt size; 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1577 PetscValidPointer(iis,4); 1578 1579 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N); 1580 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1581 if (N >= size) { 1582 *n = N/size + (N%size); 1583 ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr); 1584 } else { 1585 ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr); 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 /*@C 1591 PCGASMDestroySubdomains - Destroys the index sets created with 1592 PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be 1593 called after setting subdomains with PCGASMSetSubdomains(). 1594 1595 Collective 1596 1597 Input Parameters: 1598 + n - the number of index sets 1599 . iis - the array of inner subdomains, 1600 - ois - the array of outer subdomains, can be NULL 1601 1602 Level: intermediate 1603 1604 Notes: 1605 this is merely a convenience subroutine that walks each list, 1606 destroys each IS on the list, and then frees the list. At the end the 1607 list pointers are set to NULL. 1608 1609 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains() 1610 @*/ 1611 PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois) 1612 { 1613 PetscInt i; 1614 PetscErrorCode ierr; 1615 1616 PetscFunctionBegin; 1617 if (n <= 0) PetscFunctionReturn(0); 1618 if (ois) { 1619 PetscValidPointer(ois,3); 1620 if (*ois) { 1621 PetscValidPointer(*ois,3); 1622 for (i=0; i<n; i++) { 1623 ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr); 1624 } 1625 ierr = PetscFree((*ois));CHKERRQ(ierr); 1626 } 1627 } 1628 if (iis) { 1629 PetscValidPointer(iis,2); 1630 if (*iis) { 1631 PetscValidPointer(*iis,2); 1632 for (i=0; i<n; i++) { 1633 ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr); 1634 } 1635 ierr = PetscFree((*iis));CHKERRQ(ierr); 1636 } 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1642 { \ 1643 PetscInt first_row = first/M, last_row = last/M+1; \ 1644 /* \ 1645 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1646 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1647 subdomain). \ 1648 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1649 of the intersection. \ 1650 */ \ 1651 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1652 *ylow_loc = PetscMax(first_row,ylow); \ 1653 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1654 *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \ 1655 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1656 *yhigh_loc = PetscMin(last_row,yhigh); \ 1657 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1658 *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \ 1659 /* Now compute the size of the local subdomain n. */ \ 1660 *n = 0; \ 1661 if (*ylow_loc < *yhigh_loc) { \ 1662 PetscInt width = xright-xleft; \ 1663 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1664 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1665 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1666 } \ 1667 } 1668 1669 /*@ 1670 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1671 preconditioner for a two-dimensional problem on a regular grid. 1672 1673 Collective 1674 1675 Input Parameters: 1676 + M, N - the global number of grid points in the x and y directions 1677 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1678 . dof - degrees of freedom per node 1679 - overlap - overlap in mesh lines 1680 1681 Output Parameters: 1682 + Nsub - the number of local subdomains created 1683 . iis - array of index sets defining inner (nonoverlapping) subdomains 1684 - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains 1685 1686 1687 Level: advanced 1688 1689 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap() 1690 @*/ 1691 PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois) 1692 { 1693 PetscErrorCode ierr; 1694 PetscMPIInt size, rank; 1695 PetscInt i, j; 1696 PetscInt maxheight, maxwidth; 1697 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1698 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1699 PetscInt x[2][2], y[2][2], n[2]; 1700 PetscInt first, last; 1701 PetscInt nidx, *idx; 1702 PetscInt ii,jj,s,q,d; 1703 PetscInt k,kk; 1704 PetscMPIInt color; 1705 MPI_Comm comm, subcomm; 1706 IS **xis = NULL, **is = ois, **is_local = iis; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1710 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 1711 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 1712 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1713 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1714 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1715 1716 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1717 s = 0; 1718 ystart = 0; 1719 for (j=0; j<Ndomains; ++j) { 1720 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1721 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1722 /* Vertical domain limits with an overlap. */ 1723 ylow = PetscMax(ystart - overlap,0); 1724 yhigh = PetscMin(ystart + maxheight + overlap,N); 1725 xstart = 0; 1726 for (i=0; i<Mdomains; ++i) { 1727 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1728 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1729 /* Horizontal domain limits with an overlap. */ 1730 xleft = PetscMax(xstart - overlap,0); 1731 xright = PetscMin(xstart + maxwidth + overlap,M); 1732 /* 1733 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1734 */ 1735 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1736 if (nidx) ++s; 1737 xstart += maxwidth; 1738 } /* for (i = 0; i < Mdomains; ++i) */ 1739 ystart += maxheight; 1740 } /* for (j = 0; j < Ndomains; ++j) */ 1741 1742 /* Now we can allocate the necessary number of ISs. */ 1743 *nsub = s; 1744 ierr = PetscMalloc1(*nsub,is);CHKERRQ(ierr); 1745 ierr = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr); 1746 s = 0; 1747 ystart = 0; 1748 for (j=0; j<Ndomains; ++j) { 1749 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1750 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1751 /* Vertical domain limits with an overlap. */ 1752 y[0][0] = PetscMax(ystart - overlap,0); 1753 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1754 /* Vertical domain limits without an overlap. */ 1755 y[1][0] = ystart; 1756 y[1][1] = PetscMin(ystart + maxheight,N); 1757 xstart = 0; 1758 for (i=0; i<Mdomains; ++i) { 1759 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1760 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1761 /* Horizontal domain limits with an overlap. */ 1762 x[0][0] = PetscMax(xstart - overlap,0); 1763 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1764 /* Horizontal domain limits without an overlap. */ 1765 x[1][0] = xstart; 1766 x[1][1] = PetscMin(xstart+maxwidth,M); 1767 /* 1768 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1769 Do this twice: first for the domains with overlaps, and once without. 1770 During the first pass create the subcommunicators, and use them on the second pass as well. 1771 */ 1772 for (q = 0; q < 2; ++q) { 1773 PetscBool split = PETSC_FALSE; 1774 /* 1775 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1776 according to whether the domain with an overlap or without is considered. 1777 */ 1778 xleft = x[q][0]; xright = x[q][1]; 1779 ylow = y[q][0]; yhigh = y[q][1]; 1780 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1781 nidx *= dof; 1782 n[q] = nidx; 1783 /* 1784 Based on the counted number of indices in the local domain *with an overlap*, 1785 construct a subcommunicator of all the processors supporting this domain. 1786 Observe that a domain with an overlap might have nontrivial local support, 1787 while the domain without an overlap might not. Hence, the decision to participate 1788 in the subcommunicator must be based on the domain with an overlap. 1789 */ 1790 if (q == 0) { 1791 if (nidx) color = 1; 1792 else color = MPI_UNDEFINED; 1793 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr); 1794 split = PETSC_TRUE; 1795 } 1796 /* 1797 Proceed only if the number of local indices *with an overlap* is nonzero. 1798 */ 1799 if (n[0]) { 1800 if (q == 0) xis = is; 1801 if (q == 1) { 1802 /* 1803 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1804 Moreover, if the overlap is zero, the two ISs are identical. 1805 */ 1806 if (overlap == 0) { 1807 (*is_local)[s] = (*is)[s]; 1808 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1809 continue; 1810 } else { 1811 xis = is_local; 1812 subcomm = ((PetscObject)(*is)[s])->comm; 1813 } 1814 } /* if (q == 1) */ 1815 idx = NULL; 1816 ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr); 1817 if (nidx) { 1818 k = 0; 1819 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1820 PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft; 1821 PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright; 1822 kk = dof*(M*jj + x0); 1823 for (ii=x0; ii<x1; ++ii) { 1824 for (d = 0; d < dof; ++d) { 1825 idx[k++] = kk++; 1826 } 1827 } 1828 } 1829 } 1830 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr); 1831 if (split) { 1832 ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr); 1833 } 1834 }/* if (n[0]) */ 1835 }/* for (q = 0; q < 2; ++q) */ 1836 if (n[0]) ++s; 1837 xstart += maxwidth; 1838 } /* for (i = 0; i < Mdomains; ++i) */ 1839 ystart += maxheight; 1840 } /* for (j = 0; j < Ndomains; ++j) */ 1841 PetscFunctionReturn(0); 1842 } 1843 1844 /*@C 1845 PCGASMGetSubdomains - Gets the subdomains supported on this processor 1846 for the additive Schwarz preconditioner. 1847 1848 Not Collective 1849 1850 Input Parameter: 1851 . pc - the preconditioner context 1852 1853 Output Parameters: 1854 + n - the number of subdomains for this processor (default value = 1) 1855 . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL) 1856 - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL) 1857 1858 1859 Notes: 1860 The user is responsible for destroying the ISs and freeing the returned arrays. 1861 The IS numbering is in the parallel, global numbering of the vector. 1862 1863 Level: advanced 1864 1865 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(), 1866 PCGASMSetSubdomains(), PCGASMGetSubmatrices() 1867 @*/ 1868 PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[]) 1869 { 1870 PC_GASM *osm; 1871 PetscErrorCode ierr; 1872 PetscBool match; 1873 PetscInt i; 1874 1875 PetscFunctionBegin; 1876 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1877 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1878 if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name); 1879 osm = (PC_GASM*)pc->data; 1880 if (n) *n = osm->n; 1881 if (iis) { 1882 ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr); 1883 } 1884 if (ois) { 1885 ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr); 1886 } 1887 if (iis || ois) { 1888 for (i = 0; i < osm->n; ++i) { 1889 if (iis) (*iis)[i] = osm->iis[i]; 1890 if (ois) (*ois)[i] = osm->ois[i]; 1891 } 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 /*@C 1897 PCGASMGetSubmatrices - Gets the local submatrices (for this processor 1898 only) for the additive Schwarz preconditioner. 1899 1900 Not Collective 1901 1902 Input Parameter: 1903 . pc - the preconditioner context 1904 1905 Output Parameters: 1906 + n - the number of matrices for this processor (default value = 1) 1907 - mat - the matrices 1908 1909 Notes: 1910 matrices returned by this routine have the same communicators as the index sets (IS) 1911 used to define subdomains in PCGASMSetSubdomains() 1912 Level: advanced 1913 1914 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), 1915 PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains() 1916 @*/ 1917 PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1918 { 1919 PC_GASM *osm; 1920 PetscErrorCode ierr; 1921 PetscBool match; 1922 1923 PetscFunctionBegin; 1924 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1925 PetscValidIntPointer(n,2); 1926 if (mat) PetscValidPointer(mat,3); 1927 if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp()."); 1928 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1929 if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name); 1930 osm = (PC_GASM*)pc->data; 1931 if (n) *n = osm->n; 1932 if (mat) *mat = osm->pmat; 1933 PetscFunctionReturn(0); 1934 } 1935 1936 /*@ 1937 PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible. 1938 Logically Collective 1939 1940 Input Parameter: 1941 + pc - the preconditioner 1942 - flg - boolean indicating whether to use subdomains defined by the DM 1943 1944 Options Database Key: 1945 . -pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains 1946 1947 Level: intermediate 1948 1949 Notes: 1950 PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(), 1951 so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap() 1952 automatically turns the latter off. 1953 1954 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap() 1955 PCGASMCreateSubdomains2D() 1956 @*/ 1957 PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg) 1958 { 1959 PC_GASM *osm = (PC_GASM*)pc->data; 1960 PetscErrorCode ierr; 1961 PetscBool match; 1962 1963 PetscFunctionBegin; 1964 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1965 PetscValidLogicalCollectiveBool(pc,flg,2); 1966 if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC."); 1967 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1968 if (match) { 1969 if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) { 1970 osm->dm_subdomains = flg; 1971 } 1972 } 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /*@ 1977 PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible. 1978 Not Collective 1979 1980 Input Parameter: 1981 . pc - the preconditioner 1982 1983 Output Parameter: 1984 . flg - boolean indicating whether to use subdomains defined by the DM 1985 1986 Level: intermediate 1987 1988 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap() 1989 PCGASMCreateSubdomains2D() 1990 @*/ 1991 PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg) 1992 { 1993 PC_GASM *osm = (PC_GASM*)pc->data; 1994 PetscErrorCode ierr; 1995 PetscBool match; 1996 1997 PetscFunctionBegin; 1998 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1999 PetscValidBoolPointer(flg,2); 2000 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 2001 if (match) { 2002 if (flg) *flg = osm->dm_subdomains; 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006