1 /* 2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 3 In this version each processor may intersect multiple subdomains and any subdomain may 4 intersect multiple processors. Intersections of subdomains with processors are called *local 5 subdomains*. 6 7 N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM()) 8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains()) 9 nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM()) 10 */ 11 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 12 #include <petscdm.h> 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 PCGASMType type; /* use reduced interpolation, restriction or both */ 18 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 19 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */ 20 PetscBool sort_indices; /* flag to sort subdomain indices */ 21 PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */ 22 PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */ 23 PetscBool hierarchicalpartitioning; 24 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */ 25 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */ 26 KSP *ksp; /* linear solvers for each subdomain */ 27 Mat *pmat; /* subdomain block matrices */ 28 Vec gx,gy; /* Merged work vectors */ 29 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. 
*/ 30 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */ 31 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */ 32 VecScatter pctoouter; 33 IS permutationIS; 34 Mat permutationP; 35 Mat pcmat; 36 Vec pcx,pcy; 37 } PC_GASM; 38 39 static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation) 40 { 41 PC_GASM *osm = (PC_GASM*)pc->data; 42 PetscInt i; 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */ 47 ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr); 48 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr); 49 for (i = 0; i < osm->n; ++i) (*permutation)[i] = i; 50 ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer) 55 { 56 PC_GASM *osm = (PC_GASM*)pc->data; 57 PetscInt j,nidx; 58 const PetscInt *idx; 59 PetscViewer sviewer; 60 char *cidx; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n); 65 /* Inner subdomains. */ 66 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr); 67 /* 68 No more than 15 characters per index plus a space. 69 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 70 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 71 For nidx == 0, the whole string 16 '\0'. 
72 */ 73 #define len 16*(nidx+1)+1 74 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 75 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 76 #undef len 77 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr); 78 for (j = 0; j < nidx; ++j) { 79 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 80 } 81 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr); 82 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 83 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr); 84 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 87 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 88 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 90 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 91 ierr = PetscFree(cidx);CHKERRQ(ierr); 92 /* Outer subdomains. */ 93 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr); 94 /* 95 No more than 15 characters per index plus a space. 96 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 97 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 98 For nidx == 0, the whole string 16 '\0'. 
99 */ 100 #define len 16*(nidx+1)+1 101 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 102 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 103 #undef len 104 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr); 105 for (j = 0; j < nidx; ++j) { 106 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr); 107 } 108 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 109 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr); 110 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr); 111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 115 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 116 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 117 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 118 ierr = PetscFree(cidx);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 123 { 124 PC_GASM *osm = (PC_GASM*)pc->data; 125 const char *prefix; 126 char fname[PETSC_MAX_PATH_LEN+1]; 127 PetscInt l, d, count; 128 PetscBool doprint,found; 129 PetscViewer viewer, sviewer = NULL; 130 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */ 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 135 doprint = PETSC_FALSE; 136 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr); 137 if (!doprint) PetscFunctionReturn(0); 138 ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr); 139 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 140 ierr = 
PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr); 141 /* 142 Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer: 143 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm. 144 */ 145 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 146 l = 0; 147 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 148 for (count = 0; count < osm->N; ++count) { 149 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */ 150 if (l<osm->n) { 151 d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 152 if (numbering[d] == count) { 153 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 154 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 155 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 156 ++l; 157 } 158 } 159 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 160 } 161 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 162 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 167 { 168 PC_GASM *osm = (PC_GASM*)pc->data; 169 const char *prefix; 170 PetscErrorCode ierr; 171 PetscMPIInt rank, size; 172 PetscInt bsz; 173 PetscBool iascii,view_subdomains=PETSC_FALSE; 174 PetscViewer sviewer; 175 PetscInt count, l; 176 char overlap[256] = "user-defined overlap"; 177 char gsubdomains[256] = "unknown total number of subdomains"; 178 char msubdomains[256] = "unknown max number of local subdomains"; 179 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the 
permutation from the local ordering */ 180 181 PetscFunctionBegin; 182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr); 183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr); 184 185 if (osm->overlap >= 0) { 186 ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr); 187 } 188 if (osm->N != PETSC_DETERMINE) { 189 ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr); 190 } 191 if (osm->nmax != PETSC_DETERMINE) { 192 ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr); 193 } 194 195 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 196 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr); 197 198 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 199 if (iascii) { 200 /* 201 Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer: 202 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed 203 collectively on the comm. 
204 */ 205 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 206 ierr = PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 207 ierr = PetscViewerASCIIPrintf(viewer," %s\n",overlap);CHKERRQ(ierr); 208 ierr = PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);CHKERRQ(ierr); 209 ierr = PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);CHKERRQ(ierr); 210 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr); 212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 213 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 214 /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */ 215 ierr = PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");CHKERRQ(ierr); 216 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 217 ierr = PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 218 /* Make sure that everybody waits for the banner to be printed. */ 219 ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 220 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. 
*/ 221 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 222 l = 0; 223 for (count = 0; count < osm->N; ++count) { 224 PetscMPIInt srank, ssize; 225 if (l<osm->n) { 226 PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 227 if (numbering[d] == count) { 228 ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr); 229 ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr); 230 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 231 ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr); 232 ierr = PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr); 233 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 234 if (view_subdomains) { 235 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 236 } 237 if (!pc->setupcalled) { 238 ierr = PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr); 239 } else { 240 ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr); 241 } 242 ierr = PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 243 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 244 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 245 ++l; 246 } 247 } 248 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 249 } 250 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 252 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 253 /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 254 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 255 } 256 PetscFunctionReturn(0); 257 
} 258 259 PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]); 260 261 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc) 262 { 263 PC_GASM *osm = (PC_GASM*)pc->data; 264 MatPartitioning part; 265 MPI_Comm comm; 266 PetscMPIInt size; 267 PetscInt nlocalsubdomains,fromrows_localsize; 268 IS partitioning,fromrows,isn; 269 Vec outervec; 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 274 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 275 /* we do not need a hierarchical partitioning when 276 * the total number of subdomains is consistent with 277 * the number of MPI tasks. 278 * For the following cases, we do not need to use HP 279 * */ 280 if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0); 281 if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size); 282 nlocalsubdomains = size/osm->N; 283 osm->n = 1; 284 ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr); 285 ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr); 286 ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr); 287 ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr); 288 ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr); 289 ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr); 290 /* get new processor owner number of each vertex */ 291 ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr); 292 ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr); 293 ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr); 294 ierr = ISDestroy(&isn);CHKERRQ(ierr); 295 ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr); 296 ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr); 297 ierr = 
MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr); 298 ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr); 299 ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr); 300 ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr); 301 ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr); 302 ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr); 303 osm->permutationIS = fromrows; 304 osm->pcmat = pc->pmat; 305 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 306 pc->pmat = osm->permutationP; 307 ierr = VecDestroy(&outervec);CHKERRQ(ierr); 308 ierr = ISDestroy(&fromrows);CHKERRQ(ierr); 309 ierr = ISDestroy(&partitioning);CHKERRQ(ierr); 310 osm->n = PETSC_DETERMINE; 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode PCSetUp_GASM(PC pc) 315 { 316 PC_GASM *osm = (PC_GASM*)pc->data; 317 PetscErrorCode ierr; 318 PetscInt i,nInnerIndices,nTotalInnerIndices; 319 PetscMPIInt rank, size; 320 MatReuse scall = MAT_REUSE_MATRIX; 321 KSP ksp; 322 PC subpc; 323 const char *prefix,*pprefix; 324 Vec x,y; 325 PetscInt oni; /* Number of indices in the i-th local outer subdomain. */ 326 const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain. */ 327 PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */ 328 PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */ 329 IS gois; /* Disjoint union the global indices of outer subdomains. */ 330 IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */ 331 PetscScalar *gxarray, *gyarray; 332 PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. 
*/ 333 PetscInt num_subdomains = 0; 334 DM *subdomain_dm = NULL; 335 char **subdomain_names = NULL; 336 PetscInt *numbering; 337 338 PetscFunctionBegin; 339 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 340 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 341 if (!pc->setupcalled) { 342 /* use a hierarchical partitioning */ 343 if (osm->hierarchicalpartitioning) { 344 ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr); 345 } 346 if (osm->n == PETSC_DETERMINE) { 347 if (osm->N != PETSC_DETERMINE) { 348 /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */ 349 ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr); 350 } else if (osm->dm_subdomains && pc->dm) { 351 /* try pc->dm next, if allowed */ 352 PetscInt d; 353 IS *inner_subdomain_is, *outer_subdomain_is; 354 ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr); 355 if (num_subdomains) { 356 ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr); 357 } 358 for (d = 0; d < num_subdomains; ++d) { 359 if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);} 360 if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);} 361 } 362 ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr); 363 ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr); 364 } else { 365 /* still no subdomains; use one per processor */ 366 osm->nmax = osm->n = 1; 367 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 368 osm->N = size; 369 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 370 } 371 } 372 if (!osm->iis) { 373 /* 374 osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied. 
375 We create the requisite number of local inner subdomains and then expand them into 376 out subdomains, if necessary. 377 */ 378 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 379 } 380 if (!osm->ois) { 381 /* 382 Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap 383 has been requested, copy the inner subdomains over so they can be modified. 384 */ 385 ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr); 386 for (i=0; i<osm->n; ++i) { 387 if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */ 388 ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr); 389 ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr); 390 } else { 391 ierr = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr); 392 osm->ois[i] = osm->iis[i]; 393 } 394 } 395 if (osm->overlap>0 && osm->N>1) { 396 /* Extend the "overlapping" regions by a number of steps */ 397 ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr); 398 } 399 } 400 401 /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */ 402 if (osm->nmax == PETSC_DETERMINE) { 403 PetscMPIInt inwork,outwork; 404 /* determine global number of subdomains and the max number of local subdomains */ 405 inwork = osm->n; 406 ierr = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 407 osm->nmax = outwork; 408 } 409 if (osm->N == PETSC_DETERMINE) { 410 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. 
*/ 411 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr); 412 } 413 414 if (osm->sort_indices) { 415 for (i=0; i<osm->n; i++) { 416 ierr = ISSort(osm->ois[i]);CHKERRQ(ierr); 417 ierr = ISSort(osm->iis[i]);CHKERRQ(ierr); 418 } 419 } 420 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 421 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 422 423 /* 424 Merge the ISs, create merged vectors and restrictions. 425 */ 426 /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */ 427 on = 0; 428 for (i=0; i<osm->n; i++) { 429 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 430 on += oni; 431 } 432 ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr); 433 on = 0; 434 /* Merge local indices together */ 435 for (i=0; i<osm->n; i++) { 436 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 437 ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 438 ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr); 439 ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 440 on += oni; 441 } 442 ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr); 443 nTotalInnerIndices = 0; 444 for (i=0; i<osm->n; i++) { 445 ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr); 446 nTotalInnerIndices += nInnerIndices; 447 } 448 ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr); 449 ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 450 451 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr); 452 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 453 ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr); 454 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr); 455 /* gois might indices not on local */ 456 ierr = VecScatterCreate(x,gois,osm->gx,goid, 
&(osm->gorestriction));CHKERRQ(ierr); 457 ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr); 458 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr); 459 ierr = VecDestroy(&x);CHKERRQ(ierr); 460 ierr = ISDestroy(&gois);CHKERRQ(ierr); 461 462 /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */ 463 { 464 PetscInt ini; /* Number of indices the i-th a local inner subdomain. */ 465 PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */ 466 PetscInt *iidx; /* Global indices in the merged local inner subdomain. */ 467 PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 468 IS giis; /* IS for the disjoint union of inner subdomains. */ 469 IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 470 PetscScalar *array; 471 const PetscInt *indices; 472 PetscInt k; 473 on = 0; 474 for (i=0; i<osm->n; i++) { 475 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 476 on += oni; 477 } 478 ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr); 479 ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr); 480 ierr = VecGetArray(y,&array);CHKERRQ(ierr); 481 /* set communicator id to determine where overlap is */ 482 in = 0; 483 for (i=0; i<osm->n; i++) { 484 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr); 485 for (k = 0; k < ini; ++k) { 486 array[in+k] = numbering[i]; 487 } 488 in += ini; 489 } 490 ierr = VecRestoreArray(y,&array);CHKERRQ(ierr); 491 ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr); 494 ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr); 495 on = 0; 496 in = 0; 497 
for (i=0; i<osm->n; i++) { 498 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 499 ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr); 500 for (k=0; k<oni; k++) { 501 /* skip overlapping indices to get inner domain */ 502 if (PetscRealPart(array[on+k]) != numbering[i]) continue; 503 iidx[in] = indices[k]; 504 ioidx[in++] = gostart+on+k; 505 } 506 ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr); 507 on += oni; 508 } 509 ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr); 510 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr); 511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr); 512 ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr); 513 ierr = VecDestroy(&y);CHKERRQ(ierr); 514 ierr = ISDestroy(&giis);CHKERRQ(ierr); 515 ierr = ISDestroy(&giois);CHKERRQ(ierr); 516 } 517 ierr = ISDestroy(&goid);CHKERRQ(ierr); 518 ierr = PetscFree(numbering);CHKERRQ(ierr); 519 520 /* Create the subdomain work vectors. 
*/ 521 ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr); 522 ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr); 523 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 524 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 525 for (i=0, on=0; i<osm->n; ++i, on += oni) { 526 PetscInt oNi; 527 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 528 /* on a sub communicator */ 529 ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr); 530 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr); 531 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr); 532 } 533 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 534 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 535 /* Create the subdomain solvers */ 536 ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr); 537 for (i=0; i<osm->n; i++) { 538 char subprefix[PETSC_MAX_PATH_LEN+1]; 539 ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr); 540 ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr); 541 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr); 542 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 543 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 544 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? 
*/ 545 if (subdomain_dm) { 546 ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr); 547 ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr); 548 } 549 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 550 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 551 if (subdomain_names && subdomain_names[i]) { 552 ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr); 553 ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr); 554 ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr); 555 } 556 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 557 osm->ksp[i] = ksp; 558 } 559 ierr = PetscFree(subdomain_dm);CHKERRQ(ierr); 560 ierr = PetscFree(subdomain_names);CHKERRQ(ierr); 561 scall = MAT_INITIAL_MATRIX; 562 } else { /* if (pc->setupcalled) */ 563 /* 564 Destroy the submatrices from the previous iteration 565 */ 566 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 567 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 568 scall = MAT_INITIAL_MATRIX; 569 } 570 if (osm->permutationIS) { 571 ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr); 572 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 573 osm->pcmat = pc->pmat; 574 pc->pmat = osm->permutationP; 575 } 576 } 577 578 /* 579 Extract the submatrices. 
*/
  /* Extract the subdomain submatrices of pc->pmat; the MPI variant is required when
     the communicator has more than one rank, since subdomains may straddle processes. */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    /* Newly created submatrices inherit the options prefix of the global matrix and are
       logged as children of the PC */
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    ierr = KSPGetOptionsPrefix(osm->ksp[i],&prefix);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(osm->pmat[i],prefix);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  /* If a permuted matrix was swapped into pc->pmat earlier, restore the user's original matrix */
  if (osm->pcmat) {
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}

/* Set up (e.g., factor) each subdomain solver; the KSPs were created in PCSetUp_GASM(). */
static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Apply the GASM preconditioner: restrict xin onto the (disjoint union of) subdomains,
   solve each subdomain problem, and add the corrections back into yout. */
static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /* If a permutation is in use (osm->pctoouter set), first map xin into the permuted ordering */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves; osm->x[i]/osm->y[i] alias slices of the merged vectors gx/gy
     (see the PC_GASM struct comment) */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* do we need to zero y?
 */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  /* Add the subdomain corrections back into y, via either the inner or the outer subdomains */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* Map the result back out of the permuted ordering, if one is in use */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Apply the preconditioner to all columns of Xin at once; currently only supports
   a single subdomain per process (osm->n == 1). */
static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  Mat            X,Y,O=NULL,Z,W;
  Vec            x,y;
  PetscInt       i,m,M,N;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
  ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
  /* If a permutation is in use, scatter every column of Xin into the permuted dense matrix O */
  if (osm->pctoouter) {
    ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
    ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
    ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
    for (i = 0; i < N; ++i) {
      ierr = MatDenseGetColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
      ierr = MatDenseGetColumnVecWrite(O,i,&y);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = MatDenseRestoreColumnVecWrite(O,i,&y);CHKERRQ(ierr);
      ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
    }
    X = Y = O;
  } else {
    X = Xin;
    Y = Yout;
  }
  /*
     support for limiting the restriction or
     interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
  ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
  ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
  /* Restrict each column of X into Z, the block of subdomain right-hand sides */
  for (i = 0; i < N; ++i) {
    ierr = MatDenseGetColumnVecRead(X,i,&x);CHKERRQ(ierr);
    ierr = MatDenseGetColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
    if (!(osm->type & PC_GASM_RESTRICT)) {
      /* have to zero the work RHS since scatter may leave some slots empty */
      ierr = VecZeroEntries(y);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
    } else {
      ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
    ierr = MatDenseRestoreColumnVecRead(X,i,&x);CHKERRQ(ierr);
  }
  ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
  ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* do the subdomain solve */
  ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
  ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
  ierr = MatDestroy(&Z);CHKERRQ(ierr);
  /* do we need to zero y?
*/ 746 ierr = MatZeroEntries(Y);CHKERRQ(ierr); 747 for (i = 0; i < N; ++i) { 748 ierr = MatDenseGetColumnVecWrite(Y,i,&y);CHKERRQ(ierr); 749 ierr = MatDenseGetColumnVecRead(W,i,&x);CHKERRQ(ierr); 750 if (!(osm->type & PC_GASM_INTERPOLATE)) { 751 ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 752 ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 753 } else { 754 ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 755 ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 756 } 757 ierr = MatDenseRestoreColumnVecRead(W,i,&x);CHKERRQ(ierr); 758 if (osm->pctoouter) { 759 ierr = MatDenseGetColumnVecWrite(Yout,i,&x);CHKERRQ(ierr); 760 ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);CHKERRQ(ierr); 763 } 764 ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);CHKERRQ(ierr); 765 } 766 ierr = MatDestroy(&W);CHKERRQ(ierr); 767 ierr = MatDestroy(&O);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout) 772 { 773 PC_GASM *osm = (PC_GASM*)pc->data; 774 PetscErrorCode ierr; 775 PetscInt i; 776 Vec x,y; 777 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 778 779 PetscFunctionBegin; 780 if (osm->pctoouter) { 781 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 783 x = osm->pcx; 784 y = osm->pcy; 785 }else{ 786 x = xin; 787 y = yout; 788 } 789 /* 790 Support for limiting the restriction or interpolation to only local 791 subdomain values (leaving the other values 0). 
     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  /* Add the subdomain results back into y (restriction/interpolation roles swapped, see Note above) */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* Map the result back out of the permuted ordering, if one is in use */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Release everything built in PCSetUp_GASM(); user-supplied subdomain index sets are
   kept when osm->user_subdomains is set (they are freed in PCDestroy_GASM()). */
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      PetscMPIInt size;
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
      if (size > 1) {
        /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
        ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      } else {
        ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      }
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
  if (!osm->user_subdomains) {
    ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
    osm->N    = PETSC_DETERMINE;
    osm->nmax = PETSC_DETERMINE;
  }
  if (osm->pctoouter) {
    ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
  }
  if (osm->permutationIS) {
    ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
  }
  if (osm->pcx) {
    ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
  }
  if (osm->pcy) {
    ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
  }
  if (osm->permutationP) {
    ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Destroy the PC: reset first, then free the subdomain index sets, the subdomain solvers,
   the split-vector arrays, and the PC_GASM context itself. */
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  /* PCReset will not destroy subdomains, if user_subdomains is true.
  */
  ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Process the -pc_gasm_* options database entries. */
static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    /* explicit overlap disables DM-defined subdomains, mirroring PCGASMSetOverlap() */
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  /* NOTE(review): the option name spelling "hierachical" looks like a typo, but renaming the
     string would silently break existing users' option files -- confirm before changing */
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

/*@
    PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
    communicator.
    Logically collective on pc

    Input Parameters:
+   pc - the preconditioner
-   N  - total number of subdomains

    Level: beginner

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

  /* Drop any previously set subdomain index sets; local subdomains will be recomputed in PCSetUp_GASM() */
  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->ois = osm->iis = NULL;

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
  osm->N             = N;
  osm->n             = PETSC_DETERMINE;
  osm->nmax          = PETSC_DETERMINE;
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Record user-supplied inner (iis) and optional outer (ois) subdomain index sets,
   taking a reference on each IS; in debug builds verifies the inner subdomains
   exactly tile this process's rows of pc->pmat. */
static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->iis  = osm->ois = NULL;
  osm->n    = n;
  osm->N    = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  if (ois) {
    ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
      osm->ois[i] = ois[i];
    }
    /*
       Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
       it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
    */
    osm->overlap = -1;
    /* inner subdomains must be provided */
    if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
  }/* end if */
  if (iis) {
    ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
      osm->iis[i] = iis[i];
    }
    if (!ois) {
      osm->ois = NULL;
      /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    PetscInt       j,rstart,rend,*covered,lsize;
    const PetscInt *indices;
    /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
    ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
    ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
    /* check if the current processor owns indices from others */
    for (i=0; i<n; i++) {
      ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
      for (j=0; j<lsize; j++) {
        if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
        else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
        else covered[indices[j]-rstart] = 1;
      }
      ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
    }
    /* check if we miss any indices */
    for (i=rstart; i<rend; i++) {
      if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
    }
    ierr = PetscFree(covered);CHKERRQ(ierr);
  }
  if (iis) osm->user_subdomains = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Record the requested overlap; errors if PCSetUp() already ran with a different value. */
static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) osm->overlap = ovl;
  PetscFunctionReturn(0);
}

/* Record the restriction/interpolation variant; type_set remembers the user chose it explicitly. */
static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Record whether subdomain index sets should be sorted (used during PCSetUp_GASM()). */
static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}

/*
   FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
   In particular, it would upset the global subdomain number calculation.
*/
/* Return the local subdomain solver array, the local count, and (via an MPI scan)
   the global index of this process's first subdomain. */
static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) *n = osm->n;
  if (first) {
    /* exclusive prefix sum of the per-process subdomain counts gives the first global index */
    ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp = osm->ksp;
    osm->same_subdomain_solvers = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
} /* PCGASMGetSubKSP_GASM() */

/*@C
    PCGASMSetSubdomains - Sets the subdomains for this processor
    for the additive Schwarz preconditioner.

    Collective on pc

    Input Parameters:
+   pc  - the preconditioner object
.   n   - the number of subdomains for this processor
.   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
-   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

    Notes:
    The IS indices use the parallel, global numbering of the vector entries.
    Inner subdomains are those where the correction is applied.
    Outer subdomains are those where the residual necessary to obtain the
    corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
    Both inner and outer subdomains can extend over several processors.
    This processor's portion of a subdomain is known as a local subdomain.

    Inner subdomains can not overlap with each other, do not have any entities from remote processors,
    and have to cover the entire local subdomain owned by the current processor. The index sets on each
    process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
    on another MPI process.

    By default the GASM preconditioner uses 1 (local) subdomain per processor.

    Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
  /* explicit subdomains disable DM-defined subdomains */
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    pc communicator must call this routine.

    Logically Collective on pc

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

    Options Database Key:
.   -pc_gasm_overlap <overlap> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor or "straddling" subdomains that intersect
    multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).

    The overlap defaults to 0, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
    not explicitly set the subdomains in application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
    PETSc to extend that overlap further, if desired.

    Level: intermediate

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;
  PC_GASM        *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  /* explicit overlap disables DM-defined subdomains */
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc     - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
    this processor.

    Collective on PC iff first_local is requested

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n_local     - the number of blocks on this processor or NULL
.   first_local - the global number of the first block on this processor or NULL,
                  all processors must request or all must pass NULL
-   ksp         - the array of KSP contexts

    Note:
    After PCGASMGetSubKSP() the array of KSPs is not to be freed

    Currently for some matrix implementations only 1 block per processor
    is supported.

    You must call KSPSetUp() before calling PCGASMGetSubKSP().

    Level: advanced

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D(),
@*/
PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
.  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
.  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
.  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

   IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
      -pc_gasm_type basic to use the standard GASM.

   Notes:
    Blocks can be shared by multiple processes.

     To set options on the solvers for each block append -sub_ to all the KSP, and PC
        options database keys.
For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separate for each block call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())

   Level: beginner

    References:
+   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
     Courant Institute, New York University Technical report
-   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Cambridge University Press.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
           PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/

/* Constructor for the GASM preconditioner: allocate and default-initialize the
   PC_GASM context and install the PC method table and composed functions. */
PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);

  /* Defaults: subdomain counts are determined at PCSetUp() time */
  osm->N                        = PETSC_DETERMINE;
  osm->n                        = PETSC_DECIDE;
  osm->nmax                     = PETSC_DETERMINE;
  osm->overlap                  = 0;
  osm->ksp                      = NULL;
  osm->gorestriction            = NULL;
  osm->girestriction            = NULL;
  osm->pctoouter                = NULL;
  osm->gx                       = NULL;
  osm->gy                       = NULL;
  osm->x                        = NULL;
  osm->y                        = NULL;
  osm->pcx                      = NULL;
  osm->pcy                      = NULL;
  osm->permutationIS            = NULL;
  osm->permutationP             = NULL;
  osm->pcmat                    = NULL;
  osm->ois                      = NULL;
  osm->iis                      = NULL;
  osm->pmat                     = NULL;
  osm->type                     = PC_GASM_RESTRICT;
  osm->same_subdomain_solvers   = PETSC_TRUE;
  osm->sort_indices             = PETSC_TRUE;
  osm->dm_subdomains            = PETSC_FALSE;
  osm->hierarchicalpartitioning = PETSC_FALSE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->matapply        = PCMatApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Create nloc non-overlapping inner subdomains that tile this process's rows of A:
   via a MatPartitioning of the local diagonal block when a real partitioner is
   available, otherwise by contiguous (block-aligned) chunks of rows. */
PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (hasop) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  }
  if (Ad) {
    ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && nloc > 1) {
    PetscBool match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt       na;
      const PetscInt *ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
        const PetscInt *row;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
        ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<nloc; i++) {
      /* spread the remainder (mbs % nloc) one extra block over the first subdomains */
      PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<nloc; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
      }
      ierr    = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<nloc; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }
  *iis = is;
  PetscFunctionReturn(0);
}

/* Create N subdomains that may straddle processes by coalescing; *n receives the local count. */
PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1538 PetscFunctionReturn(0); 1539 } 1540 1541 /*@C 1542 PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive 1543 Schwarz preconditioner for a any problem based on its matrix. 1544 1545 Collective 1546 1547 Input Parameters: 1548 + A - The global matrix operator 1549 - N - the number of global subdomains requested 1550 1551 Output Parameters: 1552 + n - the number of subdomains created on this processor 1553 - iis - the array of index sets defining the local inner subdomains (on which the correction is applied) 1554 1555 Level: advanced 1556 1557 Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor. 1558 When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local. 1559 The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping 1560 outer subdomains will be automatically generated from these according to the requested amount of 1561 overlap; this is currently supported only with local subdomains. 1562 1563 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains() 1564 @*/ 1565 PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[]) 1566 { 1567 PetscMPIInt size; 1568 PetscErrorCode ierr; 1569 1570 PetscFunctionBegin; 1571 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1572 PetscValidPointer(iis,4); 1573 1574 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N); 1575 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1576 if (N >= size) { 1577 *n = N/size + (N%size); 1578 ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr); 1579 } else { 1580 ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 /*@C 1586 PCGASMDestroySubdomains - Destroys the index sets created with 1587 PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. 
Should be
   called after setting subdomains with PCGASMSetSubdomains().

   Collective

   Input Parameters:
+  n   - the number of index sets
.  iis - the array of inner subdomains
-  ois - the array of outer subdomains, can be NULL

   Level: intermediate

   Notes:
   this is merely a convenience subroutine that walks each list,
   destroys each IS on the list, and then frees the list. At the end the
   list pointers are set to NULL.

.seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
@*/
PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
{
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Nothing to do for an empty (or negative) subdomain count. */
  if (n <= 0) PetscFunctionReturn(0);
  /* Either array pointer may be NULL; each non-NULL list is walked, its ISs destroyed,
     and the list storage itself freed. */
  if (ois) {
    PetscValidPointer(ois,3);
    if (*ois) {
      PetscValidPointer(*ois,3);
      for (i=0; i<n; i++) {
        ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree((*ois));CHKERRQ(ierr);
    }
  }
  if (iis) {
    PetscValidPointer(iis,2);
    if (*iis) {
      PetscValidPointer(*iis,2);
      for (i=0; i<n; i++) {
        ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree((*iis));CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
   Intersects the rectangular 2D subdomain with corners (xleft,ylow) and (xright,yhigh)
   on an M x N grid with this process's ownership range [first,last) of matrix rows.
   Outputs (through the *_loc pointer arguments) the bounding box of the intersection
   and, in n, the number of locally owned grid points inside the subdomain.
   Implemented as a macro so it can write through plain PetscInt* out-arguments inline.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
  PetscInt first_row = first/M, last_row = last/M+1;                                                    \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.
\ 1645 */ \ 1646 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1647 *ylow_loc = PetscMax(first_row,ylow); \ 1648 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1649 *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \ 1650 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1651 *yhigh_loc = PetscMin(last_row,yhigh); \ 1652 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1653 *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \ 1654 /* Now compute the size of the local subdomain n. */ \ 1655 *n = 0; \ 1656 if (*ylow_loc < *yhigh_loc) { \ 1657 PetscInt width = xright-xleft; \ 1658 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1659 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1660 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1661 } \ 1662 } 1663 1664 /*@ 1665 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1666 preconditioner for a two-dimensional problem on a regular grid. 1667 1668 Collective 1669 1670 Input Parameters: 1671 + M, N - the global number of grid points in the x and y directions 1672 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1673 . dof - degrees of freedom per node 1674 - overlap - overlap in mesh lines 1675 1676 Output Parameters: 1677 + Nsub - the number of local subdomains created 1678 . 
iis - array of index sets defining inner (nonoverlapping) subdomains 1679 - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains 1680 1681 Level: advanced 1682 1683 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap() 1684 @*/ 1685 PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois) 1686 { 1687 PetscErrorCode ierr; 1688 PetscMPIInt size, rank; 1689 PetscInt i, j; 1690 PetscInt maxheight, maxwidth; 1691 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1692 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1693 PetscInt x[2][2], y[2][2], n[2]; 1694 PetscInt first, last; 1695 PetscInt nidx, *idx; 1696 PetscInt ii,jj,s,q,d; 1697 PetscInt k,kk; 1698 PetscMPIInt color; 1699 MPI_Comm comm, subcomm; 1700 IS **xis = NULL, **is = ois, **is_local = iis; 1701 1702 PetscFunctionBegin; 1703 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1704 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 1705 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 1706 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1707 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1708 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1709 1710 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1711 s = 0; 1712 ystart = 0; 1713 for (j=0; j<Ndomains; ++j) { 1714 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1715 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1716 /* Vertical domain limits with an overlap. 
*/ 1717 ylow = PetscMax(ystart - overlap,0); 1718 yhigh = PetscMin(ystart + maxheight + overlap,N); 1719 xstart = 0; 1720 for (i=0; i<Mdomains; ++i) { 1721 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1722 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1723 /* Horizontal domain limits with an overlap. */ 1724 xleft = PetscMax(xstart - overlap,0); 1725 xright = PetscMin(xstart + maxwidth + overlap,M); 1726 /* 1727 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1728 */ 1729 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1730 if (nidx) ++s; 1731 xstart += maxwidth; 1732 } /* for (i = 0; i < Mdomains; ++i) */ 1733 ystart += maxheight; 1734 } /* for (j = 0; j < Ndomains; ++j) */ 1735 1736 /* Now we can allocate the necessary number of ISs. */ 1737 *nsub = s; 1738 ierr = PetscMalloc1(*nsub,is);CHKERRQ(ierr); 1739 ierr = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr); 1740 s = 0; 1741 ystart = 0; 1742 for (j=0; j<Ndomains; ++j) { 1743 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1744 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1745 /* Vertical domain limits with an overlap. */ 1746 y[0][0] = PetscMax(ystart - overlap,0); 1747 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1748 /* Vertical domain limits without an overlap. 
*/ 1749 y[1][0] = ystart; 1750 y[1][1] = PetscMin(ystart + maxheight,N); 1751 xstart = 0; 1752 for (i=0; i<Mdomains; ++i) { 1753 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1754 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1755 /* Horizontal domain limits with an overlap. */ 1756 x[0][0] = PetscMax(xstart - overlap,0); 1757 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1758 /* Horizontal domain limits without an overlap. */ 1759 x[1][0] = xstart; 1760 x[1][1] = PetscMin(xstart+maxwidth,M); 1761 /* 1762 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1763 Do this twice: first for the domains with overlaps, and once without. 1764 During the first pass create the subcommunicators, and use them on the second pass as well. 1765 */ 1766 for (q = 0; q < 2; ++q) { 1767 PetscBool split = PETSC_FALSE; 1768 /* 1769 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1770 according to whether the domain with an overlap or without is considered. 1771 */ 1772 xleft = x[q][0]; xright = x[q][1]; 1773 ylow = y[q][0]; yhigh = y[q][1]; 1774 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1775 nidx *= dof; 1776 n[q] = nidx; 1777 /* 1778 Based on the counted number of indices in the local domain *with an overlap*, 1779 construct a subcommunicator of all the processors supporting this domain. 1780 Observe that a domain with an overlap might have nontrivial local support, 1781 while the domain without an overlap might not. Hence, the decision to participate 1782 in the subcommunicator must be based on the domain with an overlap. 
1783 */ 1784 if (q == 0) { 1785 if (nidx) color = 1; 1786 else color = MPI_UNDEFINED; 1787 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr); 1788 split = PETSC_TRUE; 1789 } 1790 /* 1791 Proceed only if the number of local indices *with an overlap* is nonzero. 1792 */ 1793 if (n[0]) { 1794 if (q == 0) xis = is; 1795 if (q == 1) { 1796 /* 1797 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1798 Moreover, if the overlap is zero, the two ISs are identical. 1799 */ 1800 if (overlap == 0) { 1801 (*is_local)[s] = (*is)[s]; 1802 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1803 continue; 1804 } else { 1805 xis = is_local; 1806 subcomm = ((PetscObject)(*is)[s])->comm; 1807 } 1808 } /* if (q == 1) */ 1809 idx = NULL; 1810 ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr); 1811 if (nidx) { 1812 k = 0; 1813 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1814 PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft; 1815 PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright; 1816 kk = dof*(M*jj + x0); 1817 for (ii=x0; ii<x1; ++ii) { 1818 for (d = 0; d < dof; ++d) { 1819 idx[k++] = kk++; 1820 } 1821 } 1822 } 1823 } 1824 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr); 1825 if (split) { 1826 ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr); 1827 } 1828 }/* if (n[0]) */ 1829 }/* for (q = 0; q < 2; ++q) */ 1830 if (n[0]) ++s; 1831 xstart += maxwidth; 1832 } /* for (i = 0; i < Mdomains; ++i) */ 1833 ystart += maxheight; 1834 } /* for (j = 0; j < Ndomains; ++j) */ 1835 PetscFunctionReturn(0); 1836 } 1837 1838 /*@C 1839 PCGASMGetSubdomains - Gets the subdomains supported on this processor 1840 for the additive Schwarz preconditioner. 1841 1842 Not Collective 1843 1844 Input Parameter: 1845 . pc - the preconditioner context 1846 1847 Output Parameters: 1848 + n - the number of subdomains for this processor (default value = 1) 1849 . 
iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
-  ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)

   Notes:
   The user is responsible for destroying the ISs and freeing the returned arrays.
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
          PCGASMSetSubdomains(), PCGASMGetSubmatrices()
@*/
PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;
  PetscInt       i;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Unlike the DM-subdomain accessors below, a non-GASM PC is a hard error here. */
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  osm = (PC_GASM*)pc->data;
  if (n) *n = osm->n;
  /* Fresh arrays are allocated for the caller; the IS pointers themselves are shared with osm. */
  if (iis) {
    ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
  }
  if (ois) {
    ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
  }
  if (iis || ois) {
    for (i = 0; i < osm->n; ++i) {
      /* NOTE(review): no PetscObjectReference() is taken here, so these are borrowed
         references to osm->iis/osm->ois even though the Notes above tell the user to
         destroy them -- verify the ownership convention against the GASM implementation. */
      if (iis) (*iis)[i] = osm->iis[i];
      if (ois) (*ois)[i] = osm->ois[i];
    }
  }
  PetscFunctionReturn(0);
}

/*@C
   PCGASMGetSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.
pc - the preconditioner context

   Output Parameters:
+  n   - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Notes:
   matrices returned by this routine have the same communicators as the index sets (IS)
   used to define subdomains in PCGASMSetSubdomains()

   Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The subdomain matrices only exist once the preconditioner has been set up. */
  if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  osm = (PC_GASM*)pc->data;
  if (n) *n = osm->n;
  /* Hands back a pointer to the internal array of subdomain matrices -- not a copy. */
  if (mat) *mat = osm->pmat;
  PetscFunctionReturn(0);
}

/*@
   PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
   Logically Collective

   Input Parameters:
+  pc  - the preconditioner
-  flg - boolean indicating whether to use subdomains defined by the DM

   Options Database Key:
.
-pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains

   Level: intermediate

   Notes:
   PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
   so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
   automatically turns the latter off.

.seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,flg,2);
  /* The flag only makes sense before setup has fixed the subdomains. */
  if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (match) { /* silently ignored for non-GASM preconditioners */
    /* Honor the flag only if the user has not already set subdomains, a total
       subdomain count, or an overlap explicitly (see Notes above). */
    if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
      osm->dm_subdomains = flg;
    }
  }
  PetscFunctionReturn(0);
}

/*@
   PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
   Not Collective

   Input Parameter:
.  pc - the preconditioner

   Output Parameter:
.
flg - boolean indicating whether to use subdomains defined by the DM

   Level: intermediate

.seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidBoolPointer(flg,2);
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (match) { /* for a non-GASM PC, *flg is left untouched */
    /* flg was already validated non-NULL above, so this check is redundant but harmless */
    if (flg) *flg = osm->dm_subdomains;
  }
  PetscFunctionReturn(0);
}