1 /* 2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 3 In this version each processor may intersect multiple subdomains and any subdomain may 4 intersect multiple processors. Intersections of subdomains with processors are called *local 5 subdomains*. 6 7 N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM()) 8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains()) 9 nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM()) 10 */ 11 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 12 #include <petscdm.h> 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 PCGASMType type; /* use reduced interpolation, restriction or both */ 18 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 19 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */ 20 PetscBool sort_indices; /* flag to sort subdomain indices */ 21 PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */ 22 PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */ 23 PetscBool hierarchicalpartitioning; 24 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */ 25 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */ 26 KSP *ksp; /* linear solvers for each subdomain */ 27 Mat *pmat; /* subdomain block matrices */ 28 Vec gx,gy; /* Merged work vectors */ 29 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. 
*/ 30 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */ 31 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */ 32 VecScatter pctoouter; 33 IS permutationIS; 34 Mat permutationP; 35 Mat pcmat; 36 Vec pcx,pcy; 37 } PC_GASM; 38 39 static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation) 40 { 41 PC_GASM *osm = (PC_GASM*)pc->data; 42 PetscInt i; 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */ 47 ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr); 48 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr); 49 for (i = 0; i < osm->n; ++i) (*permutation)[i] = i; 50 ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer) 55 { 56 PC_GASM *osm = (PC_GASM*)pc->data; 57 PetscInt j,nidx; 58 const PetscInt *idx; 59 PetscViewer sviewer; 60 char *cidx; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n); 65 /* Inner subdomains. */ 66 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr); 67 /* 68 No more than 15 characters per index plus a space. 69 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 70 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 71 For nidx == 0, the whole string 16 '\0'. 
72 */ 73 #define len 16*(nidx+1)+1 74 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 75 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 76 #undef len 77 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr); 78 for (j = 0; j < nidx; ++j) { 79 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 80 } 81 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr); 82 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 83 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr); 84 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 85 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 87 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 88 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 89 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 90 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 91 ierr = PetscFree(cidx);CHKERRQ(ierr); 92 /* Outer subdomains. */ 93 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr); 94 /* 95 No more than 15 characters per index plus a space. 96 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 97 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 98 For nidx == 0, the whole string 16 '\0'. 
99 */ 100 #define len 16*(nidx+1)+1 101 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr); 102 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr); 103 #undef len 104 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr); 105 for (j = 0; j < nidx; ++j) { 106 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr); 107 } 108 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 109 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr); 110 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr); 111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 115 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 116 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 117 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 118 ierr = PetscFree(cidx);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 123 { 124 PC_GASM *osm = (PC_GASM*)pc->data; 125 const char *prefix; 126 char fname[PETSC_MAX_PATH_LEN+1]; 127 PetscInt l, d, count; 128 PetscBool doprint,found; 129 PetscViewer viewer, sviewer = NULL; 130 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */ 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 135 doprint = PETSC_FALSE; 136 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr); 137 if (!doprint) PetscFunctionReturn(0); 138 ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr); 139 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 140 ierr = 
PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr); 141 /* 142 Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer: 143 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm. 144 */ 145 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 146 l = 0; 147 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 148 for (count = 0; count < osm->N; ++count) { 149 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */ 150 if (l<osm->n) { 151 d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 152 if (numbering[d] == count) { 153 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 154 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 155 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 156 ++l; 157 } 158 } 159 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 160 } 161 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 162 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 167 { 168 PC_GASM *osm = (PC_GASM*)pc->data; 169 const char *prefix; 170 PetscErrorCode ierr; 171 PetscMPIInt rank, size; 172 PetscInt bsz; 173 PetscBool iascii,view_subdomains=PETSC_FALSE; 174 PetscViewer sviewer; 175 PetscInt count, l; 176 char overlap[256] = "user-defined overlap"; 177 char gsubdomains[256] = "unknown total number of subdomains"; 178 char msubdomains[256] = "unknown max number of local subdomains"; 179 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the 
permutation from the local ordering */ 180 181 PetscFunctionBegin; 182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr); 183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr); 184 185 if (osm->overlap >= 0) { 186 ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr); 187 } 188 if (osm->N != PETSC_DETERMINE) { 189 ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr); 190 } 191 if (osm->nmax != PETSC_DETERMINE) { 192 ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr); 193 } 194 195 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 196 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr); 197 198 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 199 if (iascii) { 200 /* 201 Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer: 202 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed 203 collectively on the comm. 
204 */ 205 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr); 206 ierr = PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 207 ierr = PetscViewerASCIIPrintf(viewer," %s\n",overlap);CHKERRQ(ierr); 208 ierr = PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);CHKERRQ(ierr); 209 ierr = PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);CHKERRQ(ierr); 210 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr); 212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 213 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 214 /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */ 215 ierr = PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");CHKERRQ(ierr); 216 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 217 ierr = PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 218 /* Make sure that everybody waits for the banner to be printed. */ 219 ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 220 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. 
*/ 221 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr); 222 l = 0; 223 for (count = 0; count < osm->N; ++count) { 224 PetscMPIInt srank, ssize; 225 if (l<osm->n) { 226 PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */ 227 if (numbering[d] == count) { 228 ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr); 229 ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr); 230 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 231 ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr); 232 ierr = PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr); 233 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 234 if (view_subdomains) { 235 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr); 236 } 237 if (!pc->setupcalled) { 238 ierr = PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr); 239 } else { 240 ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr); 241 } 242 ierr = PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 243 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 244 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr); 245 ++l; 246 } 247 } 248 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 249 } 250 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 252 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 253 /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */ 254 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 255 } 256 PetscFunctionReturn(0); 257 
} 258 259 PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]); 260 261 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc) 262 { 263 PC_GASM *osm = (PC_GASM*)pc->data; 264 MatPartitioning part; 265 MPI_Comm comm; 266 PetscMPIInt size; 267 PetscInt nlocalsubdomains,fromrows_localsize; 268 IS partitioning,fromrows,isn; 269 Vec outervec; 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 274 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 275 /* we do not need a hierarchical partitioning when 276 * the total number of subdomains is consistent with 277 * the number of MPI tasks. 278 * For the following cases, we do not need to use HP 279 * */ 280 if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0); 281 if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size); 282 nlocalsubdomains = size/osm->N; 283 osm->n = 1; 284 ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr); 285 ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr); 286 ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr); 287 ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr); 288 ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr); 289 ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr); 290 /* get new processor owner number of each vertex */ 291 ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr); 292 ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr); 293 ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr); 294 ierr = ISDestroy(&isn);CHKERRQ(ierr); 295 ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr); 296 ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr); 297 ierr = 
MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr); 298 ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr); 299 ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr); 300 ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr); 301 ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr); 302 ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr); 303 osm->permutationIS = fromrows; 304 osm->pcmat = pc->pmat; 305 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 306 pc->pmat = osm->permutationP; 307 ierr = VecDestroy(&outervec);CHKERRQ(ierr); 308 ierr = ISDestroy(&fromrows);CHKERRQ(ierr); 309 ierr = ISDestroy(&partitioning);CHKERRQ(ierr); 310 osm->n = PETSC_DETERMINE; 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode PCSetUp_GASM(PC pc) 315 { 316 PC_GASM *osm = (PC_GASM*)pc->data; 317 PetscErrorCode ierr; 318 PetscInt i,nInnerIndices,nTotalInnerIndices; 319 PetscMPIInt rank, size; 320 MatReuse scall = MAT_REUSE_MATRIX; 321 KSP ksp; 322 PC subpc; 323 const char *prefix,*pprefix; 324 Vec x,y; 325 PetscInt oni; /* Number of indices in the i-th local outer subdomain. */ 326 const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain. */ 327 PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */ 328 PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */ 329 IS gois; /* Disjoint union the global indices of outer subdomains. */ 330 IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */ 331 PetscScalar *gxarray, *gyarray; 332 PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. 
*/ 333 PetscInt num_subdomains = 0; 334 DM *subdomain_dm = NULL; 335 char **subdomain_names = NULL; 336 PetscInt *numbering; 337 338 339 PetscFunctionBegin; 340 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr); 341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr); 342 if (!pc->setupcalled) { 343 /* use a hierarchical partitioning */ 344 if (osm->hierarchicalpartitioning){ 345 ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr); 346 } 347 if (osm->n == PETSC_DETERMINE) { 348 if (osm->N != PETSC_DETERMINE) { 349 /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */ 350 ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr); 351 } else if (osm->dm_subdomains && pc->dm) { 352 /* try pc->dm next, if allowed */ 353 PetscInt d; 354 IS *inner_subdomain_is, *outer_subdomain_is; 355 ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr); 356 if (num_subdomains) { 357 ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr); 358 } 359 for (d = 0; d < num_subdomains; ++d) { 360 if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);} 361 if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);} 362 } 363 ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr); 364 ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr); 365 } else { 366 /* still no subdomains; use one per processor */ 367 osm->nmax = osm->n = 1; 368 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 369 osm->N = size; 370 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 371 } 372 } 373 if (!osm->iis) { 374 /* 375 osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied. 
376 We create the requisite number of local inner subdomains and then expand them into 377 out subdomains, if necessary. 378 */ 379 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr); 380 } 381 if (!osm->ois) { 382 /* 383 Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap 384 has been requested, copy the inner subdomains over so they can be modified. 385 */ 386 ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr); 387 for (i=0; i<osm->n; ++i) { 388 if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */ 389 ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr); 390 ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr); 391 } else { 392 ierr = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr); 393 osm->ois[i] = osm->iis[i]; 394 } 395 } 396 if (osm->overlap>0 && osm->N>1) { 397 /* Extend the "overlapping" regions by a number of steps */ 398 ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr); 399 } 400 } 401 402 /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */ 403 if (osm->nmax == PETSC_DETERMINE) { 404 PetscMPIInt inwork,outwork; 405 /* determine global number of subdomains and the max number of local subdomains */ 406 inwork = osm->n; 407 ierr = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 408 osm->nmax = outwork; 409 } 410 if (osm->N == PETSC_DETERMINE) { 411 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. 
*/ 412 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr); 413 } 414 415 if (osm->sort_indices) { 416 for (i=0; i<osm->n; i++) { 417 ierr = ISSort(osm->ois[i]);CHKERRQ(ierr); 418 ierr = ISSort(osm->iis[i]);CHKERRQ(ierr); 419 } 420 } 421 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 422 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 423 424 /* 425 Merge the ISs, create merged vectors and restrictions. 426 */ 427 /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */ 428 on = 0; 429 for (i=0; i<osm->n; i++) { 430 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 431 on += oni; 432 } 433 ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr); 434 on = 0; 435 /* Merge local indices together */ 436 for (i=0; i<osm->n; i++) { 437 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 438 ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 439 ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr); 440 ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr); 441 on += oni; 442 } 443 ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr); 444 nTotalInnerIndices = 0; 445 for (i=0; i<osm->n; i++){ 446 ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr); 447 nTotalInnerIndices += nInnerIndices; 448 } 449 ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr); 450 ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 451 452 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr); 453 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 454 ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr); 455 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr); 456 /* gois might indices not on local */ 457 ierr = VecScatterCreate(x,gois,osm->gx,goid, 
&(osm->gorestriction));CHKERRQ(ierr); 458 ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr); 459 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr); 460 ierr = VecDestroy(&x);CHKERRQ(ierr); 461 ierr = ISDestroy(&gois);CHKERRQ(ierr); 462 463 /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */ 464 { 465 PetscInt ini; /* Number of indices the i-th a local inner subdomain. */ 466 PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */ 467 PetscInt *iidx; /* Global indices in the merged local inner subdomain. */ 468 PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 469 IS giis; /* IS for the disjoint union of inner subdomains. */ 470 IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */ 471 PetscScalar *array; 472 const PetscInt *indices; 473 PetscInt k; 474 on = 0; 475 for (i=0; i<osm->n; i++) { 476 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 477 on += oni; 478 } 479 ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr); 480 ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr); 481 ierr = VecGetArray(y,&array);CHKERRQ(ierr); 482 /* set communicator id to determine where overlap is */ 483 in = 0; 484 for (i=0; i<osm->n; i++) { 485 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr); 486 for (k = 0; k < ini; ++k){ 487 array[in+k] = numbering[i]; 488 } 489 in += ini; 490 } 491 ierr = VecRestoreArray(y,&array);CHKERRQ(ierr); 492 ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 494 ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr); 495 ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr); 496 on = 0; 497 in = 0; 498 
for (i=0; i<osm->n; i++) { 499 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 500 ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr); 501 for (k=0; k<oni; k++) { 502 /* skip overlapping indices to get inner domain */ 503 if (PetscRealPart(array[on+k]) != numbering[i]) continue; 504 iidx[in] = indices[k]; 505 ioidx[in++] = gostart+on+k; 506 } 507 ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr); 508 on += oni; 509 } 510 ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr); 511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr); 512 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr); 513 ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr); 514 ierr = VecDestroy(&y);CHKERRQ(ierr); 515 ierr = ISDestroy(&giis);CHKERRQ(ierr); 516 ierr = ISDestroy(&giois);CHKERRQ(ierr); 517 } 518 ierr = ISDestroy(&goid);CHKERRQ(ierr); 519 ierr = PetscFree(numbering);CHKERRQ(ierr); 520 521 /* Create the subdomain work vectors. 
*/ 522 ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr); 523 ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr); 524 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 525 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 526 for (i=0, on=0; i<osm->n; ++i, on += oni) { 527 PetscInt oNi; 528 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr); 529 /* on a sub communicator */ 530 ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr); 531 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr); 532 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr); 533 } 534 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 535 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 536 /* Create the subdomain solvers */ 537 ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr); 538 for (i=0; i<osm->n; i++) { 539 char subprefix[PETSC_MAX_PATH_LEN+1]; 540 ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr); 541 ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr); 542 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr); 543 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 544 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 545 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? 
*/ 546 if (subdomain_dm) { 547 ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr); 548 ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr); 549 } 550 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 551 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 552 if (subdomain_names && subdomain_names[i]) { 553 ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr); 554 ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr); 555 ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr); 556 } 557 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 558 osm->ksp[i] = ksp; 559 } 560 ierr = PetscFree(subdomain_dm);CHKERRQ(ierr); 561 ierr = PetscFree(subdomain_names);CHKERRQ(ierr); 562 scall = MAT_INITIAL_MATRIX; 563 } else { /* if (pc->setupcalled) */ 564 /* 565 Destroy the submatrices from the previous iteration 566 */ 567 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 568 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 569 scall = MAT_INITIAL_MATRIX; 570 } 571 if (osm->permutationIS){ 572 ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr); 573 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr); 574 osm->pcmat = pc->pmat; 575 pc->pmat = osm->permutationP; 576 } 577 } 578 579 /* 580 Extract the submatrices. 
581 */ 582 if (size > 1) { 583 ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 584 } else { 585 ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr); 586 } 587 if (scall == MAT_INITIAL_MATRIX) { 588 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 589 for (i=0; i<osm->n; i++) { 590 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr); 591 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 592 } 593 } 594 595 /* Return control to the user so that the submatrices can be modified (e.g., to apply 596 different boundary conditions for the submatrices than for the global problem) */ 597 ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 598 599 /* 600 Loop over submatrices putting them into local ksps 601 */ 602 for (i=0; i<osm->n; i++) { 603 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr); 604 if (!pc->setupcalled) { 605 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 606 } 607 } 608 if (osm->pcmat){ 609 ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr); 610 pc->pmat = osm->pcmat; 611 osm->pcmat = NULL; 612 } 613 PetscFunctionReturn(0); 614 } 615 616 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 617 { 618 PC_GASM *osm = (PC_GASM*)pc->data; 619 PetscErrorCode ierr; 620 PetscInt i; 621 622 PetscFunctionBegin; 623 for (i=0; i<osm->n; i++) { 624 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 625 } 626 PetscFunctionReturn(0); 627 } 628 629 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout) 630 { 631 PC_GASM *osm = (PC_GASM*)pc->data; 632 PetscErrorCode ierr; 633 PetscInt i; 634 Vec x,y; 635 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 636 637 PetscFunctionBegin; 638 if (osm->pctoouter) { 639 ierr = 
VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 640 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 641 x = osm->pcx; 642 y = osm->pcy; 643 } else { 644 x = xin; 645 y = yout; 646 } 647 /* 648 support for limiting the restriction or interpolation only to the inner 649 subdomain values (leaving the other values 0). 650 */ 651 if (!(osm->type & PC_GASM_RESTRICT)) { 652 /* have to zero the work RHS since scatter may leave some slots empty */ 653 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 654 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 655 } else { 656 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 657 } 658 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr); 659 if (!(osm->type & PC_GASM_RESTRICT)) { 660 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 661 } else { 662 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 663 } 664 /* do the subdomain solves */ 665 for (i=0; i<osm->n; ++i) { 666 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 667 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr); 668 } 669 /* do we need to zero y? 
*/ 670 ierr = VecZeroEntries(y);CHKERRQ(ierr); 671 if (!(osm->type & PC_GASM_INTERPOLATE)) { 672 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 673 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 674 } else { 675 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 676 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 677 } 678 if (osm->pctoouter) { 679 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 680 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 681 } 682 PetscFunctionReturn(0); 683 } 684 685 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout) 686 { 687 PC_GASM *osm = (PC_GASM*)pc->data; 688 Mat X,Y,O=NULL,Z,W; 689 Vec x,y; 690 PetscInt i,m,M,N; 691 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 692 PetscErrorCode ierr; 693 694 PetscFunctionBegin; 695 if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 696 ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr); 697 if (osm->pctoouter) { 698 ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr); 699 ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr); 700 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr); 701 for (i = 0; i < N; ++i) { 702 ierr = MatDenseGetColumnVecRead(Xin,i,&x); 703 ierr = MatDenseGetColumnVecWrite(O,i,&y); 704 ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 705 ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 706 ierr = MatDenseRestoreColumnVecWrite(O,i,&y); 707 ierr = MatDenseRestoreColumnVecRead(Xin,i,&x); 708 } 709 X = Y = O; 710 } else { 711 X = Xin; 712 Y = Yout; 713 } 714 /* 715 support for limiting the restriction or interpolation only to the inner 716 subdomain values (leaving 
the other values 0). 717 */ 718 ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr); 719 ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr); 720 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr); 721 for (i = 0; i < N; ++i) { 722 ierr = MatDenseGetColumnVecRead(X,i,&x); 723 ierr = MatDenseGetColumnVecWrite(Z,i,&y); 724 if (!(osm->type & PC_GASM_RESTRICT)) { 725 /* have to zero the work RHS since scatter may leave some slots empty */ 726 ierr = VecZeroEntries(y);CHKERRQ(ierr); 727 ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 728 ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 729 } else { 730 ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 731 ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr); 732 } 733 ierr = MatDenseRestoreColumnVecWrite(Z,i,&y); 734 ierr = MatDenseRestoreColumnVecRead(X,i,&x); 735 } 736 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr); 737 ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 740 /* do the subdomain solve */ 741 ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr); 742 ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr); 743 ierr = MatDestroy(&Z);CHKERRQ(ierr); 744 /* do we need to zero y? 
*/ 745 ierr = MatZeroEntries(Y);CHKERRQ(ierr); 746 for (i = 0; i < N; ++i) { 747 ierr = MatDenseGetColumnVecWrite(Y,i,&y); 748 ierr = MatDenseGetColumnVecRead(W,i,&x); 749 if (!(osm->type & PC_GASM_INTERPOLATE)) { 750 ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 751 ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 752 } else { 753 ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 754 ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr); 755 } 756 ierr = MatDenseRestoreColumnVecRead(W,i,&x); 757 if (osm->pctoouter) { 758 ierr = MatDenseGetColumnVecWrite(Yout,i,&x); 759 ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 761 ierr = MatDenseRestoreColumnVecRead(Yout,i,&x); 762 } 763 ierr = MatDenseRestoreColumnVecWrite(Y,i,&y); 764 } 765 ierr = MatDestroy(&W);CHKERRQ(ierr); 766 ierr = MatDestroy(&O);CHKERRQ(ierr); 767 PetscFunctionReturn(0); 768 } 769 770 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout) 771 { 772 PC_GASM *osm = (PC_GASM*)pc->data; 773 PetscErrorCode ierr; 774 PetscInt i; 775 Vec x,y; 776 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 777 778 PetscFunctionBegin; 779 if (osm->pctoouter){ 780 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 781 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 782 x = osm->pcx; 783 y = osm->pcy; 784 }else{ 785 x = xin; 786 y = yout; 787 } 788 /* 789 Support for limiting the restriction or interpolation to only local 790 subdomain values (leaving the other values 0). 
     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Destroy all objects built during setup.  User-provided subdomain index sets
   (osm->user_subdomains set) are deliberately kept so the PC can be set up again.
*/
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      PetscMPIInt size;
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
      if (size > 1) {
        /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
        ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      } else {
        ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      }
    }
  }
  if (osm->x) {
    /* split work vectors alias storage of gx/gy; destroy the views before the merged vectors */
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
  if (!osm->user_subdomains) {
    ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
    osm->N    = PETSC_DETERMINE;
    osm->nmax = PETSC_DETERMINE;
  }
  if (osm->pctoouter) {
    ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
  }
  if (osm->permutationIS) {
    ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
  }
  if (osm->pcx) {
    ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
  }
  if (osm->pcy) {
    ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
  }
  if (osm->permutationP) {
    ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Free the PC_GASM context entirely, including user-provided subdomain ISs and the subdomain KSPs. */
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  /* PCReset will not destroy subdomains, if user_subdomains is true. */
  ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Process the -pc_gasm_* options database entries. */
static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  /* NOTE(review): the misspelled option name "hierachical" is load-bearing -- existing
     command lines depend on it, so it must not be silently corrected here */
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

/*@
    PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
                               communicator.
    Logically collective on pc

    Input Parameters:
+   pc  - the preconditioner
-   N   - total number of subdomains


    Level: beginner

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

  /* NOTE(review): the (iis,ois) argument order here differs from the PCReset_GASM() call,
     which passes (&osm->ois,&osm->iis) -- harmless if both arrays are destroyed symmetrically,
     but verify against the PCGASMDestroySubdomains() signature */
  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->ois = osm->iis = NULL;

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
  /* local counts are recomputed from N during PCSetUp_GASM() */
  osm->N    = N;
  osm->n    = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   Record the user-supplied inner (iis) and, optionally, outer (ois) subdomain index sets,
   taking a reference to each IS.  In debug builds, verifies that the inner subdomains
   exactly tile this process's rows of the preconditioning matrix.
*/
static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->iis = osm->ois = NULL;
  osm->n = n;
  osm->N = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  if (ois) {
    ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
      osm->ois[i] = ois[i];
    }
    /*
       Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
       it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
    */
    osm->overlap = -1;
    /* inner subdomains must be provided */
    if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
  }/* end if */
  if (iis) {
    ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
      osm->iis[i] = iis[i];
    }
    if (!ois) {
      osm->ois = NULL;
      /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    PetscInt        j,rstart,rend,*covered,lsize;
    const PetscInt  *indices;
    /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
    ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
    ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
    /* check if the current processor owns indices from others */
    for (i=0; i<n; i++) {
      ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
      for (j=0; j<lsize; j++) {
        if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
        else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
        else covered[indices[j]-rstart] = 1;
      }
      ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
    }
    /* check if we miss any indices */
    for (i=rstart; i<rend; i++) {
      if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
    }
    ierr = PetscFree(covered);CHKERRQ(ierr);
  }
  if (iis) osm->user_subdomains = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Record the requested overlap; may only be changed before PCSetUp(). */
static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) osm->overlap = ovl;
  PetscFunctionReturn(0);
}

/* Record the restriction/interpolation variant; type_set prevents later automatic changes. */
static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Record whether subdomain index sets should be sorted during setup. */
static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}

/*
   FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
          In particular, it would upset the global subdomain number calculation.
1078 */ 1079 static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 1080 { 1081 PC_GASM *osm = (PC_GASM*)pc->data; 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 1086 1087 if (n) *n = osm->n; 1088 if (first) { 1089 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1090 *first -= osm->n; 1091 } 1092 if (ksp) { 1093 /* Assume that local solves are now different; not necessarily 1094 true, though! This flag is used only for PCView_GASM() */ 1095 *ksp = osm->ksp; 1096 osm->same_subdomain_solvers = PETSC_FALSE; 1097 } 1098 PetscFunctionReturn(0); 1099 } /* PCGASMGetSubKSP_GASM() */ 1100 1101 /*@C 1102 PCGASMSetSubdomains - Sets the subdomains for this processor 1103 for the additive Schwarz preconditioner. 1104 1105 Collective on pc 1106 1107 Input Parameters: 1108 + pc - the preconditioner object 1109 . n - the number of subdomains for this processor 1110 . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains) 1111 - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap) 1112 1113 Notes: 1114 The IS indices use the parallel, global numbering of the vector entries. 1115 Inner subdomains are those where the correction is applied. 1116 Outer subdomains are those where the residual necessary to obtain the 1117 corrections is obtained (see PCGASMType for the use of inner/outer subdomains). 1118 Both inner and outer subdomains can extend over several processors. 1119 This processor's portion of a subdomain is known as a local subdomain. 
1120 1121 Inner subdomains can not overlap with each other, do not have any entities from remote processors, 1122 and have to cover the entire local subdomain owned by the current processor. The index sets on each 1123 process should be ordered such that the ith local subdomain is connected to the ith remote subdomain 1124 on another MPI process. 1125 1126 By default the GASM preconditioner uses 1 (local) subdomain per processor. 1127 1128 1129 Level: advanced 1130 1131 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1132 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 1133 @*/ 1134 PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[]) 1135 { 1136 PC_GASM *osm = (PC_GASM*)pc->data; 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1141 ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr); 1142 osm->dm_subdomains = PETSC_FALSE; 1143 PetscFunctionReturn(0); 1144 } 1145 1146 /*@ 1147 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 1148 additive Schwarz preconditioner. Either all or no processors in the 1149 pc communicator must call this routine. 1150 1151 Logically Collective on pc 1152 1153 Input Parameters: 1154 + pc - the preconditioner context 1155 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0) 1156 1157 Options Database Key: 1158 . -pc_gasm_overlap <overlap> - Sets overlap 1159 1160 Notes: 1161 By default the GASM preconditioner uses 1 subdomain per processor. To use 1162 multiple subdomain per perocessor or "straddling" subdomains that intersect 1163 multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>). 1164 1165 The overlap defaults to 0, so if one desires that no additional 1166 overlap be computed beyond what may have been set with a call to 1167 PCGASMSetSubdomains(), then ovl must be set to be 0. 
In particular, if one does 1168 not explicitly set the subdomains in application code, then all overlap would be computed 1169 internally by PETSc, and using an overlap of 0 would result in an GASM 1170 variant that is equivalent to the block Jacobi preconditioner. 1171 1172 Note that one can define initial index sets with any overlap via 1173 PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows 1174 PETSc to extend that overlap further, if desired. 1175 1176 Level: intermediate 1177 1178 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1179 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains() 1180 @*/ 1181 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 1182 { 1183 PetscErrorCode ierr; 1184 PC_GASM *osm = (PC_GASM*)pc->data; 1185 1186 PetscFunctionBegin; 1187 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1188 PetscValidLogicalCollectiveInt(pc,ovl,2); 1189 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 1190 osm->dm_subdomains = PETSC_FALSE; 1191 PetscFunctionReturn(0); 1192 } 1193 1194 /*@ 1195 PCGASMSetType - Sets the type of restriction and interpolation used 1196 for local problems in the additive Schwarz method. 1197 1198 Logically Collective on PC 1199 1200 Input Parameters: 1201 + pc - the preconditioner context 1202 - type - variant of GASM, one of 1203 .vb 1204 PC_GASM_BASIC - full interpolation and restriction 1205 PC_GASM_RESTRICT - full restriction, local processor interpolation 1206 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 1207 PC_GASM_NONE - local processor restriction and interpolation 1208 .ve 1209 1210 Options Database Key: 1211 . 
-pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1212 1213 Level: intermediate 1214 1215 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1216 PCGASMCreateSubdomains2D() 1217 @*/ 1218 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 1219 { 1220 PetscErrorCode ierr; 1221 1222 PetscFunctionBegin; 1223 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1224 PetscValidLogicalCollectiveEnum(pc,type,2); 1225 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /*@ 1230 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 1231 1232 Logically Collective on PC 1233 1234 Input Parameters: 1235 + pc - the preconditioner context 1236 - doSort - sort the subdomain indices 1237 1238 Level: intermediate 1239 1240 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), 1241 PCGASMCreateSubdomains2D() 1242 @*/ 1243 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 1244 { 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1249 PetscValidLogicalCollectiveBool(pc,doSort,2); 1250 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 /*@C 1255 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 1256 this processor. 1257 1258 Collective on PC iff first_local is requested 1259 1260 Input Parameter: 1261 . pc - the preconditioner context 1262 1263 Output Parameters: 1264 + n_local - the number of blocks on this processor or NULL 1265 . first_local - the global number of the first block on this processor or NULL, 1266 all processors must request or all must pass NULL 1267 - ksp - the array of KSP contexts 1268 1269 Note: 1270 After PCGASMGetSubKSP() the array of KSPes is not to be freed 1271 1272 Currently for some matrix implementations only 1 block per processor 1273 is supported. 
1274 1275 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 1276 1277 Level: advanced 1278 1279 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(), 1280 PCGASMCreateSubdomains2D(), 1281 @*/ 1282 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 1283 { 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1288 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 /* -------------------------------------------------------------------------------------*/ 1293 /*MC 1294 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 1295 its own KSP object. 1296 1297 Options Database Keys: 1298 + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors 1299 . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view 1300 . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp() 1301 . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains 1302 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1303 1304 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 1305 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 1306 -pc_gasm_type basic to use the standard GASM. 1307 1308 Notes: 1309 Blocks can be shared by multiple processes. 1310 1311 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1312 options database keys. 
        For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separate for each block call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())


   Level: beginner

    References:
+   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
     Courant Institute, New York University Technical report
-   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Cambridge University Press.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
           PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/

/* Constructor for the GASM preconditioner: allocate and default-initialize the PC_GASM
   context, install the function table, and compose the PCGASMSet*/Get* methods. */
PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);

  osm->N                        = PETSC_DETERMINE;
  osm->n                        = PETSC_DECIDE;
  osm->nmax                     = PETSC_DETERMINE;
  osm->overlap                  = 0;
  osm->ksp                      = NULL;
  osm->gorestriction            = NULL;
  osm->girestriction            = NULL;
  osm->pctoouter                = NULL;
  osm->gx                       = NULL;
  osm->gy                       = NULL;
  osm->x                        = NULL;
  osm->y                        = NULL;
  osm->pcx                      = NULL;
  osm->pcy                      = NULL;
  osm->permutationIS            = NULL;
  osm->permutationP             = NULL;
  osm->pcmat                    = NULL;
  osm->ois                      = NULL;
  osm->iis                      = NULL;
  osm->pmat                     = NULL;
  osm->type                     = PC_GASM_RESTRICT;
  osm->same_subdomain_solvers   = PETSC_TRUE;
  osm->sort_indices             = PETSC_TRUE;
  osm->dm_subdomains            = PETSC_FALSE;
  osm->hierarchicalpartitioning = PETSC_FALSE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->matapply        = PCMatApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Create nloc non-overlapping index sets partitioning this process's rows of A.
   If a matrix partitioner is available (and the diagonal block can be obtained),
   partition by the adjacency of the diagonal block; otherwise fall back to
   contiguous chunks of rows.  The caller owns the returned IS array (*iis).
*/
PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad = NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (hasop) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  }
  if (Ad) {
    ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && nloc > 1) {
    PetscBool match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt       na;
      const PetscInt *ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
        const PetscInt *row;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
        ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix; MatCreateMPIAdj takes ownership of iia/jja */
        ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<nloc; i++) {
      /* distribute the remainder (mbs % nloc) one block-row at a time over the first subdomains */
      PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block  */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<nloc; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      /* expand each block index into its bs point indices */
      ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
      }
      ierr    = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<nloc; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }
  *iis = is;
  PetscFunctionReturn(0);
}

/* Create N "straddling" subdomains (each possibly shared by several ranks) by coalescing. */
PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1540 PetscFunctionReturn(0); 1541 } 1542 1543 /*@C 1544 PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive 1545 Schwarz preconditioner for a any problem based on its matrix. 1546 1547 Collective 1548 1549 Input Parameters: 1550 + A - The global matrix operator 1551 - N - the number of global subdomains requested 1552 1553 Output Parameters: 1554 + n - the number of subdomains created on this processor 1555 - iis - the array of index sets defining the local inner subdomains (on which the correction is applied) 1556 1557 Level: advanced 1558 1559 Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor. 1560 When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local. 1561 The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping 1562 outer subdomains will be automatically generated from these according to the requested amount of 1563 overlap; this is currently supported only with local subdomains. 1564 1565 1566 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains() 1567 @*/ 1568 PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[]) 1569 { 1570 PetscMPIInt size; 1571 PetscErrorCode ierr; 1572 1573 PetscFunctionBegin; 1574 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1575 PetscValidPointer(iis,4); 1576 1577 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N); 1578 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1579 if (N >= size) { 1580 *n = N/size + (N%size); 1581 ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr); 1582 } else { 1583 ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 /*@C 1589 PCGASMDestroySubdomains - Destroys the index sets created with 1590 PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. 
Should be 1591 called after setting subdomains with PCGASMSetSubdomains(). 1592 1593 Collective 1594 1595 Input Parameters: 1596 + n - the number of index sets 1597 . iis - the array of inner subdomains, 1598 - ois - the array of outer subdomains, can be NULL 1599 1600 Level: intermediate 1601 1602 Notes: 1603 this is merely a convenience subroutine that walks each list, 1604 destroys each IS on the list, and then frees the list. At the end the 1605 list pointers are set to NULL. 1606 1607 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains() 1608 @*/ 1609 PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois) 1610 { 1611 PetscInt i; 1612 PetscErrorCode ierr; 1613 1614 PetscFunctionBegin; 1615 if (n <= 0) PetscFunctionReturn(0); 1616 if (ois) { 1617 PetscValidPointer(ois,3); 1618 if (*ois) { 1619 PetscValidPointer(*ois,3); 1620 for (i=0; i<n; i++) { 1621 ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr); 1622 } 1623 ierr = PetscFree((*ois));CHKERRQ(ierr); 1624 } 1625 } 1626 if (iis) { 1627 PetscValidPointer(iis,2); 1628 if (*iis) { 1629 PetscValidPointer(*iis,2); 1630 for (i=0; i<n; i++) { 1631 ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr); 1632 } 1633 ierr = PetscFree((*iis));CHKERRQ(ierr); 1634 } 1635 } 1636 PetscFunctionReturn(0); 1637 } 1638 1639 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1640 { \ 1641 PetscInt first_row = first/M, last_row = last/M+1; \ 1642 /* \ 1643 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1644 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1645 subdomain). \ 1646 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1647 of the intersection. 
\ 1648 */ \ 1649 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1650 *ylow_loc = PetscMax(first_row,ylow); \ 1651 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1652 *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \ 1653 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1654 *yhigh_loc = PetscMin(last_row,yhigh); \ 1655 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1656 *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \ 1657 /* Now compute the size of the local subdomain n. */ \ 1658 *n = 0; \ 1659 if (*ylow_loc < *yhigh_loc) { \ 1660 PetscInt width = xright-xleft; \ 1661 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1662 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1663 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1664 } \ 1665 } 1666 1667 /*@ 1668 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1669 preconditioner for a two-dimensional problem on a regular grid. 1670 1671 Collective 1672 1673 Input Parameters: 1674 + M, N - the global number of grid points in the x and y directions 1675 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1676 . dof - degrees of freedom per node 1677 - overlap - overlap in mesh lines 1678 1679 Output Parameters: 1680 + Nsub - the number of local subdomains created 1681 . 
iis - array of index sets defining inner (nonoverlapping) subdomains 1682 - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains 1683 1684 1685 Level: advanced 1686 1687 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap() 1688 @*/ 1689 PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois) 1690 { 1691 PetscErrorCode ierr; 1692 PetscMPIInt size, rank; 1693 PetscInt i, j; 1694 PetscInt maxheight, maxwidth; 1695 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1696 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1697 PetscInt x[2][2], y[2][2], n[2]; 1698 PetscInt first, last; 1699 PetscInt nidx, *idx; 1700 PetscInt ii,jj,s,q,d; 1701 PetscInt k,kk; 1702 PetscMPIInt color; 1703 MPI_Comm comm, subcomm; 1704 IS **xis = NULL, **is = ois, **is_local = iis; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1708 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 1709 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 1710 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1711 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1712 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1713 1714 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1715 s = 0; 1716 ystart = 0; 1717 for (j=0; j<Ndomains; ++j) { 1718 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1719 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1720 /* Vertical domain limits with an overlap. 
*/ 1721 ylow = PetscMax(ystart - overlap,0); 1722 yhigh = PetscMin(ystart + maxheight + overlap,N); 1723 xstart = 0; 1724 for (i=0; i<Mdomains; ++i) { 1725 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1726 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1727 /* Horizontal domain limits with an overlap. */ 1728 xleft = PetscMax(xstart - overlap,0); 1729 xright = PetscMin(xstart + maxwidth + overlap,M); 1730 /* 1731 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1732 */ 1733 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1734 if (nidx) ++s; 1735 xstart += maxwidth; 1736 } /* for (i = 0; i < Mdomains; ++i) */ 1737 ystart += maxheight; 1738 } /* for (j = 0; j < Ndomains; ++j) */ 1739 1740 /* Now we can allocate the necessary number of ISs. */ 1741 *nsub = s; 1742 ierr = PetscMalloc1(*nsub,is);CHKERRQ(ierr); 1743 ierr = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr); 1744 s = 0; 1745 ystart = 0; 1746 for (j=0; j<Ndomains; ++j) { 1747 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1748 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1749 /* Vertical domain limits with an overlap. */ 1750 y[0][0] = PetscMax(ystart - overlap,0); 1751 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1752 /* Vertical domain limits without an overlap. 
*/ 1753 y[1][0] = ystart; 1754 y[1][1] = PetscMin(ystart + maxheight,N); 1755 xstart = 0; 1756 for (i=0; i<Mdomains; ++i) { 1757 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1758 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1759 /* Horizontal domain limits with an overlap. */ 1760 x[0][0] = PetscMax(xstart - overlap,0); 1761 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1762 /* Horizontal domain limits without an overlap. */ 1763 x[1][0] = xstart; 1764 x[1][1] = PetscMin(xstart+maxwidth,M); 1765 /* 1766 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1767 Do this twice: first for the domains with overlaps, and once without. 1768 During the first pass create the subcommunicators, and use them on the second pass as well. 1769 */ 1770 for (q = 0; q < 2; ++q) { 1771 PetscBool split = PETSC_FALSE; 1772 /* 1773 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1774 according to whether the domain with an overlap or without is considered. 1775 */ 1776 xleft = x[q][0]; xright = x[q][1]; 1777 ylow = y[q][0]; yhigh = y[q][1]; 1778 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1779 nidx *= dof; 1780 n[q] = nidx; 1781 /* 1782 Based on the counted number of indices in the local domain *with an overlap*, 1783 construct a subcommunicator of all the processors supporting this domain. 1784 Observe that a domain with an overlap might have nontrivial local support, 1785 while the domain without an overlap might not. Hence, the decision to participate 1786 in the subcommunicator must be based on the domain with an overlap. 
1787 */ 1788 if (q == 0) { 1789 if (nidx) color = 1; 1790 else color = MPI_UNDEFINED; 1791 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr); 1792 split = PETSC_TRUE; 1793 } 1794 /* 1795 Proceed only if the number of local indices *with an overlap* is nonzero. 1796 */ 1797 if (n[0]) { 1798 if (q == 0) xis = is; 1799 if (q == 1) { 1800 /* 1801 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1802 Moreover, if the overlap is zero, the two ISs are identical. 1803 */ 1804 if (overlap == 0) { 1805 (*is_local)[s] = (*is)[s]; 1806 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1807 continue; 1808 } else { 1809 xis = is_local; 1810 subcomm = ((PetscObject)(*is)[s])->comm; 1811 } 1812 } /* if (q == 1) */ 1813 idx = NULL; 1814 ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr); 1815 if (nidx) { 1816 k = 0; 1817 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1818 PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft; 1819 PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright; 1820 kk = dof*(M*jj + x0); 1821 for (ii=x0; ii<x1; ++ii) { 1822 for (d = 0; d < dof; ++d) { 1823 idx[k++] = kk++; 1824 } 1825 } 1826 } 1827 } 1828 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr); 1829 if (split) { 1830 ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr); 1831 } 1832 }/* if (n[0]) */ 1833 }/* for (q = 0; q < 2; ++q) */ 1834 if (n[0]) ++s; 1835 xstart += maxwidth; 1836 } /* for (i = 0; i < Mdomains; ++i) */ 1837 ystart += maxheight; 1838 } /* for (j = 0; j < Ndomains; ++j) */ 1839 PetscFunctionReturn(0); 1840 } 1841 1842 /*@C 1843 PCGASMGetSubdomains - Gets the subdomains supported on this processor 1844 for the additive Schwarz preconditioner. 1845 1846 Not Collective 1847 1848 Input Parameter: 1849 . pc - the preconditioner context 1850 1851 Output Parameters: 1852 + n - the number of subdomains for this processor (default value = 1) 1853 . 
iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
-  ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)

   Notes:
   The user is responsible for destroying the ISs and freeing the returned arrays.
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
          PCGASMSetSubdomains(), PCGASMGetSubmatrices()
@*/
PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;
  PetscInt       k;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  osm = (PC_GASM*)pc->data;
  if (n) *n = osm->n;
  /* Allocate fresh arrays for the caller and fill them with shallow copies of the
     subdomain ISs; the ISs themselves are not referenced here. */
  if (iis) {ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);}
  if (ois) {ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);}
  if (iis || ois) {
    for (k = 0; k < osm->n; ++k) {
      if (iis) (*iis)[k] = osm->iis[k];
      if (ois) (*ois)[k] = osm->ois[k];
    }
  }
  PetscFunctionReturn(0);
}

/*@C
   PCGASMGetSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.
pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices

   Notes:
   Matrices returned by this routine have the same communicators as the index sets (IS)
   used to define subdomains in PCGASMSetSubdomains()

   Level: advanced

.seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The subdomain matrices are built in PCSetUp_GASM(), so setup must have run. */
  if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
  osm = (PC_GASM*)pc->data;
  if (n)   *n   = osm->n;
  if (mat) *mat = osm->pmat;
  PetscFunctionReturn(0);
}

/*@
   PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

   Logically Collective

   Input Parameters:
+  pc - the preconditioner
-  flg - boolean indicating whether to use subdomains defined by the DM

   Options Database Key:
.
-pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains

   Level: intermediate

   Notes:
   PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
   so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
   automatically turns the latter off.

.seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,flg,2);
  if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (match) {
    /* Honor the flag only if the user has not already fixed the subdomains,
       their total number, or the overlap -- those settings take precedence. */
    if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
      osm->dm_subdomains = flg;
    }
  }
  PetscFunctionReturn(0);
}

/*@
   PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

   Not Collective

   Input Parameter:
.  pc - the preconditioner

   Output Parameter:
.
flg - boolean indicating whether to use subdomains defined by the DM

   Level: intermediate

.seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidBoolPointer(flg,2);
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  /* For a non-GASM PC the flag is left untouched rather than raising an error. */
  if (match) {
    if (flg) *flg = osm->dm_subdomains;
  }
  PetscFunctionReturn(0);
}