1 2 /* 3 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 4 In this version each processor may have any number of subdomains and a subdomain may live on multiple 5 processors. 6 7 N - total number of subdomains on all processors 8 n - actual number of subdomains on this processor 9 nmax - maximum number of subdomains per processor 10 */ 11 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 12 13 typedef struct { 14 PetscInt N,n,nmax; 15 PetscInt overlap; /* overlap requested by user */ 16 KSP *ksp; /* linear solvers for each block */ 17 Vec gx,gy; /* Merged work vectors */ 18 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 19 IS gis, gis_local; /* merged ISs */ 20 VecScatter grestriction; /* merged restriction */ 21 VecScatter gprolongation; /* merged prolongation */ 22 IS *is; /* index set that defines each overlapping subdomain */ 23 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 24 Mat *pmat; /* subdomain block matrices */ 25 PCGASMType type; /* use reduced interpolation, restriction or both */ 26 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 27 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 28 PetscBool sort_indices; /* flag to sort subdomain indices */ 29 } PC_GASM; 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCGASMPrintSubdomains" 33 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 34 { 35 PC_GASM *osm = (PC_GASM*)pc->data; 36 const char *prefix; 37 char fname[PETSC_MAX_PATH_LEN+1]; 38 PetscViewer viewer, sviewer; 39 PetscInt i,j,nidx; 40 const PetscInt *idx; 41 char *cidx; 42 PetscBool found; 43 PetscMPIInt size, rank; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 48 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 49 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 50 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 51 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 52 for (i=0;i<osm->n;++i) { 53 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 54 ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr); 55 /* 56 No more than 15 characters per index plus a space. 57 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 58 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 59 For nidx == 0, the whole string 16 '\0'. 60 */ 61 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 62 ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr); 63 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 64 for(j = 0; j < nidx; ++j) { 65 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 66 } 67 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 68 ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr); 69 70 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr); 72 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 73 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 74 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 75 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 76 ierr = PetscFree(cidx);CHKERRQ(ierr); 77 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 78 if (osm->is_local) { 79 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 80 ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr); 81 /* 82 No more than 15 characters per index plus a space. 83 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 84 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 85 For nidx == 0, the whole string 16 '\0'. 86 */ 87 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 88 ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr); 89 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 90 for(j = 0; j < nidx; ++j) { 91 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 92 } 93 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 94 ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr); 95 96 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 97 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr); 98 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 99 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 100 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 101 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 102 ierr = PetscFree(cidx);CHKERRQ(ierr); 103 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 104 } 105 } 106 PetscFunctionReturn(0); 107 } 108 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCView_GASM" 112 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 113 { 114 PC_GASM *osm = (PC_GASM*)pc->data; 115 const char *prefix; 116 PetscErrorCode ierr; 117 PetscMPIInt rank, size; 118 PetscInt i,bsz; 119 PetscBool iascii,isstring, print_subdomains=PETSC_FALSE; 120 PetscViewer sviewer; 121 122 123 PetscFunctionBegin; 124 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 125 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 126 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 127 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 128 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 129 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 130 if (iascii) { 131 char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set"; 132 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 133 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 134 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 135 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 136 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 137 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 138 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 139 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 140 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 141 if (osm->same_local_solves) { 142 if (osm->ksp) { 143 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 144 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 145 if (!rank) { 146 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 147 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 148 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 149 } 150 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 151 } 152 } else { 153 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 154 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 155 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 156 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 157 for (i=0; i<osm->nmax; i++) { 158 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 159 if (i < osm->n) { 160 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 161 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr); 162 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 163 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 164 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 165 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 166 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr); 167 } 168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 169 } 170 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 171 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 172 } 173 } else if (isstring) { 174 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 175 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 176 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 177 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 178 } else { 179 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 180 } 181 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 182 if(print_subdomains) { 183 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 189 190 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "PCSetUp_GASM" 194 static PetscErrorCode PCSetUp_GASM(PC pc) 195 { 196 PC_GASM *osm = (PC_GASM*)pc->data; 197 PetscErrorCode ierr; 198 PetscBool symset,flg; 199 PetscInt i; 200 PetscMPIInt rank, size; 201 MatReuse scall = MAT_REUSE_MATRIX; 202 KSP ksp; 203 PC subpc; 204 const char *prefix,*pprefix; 205 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 206 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 207 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 208 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 209 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */ 210 Vec x,y; 211 PetscScalar *gxarray, *gyarray; 212 PetscInt gfirst, glast; 213 DM *domain_dm = PETSC_NULL; 214 215 PetscFunctionBegin; 216 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 217 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 218 if (!pc->setupcalled) { 219 220 if (!osm->type_set) { 221 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 222 if (symset && flg) { osm->type = PC_GASM_BASIC; } 223 } 224 225 if (osm->N == PETSC_DECIDE && osm->n < 1) { 226 /* no subdomains given, try pc->dm */ 227 if(pc->dm) { 228 char ddm_name[1024]; 229 DM ddm; 230 PetscBool flg; 231 PetscInt num_domains, d; 232 char **domain_names; 233 IS *domain_is; 234 /* Allow the user to request a decomposition DM by name */ 235 ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr); 236 ierr = PetscOptionsString("-pc_asm_decomposition_dm", "Name of the DM defining the composition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr); 237 if(flg) { 238 ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr); 239 if(!ddm) { 240 SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Uknown DM decomposition name %s", ddm_name); 241 } 242 ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr); 243 ierr = PCSetDM(pc,ddm); CHKERRQ(ierr); 244 } 245 ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm); CHKERRQ(ierr); 246 ierr = PCGASMSetLocalSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr); 247 for(d = 0; d < num_domains; ++d) { 248 ierr = PetscFree(domain_names[d]); CHKERRQ(ierr); 249 ierr = ISDestroy(&domain_is[d]); CHKERRQ(ierr); 250 } 251 ierr = PetscFree(domain_names);CHKERRQ(ierr); 252 ierr = PetscFree(domain_is);CHKERRQ(ierr); 253 } 254 else { /* no dm, use one per processor */ 255 osm->nmax = osm->n = 1; 256 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 257 osm->N = size; 258 } 259 } else if (osm->N == PETSC_DECIDE) { 260 PetscInt inwork[2], outwork[2]; 261 /* determine global number of subdomains and the max number of local subdomains */ 262 inwork[0] = inwork[1] = osm->n; 263 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 264 osm->nmax = outwork[0]; 265 osm->N = outwork[1]; 266 } 267 if (!osm->is){ /* create the index sets */ 268 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 269 } 270 if (!osm->is_local) { 271 /* 272 This indicates that osm->is should define a nonoverlapping decomposition 273 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains, 274 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 275 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 276 Therefore, osm->is will be used to define osm->is_local. 277 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 278 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 279 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 280 */ 281 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 282 for (i=0; i<osm->n; i++) { 283 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 284 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 285 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 286 } else { 287 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 288 osm->is_local[i] = osm->is[i]; 289 } 290 } 291 } 292 /* Beyond this point osm->is_local is not null. */ 293 if (osm->overlap > 0) { 294 /* Extend the "overlapping" regions by a number of steps */ 295 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 296 } 297 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 298 flg = PETSC_FALSE; 299 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 300 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 301 302 if (osm->sort_indices) { 303 for (i=0; i<osm->n; i++) { 304 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 305 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 306 } 307 } 308 /* Merge the ISs, create merged vectors and scatter contexts. */ 309 /* Restriction ISs. */ 310 ddn = 0; 311 for (i=0; i<osm->n; i++) { 312 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 313 ddn += dn; 314 } 315 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr); 316 ddn = 0; 317 for (i=0; i<osm->n; i++) { 318 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 319 ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr); 320 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr); 321 ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr); 322 ddn += dn; 323 } 324 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr); 325 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 326 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr); 327 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 328 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 329 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr); 330 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr); 331 ierr = ISDestroy(&gid);CHKERRQ(ierr); 332 /* Prolongation ISs */ 333 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */ 334 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 335 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 336 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 337 /**/ 338 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 339 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */ 340 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 341 IS gis_llocal; /* IS with ddidx_llocal indices. */ 342 PetscInt j; 343 ddn_local = 0; 344 for (i=0; i<osm->n; i++) { 345 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 346 ddn_local += dn_local; 347 } 348 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr); 349 ddn_local = 0; 350 for (i=0; i<osm->n; i++) { 351 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 352 ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr); 353 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr); 354 ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr); 355 ddn_local += dn_local; 356 } 357 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr); 358 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 360 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 361 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 362 if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 363 /* Now convert these localized indices into the global indices into the merged output vector. */ 364 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr); 365 for(j=0; j < ddn_llocal; ++j) { 366 ddidx_llocal[j] += gfirst; 367 } 368 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr); 369 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr); 370 ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr); 371 } 372 /* Create the subdomain work vectors. */ 373 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 374 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 375 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 376 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 377 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 378 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 379 PetscInt dN; 380 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 381 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 382 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 383 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 384 } 385 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 386 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 387 ierr = VecDestroy(&x);CHKERRQ(ierr); 388 ierr = VecDestroy(&y);CHKERRQ(ierr); 389 /* Create the local solvers */ 390 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 391 for (i=0; i<osm->n; i++) { /* KSPs are local */ 392 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 393 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 394 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 395 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 396 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 397 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 398 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 399 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 400 osm->ksp[i] = ksp; 401 } 402 scall = MAT_INITIAL_MATRIX; 403 404 } else { 405 /* 406 Destroy the blocks from the previous iteration 407 */ 408 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 409 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 410 scall = MAT_INITIAL_MATRIX; 411 } 412 } 413 414 /* 415 Extract out the submatrices. 416 */ 417 if(size > 1) { 418 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 419 } 420 else { 421 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 422 } 423 if (scall == MAT_INITIAL_MATRIX) { 424 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 425 for (i=0; i<osm->n; i++) { 426 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 427 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 428 } 429 } 430 431 /* Return control to the user so that the submatrices can be modified (e.g., to apply 432 different boundary conditions for the submatrices than for the global problem) */ 433 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 434 435 /* 436 Loop over submatrices putting them into local ksp 437 */ 438 for (i=0; i<osm->n; i++) { 439 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 440 if (!pc->setupcalled) { 441 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 442 } 443 } 444 445 PetscFunctionReturn(0); 446 } 447 448 #undef __FUNCT__ 449 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 450 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 451 { 452 PC_GASM *osm = (PC_GASM*)pc->data; 453 PetscErrorCode ierr; 454 PetscInt i; 455 456 PetscFunctionBegin; 457 for (i=0; i<osm->n; i++) { 458 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 459 } 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "PCApply_GASM" 465 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 466 { 467 PC_GASM *osm = (PC_GASM*)pc->data; 468 PetscErrorCode ierr; 469 PetscInt i; 470 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 471 472 PetscFunctionBegin; 473 /* 474 Support for limiting the restriction or interpolation to only local 475 subdomain values (leaving the other values 0). 476 */ 477 if (!(osm->type & PC_GASM_RESTRICT)) { 478 forward = SCATTER_FORWARD_LOCAL; 479 /* have to zero the work RHS since scatter may leave some slots empty */ 480 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 481 } 482 if (!(osm->type & PC_GASM_INTERPOLATE)) { 483 reverse = SCATTER_REVERSE_LOCAL; 484 } 485 486 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 487 ierr = VecZeroEntries(y);CHKERRQ(ierr); 488 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 489 /* do the subdomain solves */ 490 for (i=0; i<osm->n; ++i) { 491 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 492 } 493 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 494 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "PCApplyTranspose_GASM" 500 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 501 { 502 PC_GASM *osm = (PC_GASM*)pc->data; 503 PetscErrorCode ierr; 504 PetscInt i; 505 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 506 507 PetscFunctionBegin; 508 /* 509 Support for limiting the restriction or interpolation to only local 510 subdomain values (leaving the other values 0). 511 512 Note: these are reversed from the PCApply_GASM() because we are applying the 513 transpose of the three terms 514 */ 515 if (!(osm->type & PC_GASM_INTERPOLATE)) { 516 forward = SCATTER_FORWARD_LOCAL; 517 /* have to zero the work RHS since scatter may leave some slots empty */ 518 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 519 } 520 if (!(osm->type & PC_GASM_RESTRICT)) { 521 reverse = SCATTER_REVERSE_LOCAL; 522 } 523 524 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 525 ierr = VecZeroEntries(y);CHKERRQ(ierr); 526 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 527 /* do the local solves */ 528 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 529 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 530 } 531 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 532 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "PCReset_GASM" 538 static PetscErrorCode PCReset_GASM(PC pc) 539 { 540 PC_GASM *osm = (PC_GASM*)pc->data; 541 PetscErrorCode ierr; 542 PetscInt i; 543 544 PetscFunctionBegin; 545 if (osm->ksp) { 546 for (i=0; i<osm->n; i++) { 547 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 548 } 549 } 550 if (osm->pmat) { 551 if (osm->n > 0) { 552 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 553 } 554 } 555 if (osm->x) { 556 for (i=0; i<osm->n; i++) { 557 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr); 558 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr); 559 } 560 } 561 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr); 562 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr); 563 564 ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr); 565 ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr); 566 if (osm->is) { 567 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 568 osm->is = 0; 569 osm->is_local = 0; 570 } 571 ierr = ISDestroy(&osm->gis);CHKERRQ(ierr); 572 ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "PCDestroy_GASM" 578 static PetscErrorCode PCDestroy_GASM(PC pc) 579 { 580 PC_GASM *osm = (PC_GASM*)pc->data; 581 PetscErrorCode ierr; 582 PetscInt i; 583 584 PetscFunctionBegin; 585 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 586 if (osm->ksp) { 587 for (i=0; i<osm->n; i++) { 588 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 589 } 590 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 591 } 592 ierr = PetscFree(osm->x);CHKERRQ(ierr); 593 ierr = PetscFree(osm->y);CHKERRQ(ierr); 594 ierr = PetscFree(pc->data);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "PCSetFromOptions_GASM" 600 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 601 PC_GASM *osm = (PC_GASM*)pc->data; 602 PetscErrorCode ierr; 603 PetscInt blocks,ovl; 604 PetscBool symset,flg; 605 PCGASMType gasmtype; 606 607 PetscFunctionBegin; 608 /* set the type to symmetric if matrix is symmetric */ 609 if (!osm->type_set && pc->pmat) { 610 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 611 if (symset && flg) { osm->type = PC_GASM_BASIC; } 612 } 613 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 614 ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 615 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); } 616 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 617 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 618 flg = PETSC_FALSE; 619 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 620 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 621 ierr = PetscOptionsTail();CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 /*------------------------------------------------------------------------------------*/ 626 627 EXTERN_C_BEGIN 628 #undef __FUNCT__ 629 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM" 630 PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[]) 631 { 632 PC_GASM *osm = (PC_GASM*)pc->data; 633 PetscErrorCode ierr; 634 PetscInt i; 635 636 PetscFunctionBegin; 637 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n); 638 if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp()."); 639 640 if (!pc->setupcalled) { 641 osm->n = n; 642 osm->is = 0; 643 osm->is_local = 0; 644 if (is) { 645 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 646 } 647 if (is_local) { 648 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 649 } 650 if (osm->is) { 651 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 652 } 653 if (is) { 654 ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr); 655 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 656 /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */ 657 osm->overlap = -1; 658 } 659 if (is_local) { 660 ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 661 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 EXTERN_C_END 667 668 EXTERN_C_BEGIN 669 #undef __FUNCT__ 670 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 671 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) { 672 PC_GASM *osm = (PC_GASM*)pc->data; 673 PetscErrorCode ierr; 674 PetscMPIInt rank,size; 675 PetscInt n; 676 677 PetscFunctionBegin; 678 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 679 680 /* 681 Split the subdomains equally among all processors 682 */ 683 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 684 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 685 n = N/size + ((N % size) > rank); 686 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 687 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 688 if (!pc->setupcalled) { 689 if (osm->is) { 690 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 691 } 692 osm->N = N; 693 osm->n = n; 694 osm->is = 0; 695 osm->is_local = 0; 696 } 697 PetscFunctionReturn(0); 698 }/* PCGASMSetTotalSubdomains_GASM() */ 699 EXTERN_C_END 700 701 EXTERN_C_BEGIN 702 #undef __FUNCT__ 703 #define __FUNCT__ "PCGASMSetOverlap_GASM" 704 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 705 { 706 PC_GASM *osm = (PC_GASM*)pc->data; 707 708 PetscFunctionBegin; 709 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 710 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp()."); 711 if (!pc->setupcalled) { 712 osm->overlap = ovl; 713 } 714 PetscFunctionReturn(0); 715 } 716 EXTERN_C_END 717 718 EXTERN_C_BEGIN 719 #undef __FUNCT__ 720 #define __FUNCT__ "PCGASMSetType_GASM" 721 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 722 { 723 PC_GASM *osm = (PC_GASM*)pc->data; 724 725 PetscFunctionBegin; 726 osm->type = type; 727 osm->type_set = PETSC_TRUE; 728 PetscFunctionReturn(0); 729 } 730 EXTERN_C_END 731 732 EXTERN_C_BEGIN 733 #undef __FUNCT__ 734 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 735 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 736 { 737 PC_GASM *osm = (PC_GASM*)pc->data; 738 739 PetscFunctionBegin; 740 osm->sort_indices = doSort; 741 PetscFunctionReturn(0); 742 } 743 EXTERN_C_END 744 745 EXTERN_C_BEGIN 746 #undef __FUNCT__ 747 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 748 /* 749 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 750 In particular, it would upset the global subdomain number calculation. 751 */ 752 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 753 { 754 PC_GASM *osm = (PC_GASM*)pc->data; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 759 760 if (n) { 761 *n = osm->n; 762 } 763 if (first) { 764 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 765 *first -= osm->n; 766 } 767 if (ksp) { 768 /* Assume that local solves are now different; not necessarily 769 true though! This flag is used only for PCView_GASM() */ 770 *ksp = osm->ksp; 771 osm->same_local_solves = PETSC_FALSE; 772 } 773 PetscFunctionReturn(0); 774 }/* PCGASMGetSubKSP_GASM() */ 775 EXTERN_C_END 776 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "PCGASMSetLocalSubdomains" 780 /*@C 781 PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor 782 only) for the additive Schwarz preconditioner. 783 784 Collective on PC 785 786 Input Parameters: 787 + pc - the preconditioner context 788 . n - the number of subdomains for this processor (default value = 1) 789 . is - the index set that defines the subdomains for this processor 790 (or PETSC_NULL for PETSc to determine subdomains) 791 - is_local - the index sets that define the local part of the subdomains for this processor 792 (or PETSC_NULL to use the default of 1 subdomain per process) 793 794 Notes: 795 The IS numbering is in the parallel, global numbering of the vector. 796 797 By default the GASM preconditioner uses 1 block per processor. 798 799 Use PCGASMSetTotalSubdomains() to set the subdomains for all processors. 800 801 Level: advanced 802 803 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 804 805 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 806 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 807 @*/ 808 PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[]) 809 { 810 PetscErrorCode ierr; 811 812 PetscFunctionBegin; 813 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 814 ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 #undef __FUNCT__ 819 #define __FUNCT__ "PCGASMSetTotalSubdomains" 820 /*@C 821 PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the 822 additive Schwarz preconditioner. Either all or no processors in the 823 PC communicator must call this routine, with the same index sets. 824 825 Collective on PC 826 827 Input Parameters: 828 + pc - the preconditioner context 829 . n - the number of subdomains for all processors 830 . is - the index sets that define the subdomains for all processor 831 (or PETSC_NULL for PETSc to determine subdomains) 832 - is_local - the index sets that define the local part of the subdomains for this processor 833 (or PETSC_NULL to use the default of 1 subdomain per process) 834 835 Options Database Key: 836 To set the total number of subdomain blocks rather than specify the 837 index sets, use the option 838 . -pc_gasm_blocks <blks> - Sets total blocks 839 840 Notes: 841 Currently you cannot use this to set the actual subdomains with the argument is. 842 843 By default the GASM preconditioner uses 1 block per processor. 844 845 These index sets cannot be destroyed until after completion of the 846 linear solves for which the GASM preconditioner is being used. 847 848 Use PCGASMSetLocalSubdomains() to set local subdomains. 849 850 Level: advanced 851 852 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz 853 854 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 855 PCGASMCreateSubdomains2D() 856 @*/ 857 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N) 858 { 859 PetscErrorCode ierr; 860 861 PetscFunctionBegin; 862 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 863 ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "PCGASMSetOverlap" 869 /*@ 870 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the 871 additive Schwarz preconditioner. Either all or no processors in the 872 PC communicator must call this routine. 873 874 Logically Collective on PC 875 876 Input Parameters: 877 + pc - the preconditioner context 878 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1) 879 880 Options Database Key: 881 . -pc_gasm_overlap <ovl> - Sets overlap 882 883 Notes: 884 By default the GASM preconditioner uses 1 block per processor. To use 885 multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and 886 PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>). 887 888 The overlap defaults to 1, so if one desires that no additional 889 overlap be computed beyond what may have been set with a call to 890 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl 891 must be set to be 0. In particular, if one does not explicitly set 892 the subdomains an application code, then all overlap would be computed 893 internally by PETSc, and using an overlap of 0 would result in an GASM 894 variant that is equivalent to the block Jacobi preconditioner. 895 896 Note that one can define initial index sets with any overlap via 897 PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine 898 PCGASMSetOverlap() merely allows PETSc to extend that overlap further 899 if desired. 900 901 Level: intermediate 902 903 .keywords: PC, GASM, set, overlap 904 905 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 906 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 907 @*/ 908 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl) 909 { 910 PetscErrorCode ierr; 911 912 PetscFunctionBegin; 913 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 914 PetscValidLogicalCollectiveInt(pc,ovl,2); 915 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 #undef __FUNCT__ 920 #define __FUNCT__ "PCGASMSetType" 921 /*@ 922 PCGASMSetType - Sets the type of restriction and interpolation used 923 for local problems in the additive Schwarz method. 924 925 Logically Collective on PC 926 927 Input Parameters: 928 + pc - the preconditioner context 929 - type - variant of GASM, one of 930 .vb 931 PC_GASM_BASIC - full interpolation and restriction 932 PC_GASM_RESTRICT - full restriction, local processor interpolation 933 PC_GASM_INTERPOLATE - full interpolation, local processor restriction 934 PC_GASM_NONE - local processor restriction and interpolation 935 .ve 936 937 Options Database Key: 938 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 939 940 Level: intermediate 941 942 .keywords: PC, GASM, set, type 943 944 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 945 PCGASMCreateSubdomains2D() 946 @*/ 947 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 948 { 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 953 PetscValidLogicalCollectiveEnum(pc,type,2); 954 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNCT__ 959 #define __FUNCT__ "PCGASMSetSortIndices" 960 /*@ 961 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 962 963 Logically Collective on PC 964 965 Input Parameters: 966 + pc - the preconditioner context 967 - doSort - sort the subdomain indices 968 969 Level: intermediate 970 971 .keywords: PC, GASM, set, type 972 973 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 974 PCGASMCreateSubdomains2D() 975 @*/ 976 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 977 { 978 PetscErrorCode ierr; 979 980 PetscFunctionBegin; 981 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 982 PetscValidLogicalCollectiveBool(pc,doSort,2); 983 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "PCGASMGetSubKSP" 989 /*@C 990 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 991 this processor. 992 993 Collective on PC iff first_local is requested 994 995 Input Parameter: 996 . pc - the preconditioner context 997 998 Output Parameters: 999 + n_local - the number of blocks on this processor or PETSC_NULL 1000 . first_local - the global number of the first block on this processor or PETSC_NULL, 1001 all processors must request or all must pass PETSC_NULL 1002 - ksp - the array of KSP contexts 1003 1004 Note: 1005 After PCGASMGetSubKSP() the array of KSPes is not to be freed 1006 1007 Currently for some matrix implementations only 1 block per processor 1008 is supported. 1009 1010 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 1011 1012 Level: advanced 1013 1014 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context 1015 1016 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), 1017 PCGASMCreateSubdomains2D(), 1018 @*/ 1019 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 1020 { 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1025 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 /* -------------------------------------------------------------------------------------*/ 1030 /*MC 1031 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 1032 its own KSP object. 1033 1034 Options Database Keys: 1035 + -pc_gasm_truelocal - Activates PCGGASMSetUseTrueLocal() 1036 . -pc_gasm_blocks <blks> - Sets total blocks 1037 . -pc_gasm_overlap <ovl> - Sets overlap 1038 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 1039 1040 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 1041 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 1042 -pc_gasm_type basic to use the standard GASM. 1043 1044 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 1045 than one processor. Defaults to one block per processor. 1046 1047 To set options on the solvers for each block append -sub_ to all the KSP, and PC 1048 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 1049 1050 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 1051 and set the options directly on the resulting KSP object (you can access its PC 1052 with KSPGetPC()) 1053 1054 1055 Level: beginner 1056 1057 Concepts: additive Schwarz method 1058 1059 References: 1060 An additive variant of the Schwarz alternating method for the case of many subregions 1061 M Dryja, OB Widlund - Courant Institute, New York University Technical report 1062 1063 Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1064 Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X. 1065 1066 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1067 PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(), 1068 PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType() 1069 1070 M*/ 1071 1072 EXTERN_C_BEGIN 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "PCCreate_GASM" 1075 PetscErrorCode PCCreate_GASM(PC pc) 1076 { 1077 PetscErrorCode ierr; 1078 PC_GASM *osm; 1079 1080 PetscFunctionBegin; 1081 ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr); 1082 osm->N = PETSC_DECIDE; 1083 osm->n = 0; 1084 osm->nmax = 0; 1085 osm->overlap = 1; 1086 osm->ksp = 0; 1087 osm->grestriction = 0; 1088 osm->gprolongation = 0; 1089 osm->gx = 0; 1090 osm->gy = 0; 1091 osm->x = 0; 1092 osm->y = 0; 1093 osm->is = 0; 1094 osm->is_local = 0; 1095 osm->pmat = 0; 1096 osm->type = PC_GASM_RESTRICT; 1097 osm->same_local_solves = PETSC_TRUE; 1098 osm->sort_indices = PETSC_TRUE; 1099 1100 pc->data = (void*)osm; 1101 pc->ops->apply = PCApply_GASM; 1102 pc->ops->applytranspose = PCApplyTranspose_GASM; 1103 pc->ops->setup = PCSetUp_GASM; 1104 pc->ops->reset = PCReset_GASM; 1105 pc->ops->destroy = PCDestroy_GASM; 1106 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1107 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1108 pc->ops->view = PCView_GASM; 1109 pc->ops->applyrichardson = 0; 1110 1111 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM", 1112 PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr); 1113 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM", 1114 PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr); 1115 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM", 1116 PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1117 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM", 1118 PCGASMSetType_GASM);CHKERRQ(ierr); 1119 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM", 1120 PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1121 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM", 1122 PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 EXTERN_C_END 1126 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "PCGASMCreateSubdomains" 1130 /*@C 1131 PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz 1132 preconditioner for a any problem on a general grid. 1133 1134 Collective 1135 1136 Input Parameters: 1137 + A - The global matrix operator 1138 - n - the number of local blocks 1139 1140 Output Parameters: 1141 . outis - the array of index sets defining the subdomains 1142 1143 Level: advanced 1144 1145 Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap 1146 from these if you use PCGASMSetLocalSubdomains() 1147 1148 In the Fortran version you must provide the array outis[] already allocated of length n. 1149 1150 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1151 1152 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1153 @*/ 1154 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1155 { 1156 MatPartitioning mpart; 1157 const char *prefix; 1158 PetscErrorCode (*f)(Mat,MatReuse,Mat*); 1159 PetscMPIInt size; 1160 PetscInt i,j,rstart,rend,bs; 1161 PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1162 Mat Ad = PETSC_NULL, adj; 1163 IS ispart,isnumb,*is; 1164 PetscErrorCode ierr; 1165 1166 PetscFunctionBegin; 1167 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1168 PetscValidPointer(outis,3); 1169 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1170 1171 /* Get prefix, row distribution, and block size */ 1172 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1173 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1174 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1175 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1176 1177 /* Get diagonal block from matrix if possible */ 1178 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1179 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1180 if (f) { 1181 ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr); 1182 } else if (size == 1) { 1183 Ad = A; 1184 } 1185 if (Ad) { 1186 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1187 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1188 } 1189 if (Ad && n > 1) { 1190 PetscBool match,done; 1191 /* Try to setup a good matrix partitioning if available */ 1192 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1193 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1194 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1195 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1196 if (!match) { 1197 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1198 } 1199 if (!match) { /* assume a "good" partitioner is available */ 1200 PetscInt na,*ia,*ja; 1201 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1202 if (done) { 1203 /* Build adjacency matrix by hand. Unfortunately a call to 1204 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1205 remove the block-aij structure and we cannot expect 1206 MatPartitioning to split vertices as we need */ 1207 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1208 nnz = 0; 1209 for (i=0; i<na; i++) { /* count number of nonzeros */ 1210 len = ia[i+1] - ia[i]; 1211 row = ja + ia[i]; 1212 for (j=0; j<len; j++) { 1213 if (row[j] == i) { /* don't count diagonal */ 1214 len--; break; 1215 } 1216 } 1217 nnz += len; 1218 } 1219 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1220 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1221 nnz = 0; 1222 iia[0] = 0; 1223 for (i=0; i<na; i++) { /* fill adjacency */ 1224 cnt = 0; 1225 len = ia[i+1] - ia[i]; 1226 row = ja + ia[i]; 1227 for (j=0; j<len; j++) { 1228 if (row[j] != i) { /* if not diagonal */ 1229 jja[nnz+cnt++] = row[j]; 1230 } 1231 } 1232 nnz += cnt; 1233 iia[i+1] = nnz; 1234 } 1235 /* Partitioning of the adjacency matrix */ 1236 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1237 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1238 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1239 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1240 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1241 ierr = MatDestroy(&adj);CHKERRQ(ierr); 1242 foundpart = PETSC_TRUE; 1243 } 1244 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1245 } 1246 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr); 1247 } 1248 1249 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1250 *outis = is; 1251 1252 if (!foundpart) { 1253 1254 /* Partitioning by contiguous chunks of rows */ 1255 1256 PetscInt mbs = (rend-rstart)/bs; 1257 PetscInt start = rstart; 1258 for (i=0; i<n; i++) { 1259 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1260 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1261 start += count; 1262 } 1263 1264 } else { 1265 1266 /* Partitioning by adjacency of diagonal block */ 1267 1268 const PetscInt *numbering; 1269 PetscInt *count,nidx,*indices,*newidx,start=0; 1270 /* Get node count in each partition */ 1271 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1272 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1273 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1274 for (i=0; i<n; i++) count[i] *= bs; 1275 } 1276 /* Build indices from node numbering */ 1277 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1278 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1279 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1280 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1281 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1282 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1283 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1284 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1285 for (i=0; i<nidx; i++) 1286 for (j=0; j<bs; j++) 1287 newidx[i*bs+j] = indices[i]*bs + j; 1288 ierr = PetscFree(indices);CHKERRQ(ierr); 1289 nidx *= bs; 1290 indices = newidx; 1291 } 1292 /* Shift to get global indices */ 1293 for (i=0; i<nidx; i++) indices[i] += rstart; 1294 1295 /* Build the index sets for each block */ 1296 for (i=0; i<n; i++) { 1297 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1298 ierr = ISSort(is[i]);CHKERRQ(ierr); 1299 start += count[i]; 1300 } 1301 1302 ierr = PetscFree(count); 1303 ierr = PetscFree(indices); 1304 ierr = ISDestroy(&isnumb);CHKERRQ(ierr); 1305 ierr = ISDestroy(&ispart);CHKERRQ(ierr); 1306 } 1307 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "PCGASMDestroySubdomains" 1313 /*@C 1314 PCGASMDestroySubdomains - Destroys the index sets created with 1315 PCGASMCreateSubdomains(). Should be called after setting subdomains 1316 with PCGASMSetLocalSubdomains(). 1317 1318 Collective 1319 1320 Input Parameters: 1321 + n - the number of index sets 1322 . is - the array of index sets 1323 - is_local - the array of local index sets, can be PETSC_NULL 1324 1325 Level: advanced 1326 1327 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1328 1329 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1330 @*/ 1331 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1332 { 1333 PetscInt i; 1334 PetscErrorCode ierr; 1335 PetscFunctionBegin; 1336 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1337 PetscValidPointer(is,2); 1338 for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); } 1339 ierr = PetscFree(is);CHKERRQ(ierr); 1340 if (is_local) { 1341 PetscValidPointer(is_local,3); 1342 for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); } 1343 ierr = PetscFree(is_local);CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } 1347 1348 1349 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1350 { \ 1351 PetscInt first_row = first/M, last_row = last/M+1; \ 1352 /* \ 1353 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1354 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1355 subdomain). \ 1356 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1357 of the intersection. \ 1358 */ \ 1359 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1360 *ylow_loc = PetscMax(first_row,ylow); \ 1361 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1362 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1363 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1364 *yhigh_loc = PetscMin(last_row,yhigh); \ 1365 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1366 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1367 /* Now compute the size of the local subdomain n. */ \ 1368 *n = 0; \ 1369 if(*ylow_loc < *yhigh_loc) { \ 1370 PetscInt width = xright-xleft; \ 1371 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1372 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1373 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1374 }\ 1375 } 1376 1377 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1381 /*@ 1382 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1383 preconditioner for a two-dimensional problem on a regular grid. 1384 1385 Collective 1386 1387 Input Parameters: 1388 + M, N - the global number of mesh points in the x and y directions 1389 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1390 . dof - degrees of freedom per node 1391 - overlap - overlap in mesh lines 1392 1393 Output Parameters: 1394 + Nsub - the number of local subdomains created 1395 . is - array of index sets defining overlapping (if overlap > 0) subdomains 1396 - is_local - array of index sets defining non-overlapping subdomains 1397 1398 Note: 1399 Presently PCAMSCreateSubdomains2d() is valid only for sequential 1400 preconditioners. More general related routines are 1401 PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains(). 1402 1403 Level: advanced 1404 1405 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid 1406 1407 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(), 1408 PCGASMSetOverlap() 1409 @*/ 1410 PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local) 1411 { 1412 PetscErrorCode ierr; 1413 PetscMPIInt size, rank; 1414 PetscInt i, j; 1415 PetscInt maxheight, maxwidth; 1416 PetscInt xstart, xleft, xright, xleft_loc, xright_loc; 1417 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc; 1418 PetscInt x[2][2], y[2][2], n[2]; 1419 PetscInt first, last; 1420 PetscInt nidx, *idx; 1421 PetscInt ii,jj,s,q,d; 1422 PetscInt k,kk; 1423 PetscMPIInt color; 1424 MPI_Comm comm, subcomm; 1425 IS **iis = 0; 1426 1427 PetscFunctionBegin; 1428 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr); 1429 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1430 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1431 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr); 1432 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) " 1433 "does not respect the number of degrees of freedom per grid point %D", first, last, dof); 1434 1435 /* Determine the number of domains with nonzero intersections with the local ownership range. */ 1436 s = 0; 1437 ystart = 0; 1438 for (j=0; j<Ndomains; ++j) { 1439 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1440 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1441 /* Vertical domain limits with an overlap. */ 1442 ylow = PetscMax(ystart - overlap,0); 1443 yhigh = PetscMin(ystart + maxheight + overlap,N); 1444 xstart = 0; 1445 for (i=0; i<Mdomains; ++i) { 1446 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1447 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1448 /* Horizontal domain limits with an overlap. */ 1449 xleft = PetscMax(xstart - overlap,0); 1450 xright = PetscMin(xstart + maxwidth + overlap,M); 1451 /* 1452 Determine whether this subdomain intersects this processor's ownership range of pc->pmat. 1453 */ 1454 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1455 if(nidx) { 1456 ++s; 1457 } 1458 xstart += maxwidth; 1459 }/* for(i = 0; i < Mdomains; ++i) */ 1460 ystart += maxheight; 1461 }/* for(j = 0; j < Ndomains; ++j) */ 1462 /* Now we can allocate the necessary number of ISs. */ 1463 *nsub = s; 1464 ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr); 1465 ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr); 1466 s = 0; 1467 ystart = 0; 1468 for (j=0; j<Ndomains; ++j) { 1469 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */ 1470 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N); 1471 /* Vertical domain limits with an overlap. */ 1472 y[0][0] = PetscMax(ystart - overlap,0); 1473 y[0][1] = PetscMin(ystart + maxheight + overlap,N); 1474 /* Vertical domain limits without an overlap. */ 1475 y[1][0] = ystart; 1476 y[1][1] = PetscMin(ystart + maxheight,N); 1477 xstart = 0; 1478 for (i=0; i<Mdomains; ++i) { 1479 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */ 1480 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M); 1481 /* Horizontal domain limits with an overlap. */ 1482 x[0][0] = PetscMax(xstart - overlap,0); 1483 x[0][1] = PetscMin(xstart + maxwidth + overlap,M); 1484 /* Horizontal domain limits without an overlap. */ 1485 x[1][0] = xstart; 1486 x[1][1] = PetscMin(xstart+maxwidth,M); 1487 /* 1488 Determine whether this domain intersects this processor's ownership range of pc->pmat. 1489 Do this twice: first for the domains with overlaps, and once without. 1490 During the first pass create the subcommunicators, and use them on the second pass as well. 1491 */ 1492 for(q = 0; q < 2; ++q) { 1493 /* 1494 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted 1495 according to whether the domain with an overlap or without is considered. 1496 */ 1497 xleft = x[q][0]; xright = x[q][1]; 1498 ylow = y[q][0]; yhigh = y[q][1]; 1499 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx)); 1500 nidx *= dof; 1501 n[q] = nidx; 1502 /* 1503 Based on the counted number of indices in the local domain *with an overlap*, 1504 construct a subcommunicator of all the processors supporting this domain. 1505 Observe that a domain with an overlap might have nontrivial local support, 1506 while the domain without an overlap might not. Hence, the decision to participate 1507 in the subcommunicator must be based on the domain with an overlap. 1508 */ 1509 if (q == 0) { 1510 if(nidx) { 1511 color = 1; 1512 } else { 1513 color = MPI_UNDEFINED; 1514 } 1515 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr); 1516 } 1517 /* 1518 Proceed only if the number of local indices *with an overlap* is nonzero. 1519 */ 1520 if (n[0]) { 1521 if(q == 0) { 1522 iis = is; 1523 } 1524 if (q == 1) { 1525 /* 1526 The IS for the no-overlap subdomain shares a communicator with the overlapping domain. 1527 Moreover, if the overlap is zero, the two ISs are identical. 1528 */ 1529 if (overlap == 0) { 1530 (*is_local)[s] = (*is)[s]; 1531 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr); 1532 continue; 1533 } else { 1534 iis = is_local; 1535 subcomm = ((PetscObject)(*is)[s])->comm; 1536 } 1537 }/* if(q == 1) */ 1538 idx = PETSC_NULL; 1539 ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr); 1540 if(nidx) { 1541 k = 0; 1542 for (jj=ylow_loc; jj<yhigh_loc; ++jj) { 1543 PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft; 1544 PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright; 1545 kk = dof*(M*jj + x0); 1546 for (ii=x0; ii<x1; ++ii) { 1547 for(d = 0; d < dof; ++d) { 1548 idx[k++] = kk++; 1549 } 1550 } 1551 } 1552 } 1553 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr); 1554 }/* if(n[0]) */ 1555 }/* for(q = 0; q < 2; ++q) */ 1556 if(n[0]) { 1557 ++s; 1558 } 1559 xstart += maxwidth; 1560 }/* for(i = 0; i < Mdomains; ++i) */ 1561 ystart += maxheight; 1562 }/* for(j = 0; j < Ndomains; ++j) */ 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "PCGASMGetLocalSubdomains" 1568 /*@C 1569 PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor 1570 only) for the additive Schwarz preconditioner. 1571 1572 Not Collective 1573 1574 Input Parameter: 1575 . pc - the preconditioner context 1576 1577 Output Parameters: 1578 + n - the number of subdomains for this processor (default value = 1) 1579 . is - the index sets that define the subdomains for this processor 1580 - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL) 1581 1582 1583 Notes: 1584 The IS numbering is in the parallel, global numbering of the vector. 1585 1586 Level: advanced 1587 1588 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 1589 1590 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1591 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices() 1592 @*/ 1593 PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[]) 1594 { 1595 PC_GASM *osm; 1596 PetscErrorCode ierr; 1597 PetscBool match; 1598 1599 PetscFunctionBegin; 1600 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1601 PetscValidIntPointer(n,2); 1602 if (is) PetscValidPointer(is,3); 1603 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1604 if (!match) { 1605 if (n) *n = 0; 1606 if (is) *is = PETSC_NULL; 1607 } else { 1608 osm = (PC_GASM*)pc->data; 1609 if (n) *n = osm->n; 1610 if (is) *is = osm->is; 1611 if (is_local) *is_local = osm->is_local; 1612 } 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "PCGASMGetLocalSubmatrices" 1618 /*@C 1619 PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1620 only) for the additive Schwarz preconditioner. 1621 1622 Not Collective 1623 1624 Input Parameter: 1625 . pc - the preconditioner context 1626 1627 Output Parameters: 1628 + n - the number of matrices for this processor (default value = 1) 1629 - mat - the matrices 1630 1631 1632 Level: advanced 1633 1634 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi 1635 1636 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1637 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains() 1638 @*/ 1639 PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[]) 1640 { 1641 PC_GASM *osm; 1642 PetscErrorCode ierr; 1643 PetscBool match; 1644 1645 PetscFunctionBegin; 1646 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1647 PetscValidIntPointer(n,2); 1648 if (mat) PetscValidPointer(mat,3); 1649 if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp()."); 1650 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1651 if (!match) { 1652 if (n) *n = 0; 1653 if (mat) *mat = PETSC_NULL; 1654 } else { 1655 osm = (PC_GASM*)pc->data; 1656 if (n) *n = osm->n; 1657 if (mat) *mat = osm->pmat; 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661