1 2 /* 3 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 4 In this version each processor may have any number of subdomains and a subdomain may live on multiple 5 processors. 6 7 N - total number of subdomains on all processors 8 n - actual number of subdomains on this processor 9 nmax - maximum number of subdomains per processor 10 */ 11 #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/ 12 13 typedef struct { 14 PetscInt N,n,nmax; 15 PetscInt overlap; /* overlap requested by user */ 16 KSP *ksp; /* linear solvers for each block */ 17 Vec gx,gy; /* Merged work vectors */ 18 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 19 IS gis, gis_local; /* merged ISs */ 20 VecScatter grestriction; /* merged restriction */ 21 VecScatter gprolongation; /* merged prolongation */ 22 IS *is; /* index set that defines each overlapping subdomain */ 23 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 24 Mat *pmat; /* subdomain block matrices */ 25 PCGASMType type; /* use reduced interpolation, restriction or both */ 26 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 27 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 28 PetscBool sort_indices; /* flag to sort subdomain indices */ 29 } PC_GASM; 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCGASMPrintSubdomains" 33 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 34 { 35 PC_GASM *osm = (PC_GASM*)pc->data; 36 const char *prefix; 37 char fname[PETSC_MAX_PATH_LEN+1]; 38 PetscViewer viewer, sviewer; 39 PetscInt i,j,nidx; 40 const PetscInt *idx; 41 char *cidx; 42 PetscBool found; 43 PetscMPIInt size, rank; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 48 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, 
&rank);CHKERRQ(ierr); 49 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 50 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 51 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 52 for (i=0;i<osm->n;++i) { 53 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 54 ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr); 55 /* 56 No more than 15 characters per index plus a space. 57 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 58 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 59 For nidx == 0, the whole string 16 '\0'. 60 */ 61 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 62 ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr); 63 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 64 for(j = 0; j < nidx; ++j) { 65 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 66 } 67 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 68 ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr); 69 70 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr); 72 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 73 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 74 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 75 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 76 ierr = PetscFree(cidx);CHKERRQ(ierr); 77 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 78 if (osm->is_local) { 79 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 80 ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr); 81 /* 82 No more than 15 characters per index plus a space. 
83 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 84 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 85 For nidx == 0, the whole string 16 '\0'. 86 */ 87 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 88 ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr); 89 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 90 for(j = 0; j < nidx; ++j) { 91 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 92 } 93 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 94 ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr); 95 96 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 97 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr); 98 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 99 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 100 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 101 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 102 ierr = PetscFree(cidx);CHKERRQ(ierr); 103 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 104 } 105 } 106 PetscFunctionReturn(0); 107 } 108 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCView_GASM" 112 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 113 { 114 PC_GASM *osm = (PC_GASM*)pc->data; 115 const char *prefix; 116 PetscErrorCode ierr; 117 PetscMPIInt rank, size; 118 PetscInt i,bsz; 119 PetscBool iascii,isstring, print_subdomains=PETSC_FALSE; 120 PetscViewer sviewer; 121 122 123 PetscFunctionBegin; 124 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 125 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 126 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 127 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 128 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 129 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 130 if (iascii) { 131 char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set"; 132 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 133 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 134 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 135 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 136 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 137 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 138 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 139 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 140 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 141 if (osm->same_local_solves) { 142 if (osm->ksp) { 143 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 144 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 145 if (!rank) { 146 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 147 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 148 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 149 } 150 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 151 } 152 } else { 153 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 154 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each 
subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 155 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 156 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 157 for (i=0; i<osm->nmax; i++) { 158 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 159 if (i < osm->n) { 160 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 161 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr); 162 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 163 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 164 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 165 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 166 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr); 167 } 168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 169 } 170 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 171 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 172 } 173 } else if (isstring) { 174 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 175 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 176 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 177 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 178 } else { 179 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 180 } 181 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 182 if(print_subdomains) { 183 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 189 190 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "PCSetUp_GASM" 194 static PetscErrorCode PCSetUp_GASM(PC pc) 195 { 196 PC_GASM *osm = (PC_GASM*)pc->data; 197 
PetscErrorCode ierr; 198 PetscBool symset,flg; 199 PetscInt i; 200 PetscMPIInt rank, size; 201 MatReuse scall = MAT_REUSE_MATRIX; 202 KSP ksp; 203 PC subpc; 204 const char *prefix,*pprefix; 205 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 206 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 207 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 208 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 209 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */ 210 Vec x,y; 211 PetscScalar *gxarray, *gyarray; 212 PetscInt gfirst, glast; 213 DM *domain_dm = PETSC_NULL; 214 215 PetscFunctionBegin; 216 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 217 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 218 if (!pc->setupcalled) { 219 220 if (!osm->type_set) { 221 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 222 if (symset && flg) { osm->type = PC_GASM_BASIC; } 223 } 224 225 /* Note: if osm->n has been set, it is at least 1. 
*/ 226 if (osm->N == PETSC_DECIDE && osm->n < 1) { 227 /* no subdomains given */ 228 /* try pc->dm first */ 229 if(pc->dm) { 230 char ddm_name[1024]; 231 DM ddm; 232 PetscBool flg; 233 PetscInt num_domains, d; 234 char **domain_names; 235 IS *domain_is; 236 /* Allow the user to request a decomposition DM by name */ 237 ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr); 238 ierr = PetscOptionsString("-pc_asm_decomposition_dm", "Name of the DM defining the composition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr); 239 if(flg) { 240 ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr); 241 if(!ddm) { 242 SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Uknown DM decomposition name %s", ddm_name); 243 } 244 ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr); 245 ierr = PCSetDM(pc,ddm); CHKERRQ(ierr); 246 } 247 ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm); CHKERRQ(ierr); 248 if(num_domains) { 249 ierr = PCGASMSetLocalSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr); 250 } 251 for(d = 0; d < num_domains; ++d) { 252 ierr = PetscFree(domain_names[d]); CHKERRQ(ierr); 253 ierr = ISDestroy(&domain_is[d]); CHKERRQ(ierr); 254 } 255 ierr = PetscFree(domain_names);CHKERRQ(ierr); 256 ierr = PetscFree(domain_is);CHKERRQ(ierr); 257 } 258 if (osm->N == PETSC_DECIDE && osm->n < 1) { /* still no subdomains; use one per processor */ 259 osm->nmax = osm->n = 1; 260 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 261 osm->N = size; 262 } 263 } else if (osm->N == PETSC_DECIDE) { 264 PetscInt inwork[2], outwork[2]; 265 /* determine global number of subdomains and the max number of local subdomains */ 266 inwork[0] = inwork[1] = osm->n; 267 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 268 osm->nmax = outwork[0]; 269 osm->N = outwork[1]; 270 } 271 if (!osm->is){ /* 
create the index sets */ 272 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 273 } 274 if (!osm->is_local) { 275 /* 276 This indicates that osm->is should define a nonoverlapping decomposition 277 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains, 278 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 279 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 280 Therefore, osm->is will be used to define osm->is_local. 281 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 282 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 283 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 284 */ 285 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 286 for (i=0; i<osm->n; i++) { 287 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 288 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 289 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 290 } else { 291 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 292 osm->is_local[i] = osm->is[i]; 293 } 294 } 295 } 296 /* Beyond this point osm->is_local is not null. 
*/ 297 if (osm->overlap > 0) { 298 /* Extend the "overlapping" regions by a number of steps */ 299 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 300 } 301 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 302 flg = PETSC_FALSE; 303 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 304 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 305 306 if (osm->sort_indices) { 307 for (i=0; i<osm->n; i++) { 308 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 309 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 310 } 311 } 312 /* Merge the ISs, create merged vectors and scatter contexts. */ 313 /* Restriction ISs. */ 314 ddn = 0; 315 for (i=0; i<osm->n; i++) { 316 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 317 ddn += dn; 318 } 319 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr); 320 ddn = 0; 321 for (i=0; i<osm->n; i++) { 322 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 323 ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr); 324 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr); 325 ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr); 326 ddn += dn; 327 } 328 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr); 329 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 330 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr); 331 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 332 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 333 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr); 334 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr); 335 ierr = ISDestroy(&gid);CHKERRQ(ierr); 336 /* Prolongation ISs */ 337 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. 
*/ 338 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 339 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 340 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 341 /**/ 342 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 343 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. */ 344 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 345 IS gis_llocal; /* IS with ddidx_llocal indices. */ 346 PetscInt j; 347 ddn_local = 0; 348 for (i=0; i<osm->n; i++) { 349 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 350 ddn_local += dn_local; 351 } 352 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr); 353 ddn_local = 0; 354 for (i=0; i<osm->n; i++) { 355 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 356 ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr); 357 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr); 358 ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr); 359 ddn_local += dn_local; 360 } 361 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr); 362 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr); 363 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 364 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 365 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 366 if (ddn_llocal != ddn_local) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 367 /* Now convert these localized indices into the global indices into the merged output vector. */ 368 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr); 369 for(j=0; j < ddn_llocal; ++j) { 370 ddidx_llocal[j] += gfirst; 371 } 372 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr); 373 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr); 374 ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr); 375 } 376 /* Create the subdomain work vectors. */ 377 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 378 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 379 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 380 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 381 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 382 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 383 PetscInt dN; 384 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 385 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 386 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 387 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 388 } 389 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 390 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 391 ierr = VecDestroy(&x);CHKERRQ(ierr); 392 ierr = VecDestroy(&y);CHKERRQ(ierr); 393 /* Create the local solvers */ 394 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 395 for (i=0; i<osm->n; i++) { /* KSPs are local */ 396 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 397 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 398 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 399 ierr = 
KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 400 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 401 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 402 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 403 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 404 osm->ksp[i] = ksp; 405 } 406 scall = MAT_INITIAL_MATRIX; 407 408 } else { 409 /* 410 Destroy the blocks from the previous iteration 411 */ 412 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 413 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 414 scall = MAT_INITIAL_MATRIX; 415 } 416 } 417 418 /* 419 Extract out the submatrices. 420 */ 421 if(size > 1) { 422 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 423 } 424 else { 425 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 426 } 427 if (scall == MAT_INITIAL_MATRIX) { 428 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 429 for (i=0; i<osm->n; i++) { 430 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 431 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 432 } 433 } 434 435 /* Return control to the user so that the submatrices can be modified (e.g., to apply 436 different boundary conditions for the submatrices than for the global problem) */ 437 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 438 439 /* 440 Loop over submatrices putting them into local ksp 441 */ 442 for (i=0; i<osm->n; i++) { 443 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 444 if (!pc->setupcalled) { 445 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 446 } 447 } 448 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 454 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 455 { 456 PC_GASM *osm = (PC_GASM*)pc->data; 457 PetscErrorCode ierr; 458 PetscInt 
i; 459 460 PetscFunctionBegin; 461 for (i=0; i<osm->n; i++) { 462 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 463 } 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "PCApply_GASM" 469 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 470 { 471 PC_GASM *osm = (PC_GASM*)pc->data; 472 PetscErrorCode ierr; 473 PetscInt i; 474 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 475 476 PetscFunctionBegin; 477 /* 478 Support for limiting the restriction or interpolation to only local 479 subdomain values (leaving the other values 0). 480 */ 481 if (!(osm->type & PC_GASM_RESTRICT)) { 482 forward = SCATTER_FORWARD_LOCAL; 483 /* have to zero the work RHS since scatter may leave some slots empty */ 484 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 485 } 486 if (!(osm->type & PC_GASM_INTERPOLATE)) { 487 reverse = SCATTER_REVERSE_LOCAL; 488 } 489 490 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 491 ierr = VecZeroEntries(y);CHKERRQ(ierr); 492 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 493 /* do the subdomain solves */ 494 for (i=0; i<osm->n; ++i) { 495 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 496 } 497 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 498 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "PCApplyTranspose_GASM" 504 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 505 { 506 PC_GASM *osm = (PC_GASM*)pc->data; 507 PetscErrorCode ierr; 508 PetscInt i; 509 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 510 511 PetscFunctionBegin; 512 /* 513 Support for limiting the restriction or interpolation to only local 514 subdomain values (leaving the other values 0). 
515 516 Note: these are reversed from the PCApply_GASM() because we are applying the 517 transpose of the three terms 518 */ 519 if (!(osm->type & PC_GASM_INTERPOLATE)) { 520 forward = SCATTER_FORWARD_LOCAL; 521 /* have to zero the work RHS since scatter may leave some slots empty */ 522 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 523 } 524 if (!(osm->type & PC_GASM_RESTRICT)) { 525 reverse = SCATTER_REVERSE_LOCAL; 526 } 527 528 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 529 ierr = VecZeroEntries(y);CHKERRQ(ierr); 530 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 531 /* do the local solves */ 532 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 533 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 534 } 535 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 536 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 537 PetscFunctionReturn(0); 538 } 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "PCReset_GASM" 542 static PetscErrorCode PCReset_GASM(PC pc) 543 { 544 PC_GASM *osm = (PC_GASM*)pc->data; 545 PetscErrorCode ierr; 546 PetscInt i; 547 548 PetscFunctionBegin; 549 if (osm->ksp) { 550 for (i=0; i<osm->n; i++) { 551 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 552 } 553 } 554 if (osm->pmat) { 555 if (osm->n > 0) { 556 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 557 } 558 } 559 if (osm->x) { 560 for (i=0; i<osm->n; i++) { 561 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr); 562 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr); 563 } 564 } 565 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr); 566 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr); 567 568 ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr); 569 ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr); 570 if (osm->is) { 571 ierr = 
PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 572 osm->is = 0; 573 osm->is_local = 0; 574 } 575 ierr = ISDestroy(&osm->gis);CHKERRQ(ierr); 576 ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "PCDestroy_GASM" 582 static PetscErrorCode PCDestroy_GASM(PC pc) 583 { 584 PC_GASM *osm = (PC_GASM*)pc->data; 585 PetscErrorCode ierr; 586 PetscInt i; 587 588 PetscFunctionBegin; 589 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 590 if (osm->ksp) { 591 for (i=0; i<osm->n; i++) { 592 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 593 } 594 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 595 } 596 ierr = PetscFree(osm->x);CHKERRQ(ierr); 597 ierr = PetscFree(osm->y);CHKERRQ(ierr); 598 ierr = PetscFree(pc->data);CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 602 #undef __FUNCT__ 603 #define __FUNCT__ "PCSetFromOptions_GASM" 604 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 605 PC_GASM *osm = (PC_GASM*)pc->data; 606 PetscErrorCode ierr; 607 PetscInt blocks,ovl; 608 PetscBool symset,flg; 609 PCGASMType gasmtype; 610 611 PetscFunctionBegin; 612 /* set the type to symmetric if matrix is symmetric */ 613 if (!osm->type_set && pc->pmat) { 614 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 615 if (symset && flg) { osm->type = PC_GASM_BASIC; } 616 } 617 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 618 ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 619 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); } 620 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 621 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 622 flg = PETSC_FALSE; 623 ierr = PetscOptionsEnum("-pc_gasm_type","Type of 
restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 624 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 625 ierr = PetscOptionsTail();CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 /*------------------------------------------------------------------------------------*/ 630 631 EXTERN_C_BEGIN 632 #undef __FUNCT__ 633 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM" 634 PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[]) 635 { 636 PC_GASM *osm = (PC_GASM*)pc->data; 637 PetscErrorCode ierr; 638 PetscInt i; 639 640 PetscFunctionBegin; 641 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, n = %D",n); 642 if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp()."); 643 644 if (!pc->setupcalled) { 645 osm->n = n; 646 osm->is = 0; 647 osm->is_local = 0; 648 if (is) { 649 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 650 } 651 if (is_local) { 652 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 653 } 654 if (osm->is) { 655 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 656 } 657 if (is) { 658 ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr); 659 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 660 /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. 
*/ 661 osm->overlap = -1; 662 } 663 if (is_local) { 664 ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 665 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 EXTERN_C_END 671 672 EXTERN_C_BEGIN 673 #undef __FUNCT__ 674 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 675 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) { 676 PC_GASM *osm = (PC_GASM*)pc->data; 677 PetscErrorCode ierr; 678 PetscMPIInt rank,size; 679 PetscInt n; 680 681 PetscFunctionBegin; 682 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 683 684 /* 685 Split the subdomains equally among all processors 686 */ 687 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 688 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 689 n = N/size + ((N % size) > rank); 690 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 691 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 692 if (!pc->setupcalled) { 693 if (osm->is) { 694 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 695 } 696 osm->N = N; 697 osm->n = n; 698 osm->is = 0; 699 osm->is_local = 0; 700 } 701 PetscFunctionReturn(0); 702 }/* PCGASMSetTotalSubdomains_GASM() */ 703 EXTERN_C_END 704 705 EXTERN_C_BEGIN 706 #undef __FUNCT__ 707 #define __FUNCT__ "PCGASMSetOverlap_GASM" 708 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 709 { 710 PC_GASM *osm = (PC_GASM*)pc->data; 711 712 PetscFunctionBegin; 713 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 714 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() 
should be called before PCSetUp()."); 715 if (!pc->setupcalled) { 716 osm->overlap = ovl; 717 } 718 PetscFunctionReturn(0); 719 } 720 EXTERN_C_END 721 722 EXTERN_C_BEGIN 723 #undef __FUNCT__ 724 #define __FUNCT__ "PCGASMSetType_GASM" 725 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 726 { 727 PC_GASM *osm = (PC_GASM*)pc->data; 728 729 PetscFunctionBegin; 730 osm->type = type; 731 osm->type_set = PETSC_TRUE; 732 PetscFunctionReturn(0); 733 } 734 EXTERN_C_END 735 736 EXTERN_C_BEGIN 737 #undef __FUNCT__ 738 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 739 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 740 { 741 PC_GASM *osm = (PC_GASM*)pc->data; 742 743 PetscFunctionBegin; 744 osm->sort_indices = doSort; 745 PetscFunctionReturn(0); 746 } 747 EXTERN_C_END 748 749 EXTERN_C_BEGIN 750 #undef __FUNCT__ 751 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 752 /* 753 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 754 In particular, it would upset the global subdomain number calculation. 755 */ 756 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 757 { 758 PC_GASM *osm = (PC_GASM*)pc->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 763 764 if (n) { 765 *n = osm->n; 766 } 767 if (first) { 768 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 769 *first -= osm->n; 770 } 771 if (ksp) { 772 /* Assume that local solves are now different; not necessarily 773 true though! 
       This flag is used only for PCView_GASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index set that defines the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscTryMethod: silently a no-op if the PC is not of type PCGASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains, over all
    processors, for the additive Schwarz preconditioner.  Either all or no
    processors in the PC communicator must call this routine, with the same N.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N - the total number of subdomains for all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this to set the actual index sets of the subdomains;
    they are determined by PETSc during PCSetUp().

    By default the GASM preconditioner uses 1 block per processor.

    Any index sets created for the subdomains cannot be destroyed until after
    completion of the linear solves for which the GASM preconditioner is being used.

    Use PCGASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSPes is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().
   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D(),
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (not Try): errors if the PC is not of type PCGASM. */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
.  -pc_gasm_blocks <blks> - Sets total blocks
.  -pc_gasm_overlap <ovl> - Sets overlap
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
      -pc_gasm_type basic to use the standard GASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
     than one processor. Defaults to one block per processor.

     To set options on the solvers for each block append -sub_ to all the KSP, and PC
        options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separate for each block call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())


   Level: beginner

   Concepts: additive Schwarz method

    References:
    An additive variant of the Schwarz alternating method for the case of many subregions
    M Dryja, OB Widlund - Courant Institute, New York University Technical report

    Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
           PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
/*
   PCCreate_GASM - Constructor for the GASM preconditioner: allocates the PC_GASM context,
   sets its defaults, installs the PC operations table, and composes the PCGASMXXX_C methods
   that the public PCGASMSetXXX()/PCGASMGetXXX() wrappers dispatch to.
*/
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  /* Defaults: subdomain layout decided at setup time, overlap of 1, restricted ASM variant. */
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = 0;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                                           PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                                           PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                                           PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                                           PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                                           PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                                           PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains"
/*@C
   PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
    from these if you use PCGASMSetLocalSubdomains()

    In the Fortran version you must provide the array outis[] already allocated of length n.
1153 1154 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1155 1156 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1157 @*/ 1158 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1159 { 1160 MatPartitioning mpart; 1161 const char *prefix; 1162 PetscErrorCode (*f)(Mat,MatReuse,Mat*); 1163 PetscMPIInt size; 1164 PetscInt i,j,rstart,rend,bs; 1165 PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1166 Mat Ad = PETSC_NULL, adj; 1167 IS ispart,isnumb,*is; 1168 PetscErrorCode ierr; 1169 1170 PetscFunctionBegin; 1171 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1172 PetscValidPointer(outis,3); 1173 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1174 1175 /* Get prefix, row distribution, and block size */ 1176 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1177 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1178 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1179 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1180 1181 /* Get diagonal block from matrix if possible */ 1182 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1183 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1184 if (f) { 1185 ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr); 1186 } else if (size == 1) { 1187 Ad = A; 1188 } 1189 if (Ad) { 1190 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1191 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1192 } 1193 if (Ad && n > 1) { 1194 PetscBool match,done; 1195 /* Try to setup a good matrix partitioning if available */ 1196 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1197 ierr = 
PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1198 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1199 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1200 if (!match) { 1201 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1202 } 1203 if (!match) { /* assume a "good" partitioner is available */ 1204 PetscInt na,*ia,*ja; 1205 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1206 if (done) { 1207 /* Build adjacency matrix by hand. Unfortunately a call to 1208 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1209 remove the block-aij structure and we cannot expect 1210 MatPartitioning to split vertices as we need */ 1211 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1212 nnz = 0; 1213 for (i=0; i<na; i++) { /* count number of nonzeros */ 1214 len = ia[i+1] - ia[i]; 1215 row = ja + ia[i]; 1216 for (j=0; j<len; j++) { 1217 if (row[j] == i) { /* don't count diagonal */ 1218 len--; break; 1219 } 1220 } 1221 nnz += len; 1222 } 1223 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1224 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1225 nnz = 0; 1226 iia[0] = 0; 1227 for (i=0; i<na; i++) { /* fill adjacency */ 1228 cnt = 0; 1229 len = ia[i+1] - ia[i]; 1230 row = ja + ia[i]; 1231 for (j=0; j<len; j++) { 1232 if (row[j] != i) { /* if not diagonal */ 1233 jja[nnz+cnt++] = row[j]; 1234 } 1235 } 1236 nnz += cnt; 1237 iia[i+1] = nnz; 1238 } 1239 /* Partitioning of the adjacency matrix */ 1240 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1241 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1242 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1243 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1244 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1245 ierr = MatDestroy(&adj);CHKERRQ(ierr); 1246 foundpart = PETSC_TRUE; 
1247 } 1248 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1249 } 1250 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr); 1251 } 1252 1253 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1254 *outis = is; 1255 1256 if (!foundpart) { 1257 1258 /* Partitioning by contiguous chunks of rows */ 1259 1260 PetscInt mbs = (rend-rstart)/bs; 1261 PetscInt start = rstart; 1262 for (i=0; i<n; i++) { 1263 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1264 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1265 start += count; 1266 } 1267 1268 } else { 1269 1270 /* Partitioning by adjacency of diagonal block */ 1271 1272 const PetscInt *numbering; 1273 PetscInt *count,nidx,*indices,*newidx,start=0; 1274 /* Get node count in each partition */ 1275 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1276 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1277 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1278 for (i=0; i<n; i++) count[i] *= bs; 1279 } 1280 /* Build indices from node numbering */ 1281 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1282 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1283 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1284 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1285 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1286 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1287 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1288 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1289 for (i=0; i<nidx; i++) 1290 for (j=0; j<bs; j++) 1291 newidx[i*bs+j] = indices[i]*bs + j; 1292 ierr = PetscFree(indices);CHKERRQ(ierr); 1293 nidx *= bs; 1294 indices = newidx; 1295 } 1296 /* Shift to get global indices */ 1297 for (i=0; i<nidx; i++) indices[i] += rstart; 1298 1299 /* Build the index sets for each block */ 1300 for (i=0; i<n; i++) { 1301 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1302 ierr = ISSort(is[i]);CHKERRQ(ierr); 1303 start += count[i]; 1304 } 1305 1306 ierr = PetscFree(count); 1307 ierr = PetscFree(indices); 1308 ierr = ISDestroy(&isnumb);CHKERRQ(ierr); 1309 ierr = ISDestroy(&ispart);CHKERRQ(ierr); 1310 } 1311 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "PCGASMDestroySubdomains" 1317 /*@C 1318 PCGASMDestroySubdomains - Destroys the index sets created with 1319 PCGASMCreateSubdomains(). Should be called after setting subdomains 1320 with PCGASMSetLocalSubdomains(). 1321 1322 Collective 1323 1324 Input Parameters: 1325 + n - the number of index sets 1326 . is - the array of index sets 1327 - is_local - the array of local index sets, can be PETSC_NULL 1328 1329 Level: advanced 1330 1331 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1332 1333 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1334 @*/ 1335 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1336 { 1337 PetscInt i; 1338 PetscErrorCode ierr; 1339 PetscFunctionBegin; 1340 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1341 PetscValidPointer(is,2); 1342 for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); } 1343 ierr = PetscFree(is);CHKERRQ(ierr); 1344 if (is_local) { 1345 PetscValidPointer(is_local,3); 1346 for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); } 1347 ierr = PetscFree(is_local);CHKERRQ(ierr); 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 1353 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1354 { \ 1355 PetscInt first_row = first/M, last_row = last/M+1; \ 1356 /* \ 1357 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1358 of the bounding box of the intersection of 
the subdomain with the local ownership range (local \ 1359 subdomain). \ 1360 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1361 of the intersection. \ 1362 */ \ 1363 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1364 *ylow_loc = PetscMax(first_row,ylow); \ 1365 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1366 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1367 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1368 *yhigh_loc = PetscMin(last_row,yhigh); \ 1369 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1370 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1371 /* Now compute the size of the local subdomain n. */ \ 1372 *n = 0; \ 1373 if(*ylow_loc < *yhigh_loc) { \ 1374 PetscInt width = xright-xleft; \ 1375 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1376 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1377 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1378 }\ 1379 } 1380 1381 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1385 /*@ 1386 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1387 preconditioner for a two-dimensional problem on a regular grid. 1388 1389 Collective 1390 1391 Input Parameters: 1392 + M, N - the global number of mesh points in the x and y directions 1393 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1394 . dof - degrees of freedom per node 1395 - overlap - overlap in mesh lines 1396 1397 Output Parameters: 1398 + Nsub - the number of local subdomains created 1399 . 
  is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;            /* largest extent of a subdomain in each direction */
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];         /* [0] = with overlap, [1] = without overlap */
  PetscInt       first, last;                    /* this rank's ownership range of pc->pmat */
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis = 0;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if(nidx) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
  /* NOTE(review): sizeof(IS*) is used below where sizeof(IS) reads more naturally;
     IS is itself a pointer typedef so the sizes coincide, but sizeof(IS) would be clearer. */
  *nsub = s;
  ierr  = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
  ierr  = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);
  /* Second pass: identical traversal, this time actually building the index sets. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this domain intersects this processor's ownership range of pc->pmat.
         Do this twice: first for the domains with overlaps, and once without.
         During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for(q = 0; q < 2; ++q) {
        /*
          domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;
        n[q]  = nidx;
        /*
          Based on the counted number of indices in the local domain *with an overlap*,
          construct a subcommunicator of all the processors supporting this domain.
          Observe that a domain with an overlap might have nontrivial local support,
          while the domain without an overlap might not.  Hence, the decision to participate
          in the subcommunicator must be based on the domain with an overlap.
        */
        if (q == 0) {
          if(nidx) {
            color = 1;
          } else {
            color = MPI_UNDEFINED;
          }
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
        }
        /*
          Proceed only if the number of local indices *with an overlap* is nonzero.
        */
        if (n[0]) {
          if(q == 0) {
            iis = is;
          }
          if (q == 1) {
            /*
              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
              Moreover, if the overlap is zero, the two ISs are identical.
            */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              iis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          }/* if(q == 1) */
          idx  = PETSC_NULL;
          ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          if(nidx) {
            /* Enumerate the global (dof-expanded) indices of this rank's slice of the subdomain,
               row by row; the first and last rows may be clipped by the ownership range. */
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
              PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for(d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
        }/* if(n[0]) */
      }/* for(q = 0; q < 2; ++q) */
      if(n[0]) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubdomains"
/*@C
    PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Not Collective

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of subdomains for this processor (default value = 1)
.
  is - the index sets that define the subdomains for this processor
-   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)


    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
@*/
PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* NOTE(review): n is validated as non-NULL here, yet guarded with "if (n)" below —
     confirm whether a NULL n is meant to be permitted. */
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    /* Not a GASM preconditioner: report zero subdomains rather than erroring. */
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)  *n  = osm->n;
    if (is) *is = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubmatrices"
/*@C
    PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
    only) for the additive Schwarz preconditioner.

    Not Collective

    Input Parameter:
.   pc - the preconditioner context

    Output Parameters:
+   n - the number of matrices for this processor (default value = 1)
-   mat - the matrices


    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The submatrices only exist after setup. */
  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    /* Not a GASM preconditioner: report zero matrices rather than erroring. */
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)   *n   = osm->n;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}