1 2 /* 3 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 4 In this version each processor may have any number of subdomains and a subdomain may live on multiple 5 processors. 6 7 N - total number of subdomains on all processors 8 n - actual number of subdomains on this processor 9 nmax - maximum number of subdomains per processor 10 */ 11 #include <private/pcimpl.h> /*I "petscpc.h" I*/ 12 13 typedef struct { 14 PetscInt N,n,nmax; 15 PetscInt overlap; /* overlap requested by user */ 16 KSP *ksp; /* linear solvers for each block */ 17 Vec gx,gy; /* Merged work vectors */ 18 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 19 IS gis, gis_local; /* merged ISs */ 20 VecScatter grestriction; /* merged restriction */ 21 VecScatter gprolongation; /* merged prolongation */ 22 IS *is; /* index set that defines each overlapping subdomain */ 23 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 24 Mat *pmat; /* subdomain block matrices */ 25 PCGASMType type; /* use reduced interpolation, restriction or both */ 26 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 27 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 28 PetscBool sort_indices; /* flag to sort subdomain indices */ 29 } PC_GASM; 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "PCGASMPrintSubdomains" 33 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 34 { 35 PC_GASM *osm = (PC_GASM*)pc->data; 36 const char *prefix; 37 char fname[PETSC_MAX_PATH_LEN+1]; 38 PetscViewer viewer, sviewer; 39 PetscInt i,j,nidx; 40 const PetscInt *idx; 41 char *cidx; 42 PetscBool found; 43 PetscMPIInt size, rank; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 48 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, 
&rank);CHKERRQ(ierr); 49 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 50 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 51 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 52 for (i=0;i<osm->n;++i) { 53 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 54 ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr); 55 /* 56 No more than 15 characters per index plus a space. 57 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 58 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 59 For nidx == 0, the whole string 16 '\0'. 60 */ 61 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 62 ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr); 63 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 64 for(j = 0; j < nidx; ++j) { 65 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 66 } 67 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 68 ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr); 69 70 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr); 72 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 73 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 74 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 75 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 76 ierr = PetscFree(cidx);CHKERRQ(ierr); 77 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 78 if (osm->is_local) { 79 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 80 ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr); 81 /* 82 No more than 15 characters per index plus a space. 
83 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 84 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 85 For nidx == 0, the whole string 16 '\0'. 86 */ 87 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr); 88 ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr); 89 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr); 90 for(j = 0; j < nidx; ++j) { 91 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr); 92 } 93 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr); 94 ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr); 95 96 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 97 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr); 98 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr); 99 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 100 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 101 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr); 102 ierr = PetscFree(cidx);CHKERRQ(ierr); 103 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 104 } 105 } 106 PetscFunctionReturn(0); 107 } 108 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "PCView_GASM" 112 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 113 { 114 PC_GASM *osm = (PC_GASM*)pc->data; 115 const char *prefix; 116 PetscErrorCode ierr; 117 PetscMPIInt rank, size; 118 PetscInt i,bsz; 119 PetscBool iascii,isstring, print_subdomains; 120 PetscViewer sviewer; 121 122 123 PetscFunctionBegin; 124 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr); 125 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr); 126 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 127 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 128 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 129 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 130 if (iascii) { 131 char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set"; 132 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 133 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 134 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 135 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 136 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 137 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 138 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 139 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 140 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 141 if (osm->same_local_solves) { 142 if (osm->ksp) { 143 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 144 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 145 if (!rank) { 146 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 147 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 148 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 149 } 150 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 151 } 152 } else { 153 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 154 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each 
subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 155 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 156 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 157 for (i=0; i<osm->nmax; i++) { 158 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 159 if (i < osm->n) { 160 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 161 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr); 162 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 163 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 164 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 165 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr); 166 ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr); 167 } 168 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 169 } 170 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 171 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 172 } 173 } else if (isstring) { 174 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 175 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 176 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 177 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 178 } else { 179 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 180 } 181 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 182 if(print_subdomains) { 183 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 189 190 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "PCSetUp_GASM" 194 static PetscErrorCode PCSetUp_GASM(PC pc) 195 { 196 PC_GASM *osm = (PC_GASM*)pc->data; 197 
PetscErrorCode ierr; 198 PetscBool symset,flg; 199 PetscInt i; 200 PetscMPIInt rank, size; 201 MatReuse scall = MAT_REUSE_MATRIX; 202 KSP ksp; 203 PC subpc; 204 const char *prefix,*pprefix; 205 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 206 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 207 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 208 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 209 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. */ 210 Vec x,y; 211 PetscScalar *gxarray, *gyarray; 212 PetscInt gfirst, glast; 213 214 PetscFunctionBegin; 215 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 216 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 217 if (!pc->setupcalled) { 218 219 if (!osm->type_set) { 220 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 221 if (symset && flg) { osm->type = PC_GASM_BASIC; } 222 } 223 224 if (osm->N == PETSC_DECIDE && osm->n < 1) { 225 /* no subdomains given, use one per processor */ 226 osm->nmax = osm->n = 1; 227 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 228 osm->N = size; 229 } else if (osm->N == PETSC_DECIDE) { 230 PetscInt inwork[2], outwork[2]; 231 /* determine global number of subdomains and the max number of local subdomains */ 232 inwork[0] = inwork[1] = osm->n; 233 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 234 osm->nmax = outwork[0]; 235 osm->N = outwork[1]; 236 } 237 if (!osm->is){ /* create the index sets */ 238 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 239 } 240 if (!osm->is_local) { 241 /* 242 This indicates that osm->is should define a nonoverlapping decomposition 243 (there is no way to really guarantee that if subdomains are set by the user through 
PCGASMSetLocalSubdomains, 244 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 245 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 246 Therefore, osm->is will be used to define osm->is_local. 247 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 248 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 249 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 250 */ 251 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 252 for (i=0; i<osm->n; i++) { 253 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 254 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 255 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 256 } else { 257 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 258 osm->is_local[i] = osm->is[i]; 259 } 260 } 261 } 262 /* Beyond this point osm->is_local is not null. */ 263 if (osm->overlap > 0) { 264 /* Extend the "overlapping" regions by a number of steps */ 265 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 266 } 267 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 268 flg = PETSC_FALSE; 269 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 270 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 271 272 if (osm->sort_indices) { 273 for (i=0; i<osm->n; i++) { 274 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 275 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 276 } 277 } 278 /* Merge the ISs, create merged vectors and scatter contexts. */ 279 /* Restriction ISs. 
*/ 280 ddn = 0; 281 for (i=0; i<osm->n; i++) { 282 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 283 ddn += dn; 284 } 285 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr); 286 ddn = 0; 287 for (i=0; i<osm->n; i++) { 288 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 289 ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr); 290 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr); 291 ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr); 292 ddn += dn; 293 } 294 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr); 295 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 296 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr); 297 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr); 298 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 299 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr); 300 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr); 301 ierr = ISDestroy(&gid);CHKERRQ(ierr); 302 /* Prolongation ISs */ 303 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */ 304 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 305 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 306 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 307 /**/ 308 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 309 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. 
*/ 310 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 311 IS gis_llocal; /* IS with ddidx_llocal indices. */ 312 PetscInt j; 313 ddn_local = 0; 314 for (i=0; i<osm->n; i++) { 315 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 316 ddn_local += dn_local; 317 } 318 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr); 319 ddn_local = 0; 320 for (i=0; i<osm->n; i++) { 321 ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr); 322 ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr); 323 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr); 324 ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr); 325 ddn_local += dn_local; 326 } 327 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr); 328 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr); 329 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 330 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 331 ierr = ISLocalToGlobalMappingDestroy(<og);CHKERRQ(ierr); 332 if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 333 /* Now convert these localized indices into the global indices into the merged output vector. */ 334 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr); 335 for(j=0; j < ddn_llocal; ++j) { 336 ddidx_llocal[j] += gfirst; 337 } 338 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr); 339 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr); 340 ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr); 341 } 342 /* Create the subdomain work vectors. 
*/ 343 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 344 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 345 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 346 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr); 347 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr); 348 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 349 PetscInt dN; 350 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 351 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 352 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 353 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 354 } 355 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr); 356 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr); 357 ierr = VecDestroy(&x);CHKERRQ(ierr); 358 ierr = VecDestroy(&y);CHKERRQ(ierr); 359 /* Create the local solvers */ 360 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 361 for (i=0; i<osm->n; i++) { /* KSPs are local */ 362 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 363 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 364 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 365 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 366 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 367 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 368 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 369 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 370 osm->ksp[i] = ksp; 371 } 372 scall = MAT_INITIAL_MATRIX; 373 374 } else { 375 /* 376 Destroy the blocks from the previous iteration 377 */ 378 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 379 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 380 scall = MAT_INITIAL_MATRIX; 381 } 382 } 383 384 /* 385 Extract out the submatrices. 
386 */ 387 if(size > 1) { 388 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 389 } 390 else { 391 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 392 } 393 if (scall == MAT_INITIAL_MATRIX) { 394 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 395 for (i=0; i<osm->n; i++) { 396 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 397 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 398 } 399 } 400 401 /* Return control to the user so that the submatrices can be modified (e.g., to apply 402 different boundary conditions for the submatrices than for the global problem) */ 403 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 404 405 /* 406 Loop over submatrices putting them into local ksp 407 */ 408 for (i=0; i<osm->n; i++) { 409 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 410 if (!pc->setupcalled) { 411 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 412 } 413 } 414 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 420 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 421 { 422 PC_GASM *osm = (PC_GASM*)pc->data; 423 PetscErrorCode ierr; 424 PetscInt i; 425 426 PetscFunctionBegin; 427 for (i=0; i<osm->n; i++) { 428 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 429 } 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "PCApply_GASM" 435 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y) 436 { 437 PC_GASM *osm = (PC_GASM*)pc->data; 438 PetscErrorCode ierr; 439 PetscInt i; 440 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 441 442 PetscFunctionBegin; 443 /* 444 Support for limiting the restriction or interpolation to only local 445 subdomain values (leaving the other values 0). 
446 */ 447 if (!(osm->type & PC_GASM_RESTRICT)) { 448 forward = SCATTER_FORWARD_LOCAL; 449 /* have to zero the work RHS since scatter may leave some slots empty */ 450 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 451 } 452 if (!(osm->type & PC_GASM_INTERPOLATE)) { 453 reverse = SCATTER_REVERSE_LOCAL; 454 } 455 456 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 457 ierr = VecZeroEntries(y);CHKERRQ(ierr); 458 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 459 /* do the subdomain solves */ 460 for (i=0; i<osm->n; ++i) { 461 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 462 } 463 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 464 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "PCApplyTranspose_GASM" 470 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y) 471 { 472 PC_GASM *osm = (PC_GASM*)pc->data; 473 PetscErrorCode ierr; 474 PetscInt i; 475 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 476 477 PetscFunctionBegin; 478 /* 479 Support for limiting the restriction or interpolation to only local 480 subdomain values (leaving the other values 0). 
481 482 Note: these are reversed from the PCApply_GASM() because we are applying the 483 transpose of the three terms 484 */ 485 if (!(osm->type & PC_GASM_INTERPOLATE)) { 486 forward = SCATTER_FORWARD_LOCAL; 487 /* have to zero the work RHS since scatter may leave some slots empty */ 488 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr); 489 } 490 if (!(osm->type & PC_GASM_RESTRICT)) { 491 reverse = SCATTER_REVERSE_LOCAL; 492 } 493 494 ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 495 ierr = VecZeroEntries(y);CHKERRQ(ierr); 496 ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr); 497 /* do the local solves */ 498 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */ 499 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 500 } 501 ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 502 ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "PCReset_GASM" 508 static PetscErrorCode PCReset_GASM(PC pc) 509 { 510 PC_GASM *osm = (PC_GASM*)pc->data; 511 PetscErrorCode ierr; 512 PetscInt i; 513 514 PetscFunctionBegin; 515 if (osm->ksp) { 516 for (i=0; i<osm->n; i++) { 517 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr); 518 } 519 } 520 if (osm->pmat) { 521 if (osm->n > 0) { 522 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 523 } 524 } 525 if (osm->x) { 526 for (i=0; i<osm->n; i++) { 527 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr); 528 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr); 529 } 530 } 531 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr); 532 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr); 533 534 ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr); 535 ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr); 536 if (osm->is) {ierr = 
PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0;} 537 ierr = ISDestroy(&osm->gis);CHKERRQ(ierr); 538 ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "PCDestroy_GASM" 544 static PetscErrorCode PCDestroy_GASM(PC pc) 545 { 546 PC_GASM *osm = (PC_GASM*)pc->data; 547 PetscErrorCode ierr; 548 PetscInt i; 549 550 PetscFunctionBegin; 551 ierr = PCReset_GASM(pc);CHKERRQ(ierr); 552 if (osm->ksp) { 553 for (i=0; i<osm->n; i++) { 554 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr); 555 } 556 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 557 } 558 ierr = PetscFree(osm->x);CHKERRQ(ierr); 559 ierr = PetscFree(osm->y);CHKERRQ(ierr); 560 ierr = PetscFree(pc->data);CHKERRQ(ierr); 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "PCSetFromOptions_GASM" 566 static PetscErrorCode PCSetFromOptions_GASM(PC pc) { 567 PC_GASM *osm = (PC_GASM*)pc->data; 568 PetscErrorCode ierr; 569 PetscInt blocks,ovl; 570 PetscBool symset,flg; 571 PCGASMType gasmtype; 572 573 PetscFunctionBegin; 574 /* set the type to symmetric if matrix is symmetric */ 575 if (!osm->type_set && pc->pmat) { 576 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 577 if (symset && flg) { osm->type = PC_GASM_BASIC; } 578 } 579 ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr); 580 ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 581 if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); } 582 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 583 if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 584 flg = PETSC_FALSE; 585 ierr = PetscOptionsEnum("-pc_gasm_type","Type of 
restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr); 586 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); } 587 ierr = PetscOptionsTail();CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 /*------------------------------------------------------------------------------------*/ 592 593 EXTERN_C_BEGIN 594 #undef __FUNCT__ 595 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM" 596 PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[]) 597 { 598 PC_GASM *osm = (PC_GASM*)pc->data; 599 PetscErrorCode ierr; 600 PetscInt i; 601 602 PetscFunctionBegin; 603 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n); 604 if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp()."); 605 606 if (!pc->setupcalled) { 607 osm->n = n; 608 osm->is = 0; 609 osm->is_local = 0; 610 if (is) { 611 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 612 } 613 if (is_local) { 614 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 615 } 616 if (osm->is) { 617 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 618 } 619 if (is) { 620 ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr); 621 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 622 /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. 
*/ 623 osm->overlap = -1; 624 } 625 if (is_local) { 626 ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 627 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 EXTERN_C_END 633 634 EXTERN_C_BEGIN 635 #undef __FUNCT__ 636 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM" 637 PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) { 638 PC_GASM *osm = (PC_GASM*)pc->data; 639 PetscErrorCode ierr; 640 PetscMPIInt rank,size; 641 PetscInt n; 642 643 PetscFunctionBegin; 644 if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 645 646 /* 647 Split the subdomains equally among all processors 648 */ 649 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 650 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 651 n = N/size + ((N % size) > rank); 652 if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 653 if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp()."); 654 if (!pc->setupcalled) { 655 if (osm->is) { 656 ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); 657 } 658 osm->N = N; 659 osm->n = n; 660 osm->is = 0; 661 osm->is_local = 0; 662 } 663 PetscFunctionReturn(0); 664 }/* PCGASMSetTotalSubdomains_GASM() */ 665 EXTERN_C_END 666 667 EXTERN_C_BEGIN 668 #undef __FUNCT__ 669 #define __FUNCT__ "PCGASMSetOverlap_GASM" 670 PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl) 671 { 672 PC_GASM *osm = (PC_GASM*)pc->data; 673 674 PetscFunctionBegin; 675 if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 676 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() 
should be called before PCSetUp()."); 677 if (!pc->setupcalled) { 678 osm->overlap = ovl; 679 } 680 PetscFunctionReturn(0); 681 } 682 EXTERN_C_END 683 684 EXTERN_C_BEGIN 685 #undef __FUNCT__ 686 #define __FUNCT__ "PCGASMSetType_GASM" 687 PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type) 688 { 689 PC_GASM *osm = (PC_GASM*)pc->data; 690 691 PetscFunctionBegin; 692 osm->type = type; 693 osm->type_set = PETSC_TRUE; 694 PetscFunctionReturn(0); 695 } 696 EXTERN_C_END 697 698 EXTERN_C_BEGIN 699 #undef __FUNCT__ 700 #define __FUNCT__ "PCGASMSetSortIndices_GASM" 701 PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort) 702 { 703 PC_GASM *osm = (PC_GASM*)pc->data; 704 705 PetscFunctionBegin; 706 osm->sort_indices = doSort; 707 PetscFunctionReturn(0); 708 } 709 EXTERN_C_END 710 711 EXTERN_C_BEGIN 712 #undef __FUNCT__ 713 #define __FUNCT__ "PCGASMGetSubKSP_GASM" 714 /* 715 FIX: This routine might need to be modified once multiple ranks per subdomain are allowed. 716 In particular, it would upset the global subdomain number calculation. 717 */ 718 PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp) 719 { 720 PC_GASM *osm = (PC_GASM*)pc->data; 721 PetscErrorCode ierr; 722 723 PetscFunctionBegin; 724 if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 725 726 if (n) { 727 *n = osm->n; 728 } 729 if (first) { 730 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 731 *first -= osm->n; 732 } 733 if (ksp) { 734 /* Assume that local solves are now different; not necessarily 735 true though! 
This flag is used only for PCView_GASM() */ 736 *ksp = osm->ksp; 737 osm->same_local_solves = PETSC_FALSE; 738 } 739 PetscFunctionReturn(0); 740 }/* PCGASMGetSubKSP_GASM() */ 741 EXTERN_C_END 742 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "PCGASMSetLocalSubdomains" 746 /*@C 747 PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor 748 only) for the additive Schwarz preconditioner. 749 750 Collective on PC 751 752 Input Parameters: 753 + pc - the preconditioner context 754 . n - the number of subdomains for this processor (default value = 1) 755 . is - the index set that defines the subdomains for this processor 756 (or PETSC_NULL for PETSc to determine subdomains) 757 - is_local - the index sets that define the local part of the subdomains for this processor 758 (or PETSC_NULL to use the default of 1 subdomain per process) 759 760 Notes: 761 The IS numbering is in the parallel, global numbering of the vector. 762 763 By default the GASM preconditioner uses 1 block per processor. 764 765 Use PCGASMSetTotalSubdomains() to set the subdomains for all processors. 766 767 Level: advanced 768 769 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 770 771 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 772 PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains() 773 @*/ 774 PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[]) 775 { 776 PetscErrorCode ierr; 777 778 PetscFunctionBegin; 779 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 780 ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "PCGASMSetTotalSubdomains" 786 /*@C 787 PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the 788 additive Schwarz preconditioner. Either all or no processors in the 789 PC communicator must call this routine, with the same index sets. 

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N  - the total number of subdomains for all processors
         (the index sets themselves cannot currently be supplied through this routine; see Notes)

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this to set the actual subdomains with the argument is.

    By default the GASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the GASM preconditioner is being used.

    Use PCGASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.
 -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc   - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc     - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSPes is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D(),
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (not Try): calling this on a PC that is not PCGASM is an error. */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
   PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
.  -pc_gasm_blocks <blks> - Sets total blocks
.  -pc_gasm_overlap <ovl> - Sets overlap
-  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
      -pc_gasm_type basic to use the standard GASM.

   Notes: Each processor can have one or more blocks, but a block cannot be shared by more
     than one processor. Defaults to one block per processor.

     To set options on the solvers for each block append -sub_ to all the KSP, and PC
        options database keys.
 For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

     To set the options on the solvers separate for each block call PCGASMGetSubKSP()
         and set the options directly on the resulting KSP object (you can access its PC
         with KSPGetPC())


   Level: beginner

   Concepts: additive Schwarz method

    References:
    An additive variant of the Schwarz alternating method for the case of many subregions
    M Dryja, OB Widlund - Courant Institute, New York University Technical report

    Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
           PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
/* Allocates the PC_GASM context, sets its defaults, and installs the GASM function table on pc. */
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  /* Defaults: subdomain counts decided at setup time, overlap of 1, restricted GASM, sorted indices. */
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = 0;

  /* Compose the type-specific implementations so the generic PCGASMSetXXX() wrappers can dispatch to them. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains"
/*@C
   PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for a any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
    from these if you use PCGASMSetLocalSubdomains()

    In the Fortran version you must provide the array outis[] already allocated of length n.
1115 1116 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1117 1118 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1119 @*/ 1120 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1121 { 1122 MatPartitioning mpart; 1123 const char *prefix; 1124 PetscErrorCode (*f)(Mat,MatReuse,Mat*); 1125 PetscMPIInt size; 1126 PetscInt i,j,rstart,rend,bs; 1127 PetscBool isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1128 Mat Ad = PETSC_NULL, adj; 1129 IS ispart,isnumb,*is; 1130 PetscErrorCode ierr; 1131 1132 PetscFunctionBegin; 1133 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1134 PetscValidPointer(outis,3); 1135 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1136 1137 /* Get prefix, row distribution, and block size */ 1138 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1139 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1140 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1141 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1142 1143 /* Get diagonal block from matrix if possible */ 1144 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1145 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1146 if (f) { 1147 ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr); 1148 } else if (size == 1) { 1149 Ad = A; 1150 } 1151 if (Ad) { 1152 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1153 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1154 } 1155 if (Ad && n > 1) { 1156 PetscBool match,done; 1157 /* Try to setup a good matrix partitioning if available */ 1158 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1159 ierr = 
PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1160 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1161 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1162 if (!match) { 1163 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1164 } 1165 if (!match) { /* assume a "good" partitioner is available */ 1166 PetscInt na,*ia,*ja; 1167 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1168 if (done) { 1169 /* Build adjacency matrix by hand. Unfortunately a call to 1170 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1171 remove the block-aij structure and we cannot expect 1172 MatPartitioning to split vertices as we need */ 1173 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1174 nnz = 0; 1175 for (i=0; i<na; i++) { /* count number of nonzeros */ 1176 len = ia[i+1] - ia[i]; 1177 row = ja + ia[i]; 1178 for (j=0; j<len; j++) { 1179 if (row[j] == i) { /* don't count diagonal */ 1180 len--; break; 1181 } 1182 } 1183 nnz += len; 1184 } 1185 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1186 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1187 nnz = 0; 1188 iia[0] = 0; 1189 for (i=0; i<na; i++) { /* fill adjacency */ 1190 cnt = 0; 1191 len = ia[i+1] - ia[i]; 1192 row = ja + ia[i]; 1193 for (j=0; j<len; j++) { 1194 if (row[j] != i) { /* if not diagonal */ 1195 jja[nnz+cnt++] = row[j]; 1196 } 1197 } 1198 nnz += cnt; 1199 iia[i+1] = nnz; 1200 } 1201 /* Partitioning of the adjacency matrix */ 1202 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1203 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1204 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1205 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1206 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1207 ierr = MatDestroy(&adj);CHKERRQ(ierr); 1208 foundpart = PETSC_TRUE; 
1209 } 1210 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1211 } 1212 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr); 1213 } 1214 1215 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1216 *outis = is; 1217 1218 if (!foundpart) { 1219 1220 /* Partitioning by contiguous chunks of rows */ 1221 1222 PetscInt mbs = (rend-rstart)/bs; 1223 PetscInt start = rstart; 1224 for (i=0; i<n; i++) { 1225 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1226 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1227 start += count; 1228 } 1229 1230 } else { 1231 1232 /* Partitioning by adjacency of diagonal block */ 1233 1234 const PetscInt *numbering; 1235 PetscInt *count,nidx,*indices,*newidx,start=0; 1236 /* Get node count in each partition */ 1237 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1238 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1239 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1240 for (i=0; i<n; i++) count[i] *= bs; 1241 } 1242 /* Build indices from node numbering */ 1243 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1244 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1245 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1246 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1247 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1248 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1249 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1250 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1251 for (i=0; i<nidx; i++) 1252 for (j=0; j<bs; j++) 1253 newidx[i*bs+j] = indices[i]*bs + j; 1254 ierr = PetscFree(indices);CHKERRQ(ierr); 1255 nidx *= bs; 1256 indices = newidx; 1257 } 1258 /* Shift to get global indices */ 1259 for (i=0; i<nidx; i++) indices[i] += rstart; 1260 1261 /* Build the index sets for each block */ 1262 for (i=0; i<n; i++) { 1263 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1264 ierr = ISSort(is[i]);CHKERRQ(ierr); 1265 start += count[i]; 1266 } 1267 1268 ierr = PetscFree(count); 1269 ierr = PetscFree(indices); 1270 ierr = ISDestroy(&isnumb);CHKERRQ(ierr); 1271 ierr = ISDestroy(&ispart);CHKERRQ(ierr); 1272 } 1273 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "PCGASMDestroySubdomains" 1279 /*@C 1280 PCGASMDestroySubdomains - Destroys the index sets created with 1281 PCGASMCreateSubdomains(). Should be called after setting subdomains 1282 with PCGASMSetLocalSubdomains(). 1283 1284 Collective 1285 1286 Input Parameters: 1287 + n - the number of index sets 1288 . is - the array of index sets 1289 - is_local - the array of local index sets, can be PETSC_NULL 1290 1291 Level: advanced 1292 1293 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1294 1295 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1296 @*/ 1297 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1298 { 1299 PetscInt i; 1300 PetscErrorCode ierr; 1301 PetscFunctionBegin; 1302 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1303 PetscValidPointer(is,2); 1304 for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); } 1305 ierr = PetscFree(is);CHKERRQ(ierr); 1306 if (is_local) { 1307 PetscValidPointer(is_local,3); 1308 for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); } 1309 ierr = PetscFree(is_local);CHKERRQ(ierr); 1310 } 1311 PetscFunctionReturn(0); 1312 } 1313 1314 1315 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1316 { \ 1317 PetscInt first_row = first/M, last_row = last/M+1; \ 1318 /* \ 1319 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1320 of the bounding box of the intersection of 
 the subdomain with the local ownership range (local \
   subdomain). \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
   of the intersection. \
  */ \
  /* ylow_loc is the grid row containing the first element of the local subdomain */ \
  *ylow_loc = PetscMax(first_row,ylow); \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \
  /* yhigh_loc is the grid row above the last local subdomain element */ \
  *yhigh_loc = PetscMin(last_row,yhigh); \
  /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0; \
  if(*ylow_loc < *yhigh_loc) { \
    PetscInt width = xright-xleft; \
    *n += width*(*yhigh_loc-*ylow_loc-1); \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  }\
}



#undef __FUNCT__
#define __FUNCT__ "PCGASMCreateSubdomains2D"
/*@
   PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Collective

   Input Parameters:
+  M, N - the global number of mesh points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of local subdomains created
.
 is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **iis = 0;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /*
     Determine the number of domains with nonzero intersections with the local ownership range.
  */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if(nidx) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
  /* NOTE(review): sizeof(IS*) is used for elements of type IS; both are pointer types so the sizes
     coincide, but sizeof(IS) would be the self-documenting choice -- confirm before changing. */
  *nsub = s;
  ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
  ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this domain intersects this processor's ownership range of pc->pmat.
         Do this twice: first for the domains with overlaps, and once without.
         During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for(q = 0; q < 2; ++q) {
        /*
          domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;
        n[q]  = nidx;
        /*
         Based on the counted number of indices in the local domain *with an overlap*,
         construct a subcommunicator of all the processors supporting this domain.
         Observe that a domain with an overlap might have nontrivial local support,
         while the domain without an overlap might not.  Hence, the decision to participate
         in the subcommunicator must be based on the domain with an overlap.
        */
        if (q == 0) {
          if(nidx) {
            color = 1;
          } else {
            color = MPI_UNDEFINED;
          }
          /* NOTE(review): subcomm is never freed with MPI_Comm_free(); presumably the IS created
             below takes over its lifetime via the PETSc object system -- confirm. */
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
        }
        /*
         Proceed only if the number of local indices *with an overlap* is nonzero.
        */
        if (n[0]) {
          if(q == 0) {
            iis = is;
          }
          if (q == 1) {
            /*
             The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
             Moreover, if the overlap is zero, the two ISs are identical.
            */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              iis = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          }/* if(q == 1) */
          idx = PETSC_NULL;
          ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          if(nidx) {
            /* Enumerate the dof-expanded global indices of the local piece of this subdomain,
               clipping the first and last grid rows to the locally owned portion. */
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
              PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for(d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
        }/* if(n[0]) */
      }/* for(q = 0; q < 2; ++q) */
      if(n[0]) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubdomains"
/*@C
   PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of subdomains for this processor (default value = 1)
.
 is - the index sets that define the subdomains for this processor
-  is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)


   Notes:
   The IS numbering is in the parallel, global numbering of the vector.

   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
@*/
PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (is) PetscValidPointer(is,3);
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    /* Not a GASM PC: report no subdomains rather than raising an error. */
    if (n)  *n  = 0;
    if (is) *is = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)        *n        = osm->n;
    if (is)       *is       = osm->is;
    if (is_local) *is_local = osm->is_local;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubmatrices"
/*@C
   PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices


   Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *osm;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The submatrices only exist after setup has extracted them from the global operator. */
  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (!match) {
    /* Not a GASM PC: report no submatrices rather than raising an error. */
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  } else {
    osm = (PC_GASM*)pc->data;
    if (n)   *n   = osm->n;
    if (mat) *mat = osm->pmat;
  }
  PetscFunctionReturn(0);
}