1 #define PETSCKSP_DLL 2 3 /* 4 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation. 5 In this version each processor may have any number of subdomains and a subdomain may live on multiple 6 processors. 7 8 N - total number of subdomains on all processors 9 n - actual number of subdomains on this processor 10 nmax - maximum number of subdomains per processor 11 */ 12 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 13 14 typedef struct { 15 PetscInt N,n,nmax; 16 PetscInt overlap; /* overlap requested by user */ 17 KSP *ksp; /* linear solvers for each block */ 18 Vec gx,gy; /* Merged work vectors */ 19 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */ 20 IS gis, gis_local; /* merged ISs */ 21 VecScatter grestriction; /* merged restriction */ 22 VecScatter gprolongation; /* merged prolongation */ 23 IS *is; /* index set that defines each overlapping subdomain */ 24 IS *is_local; /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */ 25 Mat *pmat; /* subdomain block matrices */ 26 PCGASMType type; /* use reduced interpolation, restriction or both */ 27 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */ 28 PetscBool same_local_solves; /* flag indicating whether all local solvers are same */ 29 PetscBool sort_indices; /* flag to sort subdomain indices */ 30 } PC_GASM; 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCGASMPrintSubdomains" 34 static PetscErrorCode PCGASMPrintSubdomains(PC pc) 35 { 36 PC_GASM *osm = (PC_GASM*)pc->data; 37 const char *prefix; 38 char fname[PETSC_MAX_PATH_LEN+1]; 39 PetscViewer viewer, sviewer; 40 PetscInt i,j,nidx; 41 const PetscInt *idx; 42 char *cidx; 43 PetscBool found; 44 PetscMPIInt size, rank; 45 PetscErrorCode ierr; 46 47 PetscFunctionBegin; 48 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr); 49 ierr = 
MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr); 50 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 51 ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr); 52 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 53 for (i=0;i<osm->n;++i) { 54 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 55 ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr); 56 /* 57 No more than 15 characters per index plus a space. 58 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 59 in case nidx == 0. That will take care of the space for the trailing '\0' as well. 60 For nidx == 0, the whole string 16 '\0'. 61 */ 62 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr); 63 ierr = ISGetIndices(osm->is[i], &idx); CHKERRQ(ierr); 64 ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr); 65 for(j = 0; j < nidx; ++j) { 66 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr); 67 } 68 ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr); 69 ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr); 70 71 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr); 72 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr); 73 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 74 ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr); 75 ierr = PetscFree(cidx); CHKERRQ(ierr); 76 ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr); 77 if (osm->is_local) { 78 ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr); 79 ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr); 80 /* 81 No more than 15 characters per index plus a space. 82 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx, 83 in case nidx == 0. 
That will take care of the space for the trailing '\0' as well. 84 For nidx == 0, the whole string 16 '\0'. 85 */ 86 ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr); 87 ierr = ISGetIndices(osm->is_local[i], &idx); CHKERRQ(ierr); 88 ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr); 89 for(j = 0; j < nidx; ++j) { 90 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr); 91 } 92 ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr); 93 ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr); 94 95 ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr); 96 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr); 97 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 98 ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr); 99 ierr = PetscFree(cidx); CHKERRQ(ierr); 100 ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr); 101 } 102 } 103 PetscFunctionReturn(0); 104 } 105 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "PCView_GASM" 109 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer) 110 { 111 PC_GASM *osm = (PC_GASM*)pc->data; 112 const char *prefix; 113 PetscErrorCode ierr; 114 PetscMPIInt rank, size; 115 PetscInt i,bsz; 116 PetscBool iascii,isstring, print_subdomains; 117 PetscViewer sviewer; 118 119 120 PetscFunctionBegin; 121 ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr); 122 ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr); 123 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 124 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 125 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 126 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr); 127 if (iascii) { 128 char overlaps[256] = "user-defined overlap",subdomains[256] = "total 
subdomains set"; 129 if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);} 130 if (osm->nmax > 0) {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);} 131 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr); 132 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 133 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr); 134 ierr = PetscViewerASCIIPrintf(viewer," Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr); 135 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 136 if (osm->same_local_solves) { 137 if (osm->ksp) { 138 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr); 139 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 140 if (!rank) { 141 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 142 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 143 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 144 } 145 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 146 } 147 } else { 148 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 149 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr); 150 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 151 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 152 for (i=0; i<osm->nmax; i++) { 153 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 154 if (i < osm->n) { 155 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 156 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = 
%D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr); 157 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 158 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 159 } 160 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 161 } 162 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 163 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 164 } 165 } else if (isstring) { 166 ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr); 167 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 168 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 169 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 170 } else { 171 SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name); 172 } 173 ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); 174 if(print_subdomains) { 175 ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 181 182 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "PCSetUp_GASM" 186 static PetscErrorCode PCSetUp_GASM(PC pc) 187 { 188 PC_GASM *osm = (PC_GASM*)pc->data; 189 PetscErrorCode ierr; 190 PetscBool symset,flg; 191 PetscInt i; 192 PetscMPIInt rank, size; 193 MatReuse scall = MAT_REUSE_MATRIX; 194 KSP ksp; 195 PC subpc; 196 const char *prefix,*pprefix; 197 PetscInt dn; /* Number of indices in a single subdomain assigned to this processor. */ 198 const PetscInt *didx; /* Indices from a single subdomain assigned to this processor. */ 199 PetscInt ddn; /* Number of indices in all subdomains assigned to this processor. */ 200 PetscInt *ddidx; /* Indices of all subdomains assigned to this processor. */ 201 IS gid; /* Identity IS of the size of all subdomains assigned to this processor. 
*/ 202 Vec x,y; 203 PetscScalar *gxarray, *gyarray; 204 PetscInt gfirst, glast; 205 206 PetscFunctionBegin; 207 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 208 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 209 if (!pc->setupcalled) { 210 211 if (!osm->type_set) { 212 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 213 if (symset && flg) { osm->type = PC_GASM_BASIC; } 214 } 215 216 if (osm->N == PETSC_DECIDE && osm->n < 1) { 217 /* no subdomains given, use one per processor */ 218 osm->nmax = osm->n = 1; 219 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 220 osm->N = size; 221 } else if (osm->N == PETSC_DECIDE) { 222 PetscInt inwork[2], outwork[2]; 223 /* determine global number of subdomains and the max number of local subdomains */ 224 inwork[0] = inwork[1] = osm->n; 225 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 226 osm->nmax = outwork[0]; 227 osm->N = outwork[1]; 228 } 229 if (!osm->is){ /* create the index sets */ 230 ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr); 231 } 232 if (!osm->is_local) { 233 /* 234 This indicates that osm->is should define a nonoverlapping decomposition 235 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains, 236 but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created 237 via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition). 238 Therefore, osm->is will be used to define osm->is_local. 239 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap, 240 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping. 241 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is. 
242 */ 243 ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr); 244 for (i=0; i<osm->n; i++) { 245 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 246 ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr); 247 ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr); 248 } else { 249 ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr); 250 osm->is_local[i] = osm->is[i]; 251 } 252 } 253 } 254 /* Beyond this point osm->is_local is not null. */ 255 if (osm->overlap > 0) { 256 /* Extend the "overlapping" regions by a number of steps */ 257 ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr); 258 } 259 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 260 flg = PETSC_FALSE; 261 ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 262 if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); } 263 264 if (osm->sort_indices) { 265 for (i=0; i<osm->n; i++) { 266 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 267 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 268 } 269 } 270 /* Merge the ISs, create merged vectors and scatter contexts. */ 271 /* Restriction ISs. 
*/ 272 ddn = 0; 273 for (i=0; i<osm->n; i++) { 274 ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr); 275 ddn += dn; 276 } 277 ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr); 278 ddn = 0; 279 for (i=0; i<osm->n; i++) { 280 ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr); 281 ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr); 282 ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr); 283 ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr); 284 ddn += dn; 285 } 286 ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr); 287 ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr); 288 ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr); 289 ierr = VecDuplicate(osm->gx,&osm->gy); CHKERRQ(ierr); 290 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast); CHKERRQ(ierr); 291 ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid); CHKERRQ(ierr); 292 ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction)); CHKERRQ(ierr); 293 ierr = ISDestroy(gid); CHKERRQ(ierr); 294 /* Prolongation ISs */ 295 { PetscInt dn_local; /* Number of indices in the local part of a single domain assigned to this processor. */ 296 const PetscInt *didx_local; /* Global indices from the local part of a single domain assigned to this processor. */ 297 PetscInt ddn_local; /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */ 298 PetscInt *ddidx_local; /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */ 299 /**/ 300 ISLocalToGlobalMapping ltog; /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */ 301 PetscInt *ddidx_llocal; /* Mapped local indices of the disjoint union of local parts of subdomains. 
*/ 302 PetscInt ddn_llocal; /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */ 303 IS gis_llocal; /* IS with ddidx_llocal indices. */ 304 PetscInt j; 305 ddn_local = 0; 306 for (i=0; i<osm->n; i++) { 307 ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr); 308 ddn_local += dn_local; 309 } 310 ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr); 311 ddn_local = 0; 312 for (i=0; i<osm->n; i++) { 313 ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr); 314 ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr); 315 ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr); 316 ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr); 317 ddn_local += dn_local; 318 } 319 ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr); 320 ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr); 321 ierr = ISLocalToGlobalMappingCreateIS(osm->gis,<og);CHKERRQ(ierr); 322 ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr); 323 ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr); 324 if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local); 325 /* Now convert these localized indices into the global indices into the merged output vector. 
*/ 326 ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr); 327 for(j=0; j < ddn_llocal; ++j) { 328 ddidx_llocal[j] += gfirst; 329 } 330 ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr); 331 ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation); CHKERRQ(ierr); 332 ierr = ISDestroy(gis_llocal);CHKERRQ(ierr); 333 } 334 /* Create the subdomain work vectors. */ 335 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr); 336 ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr); 337 ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr); 338 ierr = VecGetArray(osm->gx, &gxarray); CHKERRQ(ierr); 339 ierr = VecGetArray(osm->gy, &gyarray); CHKERRQ(ierr); 340 for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) { 341 PetscInt dN; 342 ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr); 343 ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr); 344 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr); 345 ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr); 346 } 347 ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr); 348 ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr); 349 ierr = VecDestroy(x); CHKERRQ(ierr); 350 ierr = VecDestroy(y); CHKERRQ(ierr); 351 /* Create the local solvers */ 352 ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 353 for (i=0; i<osm->n; i++) { /* KSPs are local */ 354 ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr); 355 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 356 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 357 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 358 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 359 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 360 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 361 ierr 
= KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 362 osm->ksp[i] = ksp; 363 } 364 scall = MAT_INITIAL_MATRIX; 365 366 } else { 367 /* 368 Destroy the blocks from the previous iteration 369 */ 370 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 371 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr); 372 scall = MAT_INITIAL_MATRIX; 373 } 374 } 375 376 /* 377 Extract out the submatrices. 378 */ 379 if(size > 1) { 380 ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 381 } 382 else { 383 ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr); 384 } 385 if (scall == MAT_INITIAL_MATRIX) { 386 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 387 for (i=0; i<osm->n; i++) { 388 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 389 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 390 } 391 } 392 393 /* Return control to the user so that the submatrices can be modified (e.g., to apply 394 different boundary conditions for the submatrices than for the global problem) */ 395 ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 396 397 /* 398 Loop over submatrices putting them into local ksp 399 */ 400 for (i=0; i<osm->n; i++) { 401 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 402 if (!pc->setupcalled) { 403 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 404 } 405 } 406 407 PetscFunctionReturn(0); 408 } 409 410 #undef __FUNCT__ 411 #define __FUNCT__ "PCSetUpOnBlocks_GASM" 412 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc) 413 { 414 PC_GASM *osm = (PC_GASM*)pc->data; 415 PetscErrorCode ierr; 416 PetscInt i; 417 418 PetscFunctionBegin; 419 for (i=0; i<osm->n; i++) { 420 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 421 } 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "PCApply_GASM" 
/*
   PCApply_GASM - Applies the GASM preconditioner: restrict the input to the
   (possibly overlapping) subdomains, solve on each subdomain, and add the
   results back into the output vector.
*/
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* y is accumulated with ADD_VALUES below, so it must start at zero. */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves; osm->x/osm->y alias slices of osm->gx/osm->gy */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
/*
   PCApplyTranspose_GASM - Applies the transpose of the GASM preconditioner;
   the RESTRICT/INTERPOLATE roles are swapped relative to PCApply_GASM().
*/
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_GASM"
/*
   PCDestroy_GASM - Frees all GASM data: subdomain solvers, subdomain matrices,
   split and merged work vectors, scatters, and the subdomain index sets.
*/
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
    }
  }
  /* Destroy the split work vectors before the merged vectors whose storage they alias. */
  for (i=0; i<osm->n; i++) {
    ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
    ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
  ierr = VecDestroy(osm->gy); CHKERRQ(ierr);

  ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
  ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);

  if (osm->is) {
    ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_GASM"
/*
   PCSetFromOptions_GASM - Processes the GASM options database keys
   (-pc_gasm_blocks, -pc_gasm_overlap, -pc_gasm_type).
*/
static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_GASM_BASIC; }
  }
  ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
    ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
    flg  = PETSC_FALSE;
    ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
/*
   PCGASMSetLocalSubdomains_GASM - Implementation of PCGASMSetLocalSubdomains();
   takes references to the caller's index sets and stores them in the PC.
   Must be called before PCSetUp().
*/
PetscErrorCode PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");

  if (!pc->setupcalled) {
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
    /* Reference the new ISs before destroying the old ones, in case they overlap. */
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (is_local) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
    }
    /* NOTE(review): osm->is was just zeroed above, so this branch appears to be dead
       (any previously-stored ISs are not destroyed here) -- verify intent. */
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
      /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
      osm->overlap = -1;
    }
    if (is_local) {
      ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
/*
   PCGASMSetTotalSubdomains_GASM - Implementation of PCGASMSetTotalSubdomains();
   splits N subdomains as evenly as possible among the processes of the PC's
   communicator (the first N%size ranks get one extra).
*/
PetscErrorCode PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);

  /*
     Split the subdomains equally among all processors
  */
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  n = N/size + ((N % size) > rank);
  if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
  if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    if (osm->is) {
      ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    }
    osm->N        = N;
    osm->n        = n;
    osm->is       = 0;
    osm->is_local = 0;
  }
  PetscFunctionReturn(0);
}/* PCGASMSetTotalSubdomains_GASM() */
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap_GASM"
/*
   PCGASMSetOverlap_GASM - Implementation of PCGASMSetOverlap(); records the
   requested overlap. Must be called before PCSetUp().
*/
PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) {
    osm->overlap = ovl;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType_GASM"
/*
   PCGASMSetType_GASM - Implementation of PCGASMSetType(); records the
   restriction/interpolation variant and marks it as user-set.
*/
PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices_GASM"
/*
   PCGASMSetSortIndices_GASM - Implementation of PCGASMSetSortIndices();
   records whether subdomain indices should be sorted during setup.
*/
PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    /* Exclusive prefix sum: global number of the first subdomain on this rank. */
    ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_GASM() */
    *ksp = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index set that defines the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same index sets.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for all processors
.   is - the index sets that define the subdomains for all processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    Currently you cannot use this to set the actual subdomains with the argument is.

    By default the GASM preconditioner uses 1 block per processor.

    These index sets cannot be destroyed until after completion of the
    linear solves for which the GASM preconditioner is being used.

    Use PCGASMSetLocalSubdomains() to set local subdomains.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.
-pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 878 879 Level: intermediate 880 881 .keywords: PC, GASM, set, type 882 883 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 884 PCGASMCreateSubdomains2D() 885 @*/ 886 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type) 887 { 888 PetscErrorCode ierr; 889 890 PetscFunctionBegin; 891 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 892 PetscValidLogicalCollectiveEnum(pc,type,2); 893 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr); 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "PCGASMSetSortIndices" 899 /*@ 900 PCGASMSetSortIndices - Determines whether subdomain indices are sorted. 901 902 Logically Collective on PC 903 904 Input Parameters: 905 + pc - the preconditioner context 906 - doSort - sort the subdomain indices 907 908 Level: intermediate 909 910 .keywords: PC, GASM, set, type 911 912 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(), 913 PCGASMCreateSubdomains2D() 914 @*/ 915 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort) 916 { 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 921 PetscValidLogicalCollectiveBool(pc,doSort,2); 922 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 926 #undef __FUNCT__ 927 #define __FUNCT__ "PCGASMGetSubKSP" 928 /*@C 929 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on 930 this processor. 931 932 Collective on PC iff first_local is requested 933 934 Input Parameter: 935 . pc - the preconditioner context 936 937 Output Parameters: 938 + n_local - the number of blocks on this processor or PETSC_NULL 939 . 
first_local - the global number of the first block on this processor or PETSC_NULL, 940 all processors must request or all must pass PETSC_NULL 941 - ksp - the array of KSP contexts 942 943 Note: 944 After PCGASMGetSubKSP() the array of KSPes is not to be freed 945 946 Currently for some matrix implementations only 1 block per processor 947 is supported. 948 949 You must call KSPSetUp() before calling PCGASMGetSubKSP(). 950 951 Level: advanced 952 953 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context 954 955 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), 956 PCGASMCreateSubdomains2D(), 957 @*/ 958 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 959 { 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 964 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 /* -------------------------------------------------------------------------------------*/ 969 /*MC 970 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 971 its own KSP object. 972 973 Options Database Keys: 974 + -pc_gasm_truelocal - Activates PCGGASMSetUseTrueLocal() 975 . -pc_gasm_blocks <blks> - Sets total blocks 976 . -pc_gasm_overlap <ovl> - Sets overlap 977 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type 978 979 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 980 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use 981 -pc_gasm_type basic to use the standard GASM. 982 983 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 984 than one processor. Defaults to one block per processor. 
985 986 To set options on the solvers for each block append -sub_ to all the KSP, and PC 987 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly 988 989 To set the options on the solvers separate for each block call PCGASMGetSubKSP() 990 and set the options directly on the resulting KSP object (you can access its PC 991 with KSPGetPC()) 992 993 994 Level: beginner 995 996 Concepts: additive Schwarz method 997 998 References: 999 An additive variant of the Schwarz alternating method for the case of many subregions 1000 M Dryja, OB Widlund - Courant Institute, New York University Technical report 1001 1002 Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations, 1003 Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X. 1004 1005 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 1006 PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(), 1007 PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType() 1008 1009 M*/ 1010 1011 EXTERN_C_BEGIN 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "PCCreate_GASM" 1014 PetscErrorCode PCCreate_GASM(PC pc) 1015 { 1016 PetscErrorCode ierr; 1017 PC_GASM *osm; 1018 1019 PetscFunctionBegin; 1020 ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr); 1021 osm->N = PETSC_DECIDE; 1022 osm->n = 0; 1023 osm->nmax = 0; 1024 osm->overlap = 1; 1025 osm->ksp = 0; 1026 osm->grestriction = 0; 1027 osm->gprolongation = 0; 1028 osm->gx = 0; 1029 osm->gy = 0; 1030 osm->x = 0; 1031 osm->y = 0; 1032 osm->is = 0; 1033 osm->is_local = 0; 1034 osm->pmat = 0; 1035 osm->type = PC_GASM_RESTRICT; 1036 osm->same_local_solves = PETSC_TRUE; 1037 osm->sort_indices = PETSC_TRUE; 1038 1039 pc->data = (void*)osm; 1040 pc->ops->apply = PCApply_GASM; 1041 pc->ops->applytranspose = PCApplyTranspose_GASM; 1042 pc->ops->setup = PCSetUp_GASM; 1043 pc->ops->destroy = 
PCDestroy_GASM; 1044 pc->ops->setfromoptions = PCSetFromOptions_GASM; 1045 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM; 1046 pc->ops->view = PCView_GASM; 1047 pc->ops->applyrichardson = 0; 1048 1049 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM", 1050 PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr); 1051 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM", 1052 PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr); 1053 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM", 1054 PCGASMSetOverlap_GASM);CHKERRQ(ierr); 1055 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM", 1056 PCGASMSetType_GASM);CHKERRQ(ierr); 1057 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM", 1058 PCGASMSetSortIndices_GASM);CHKERRQ(ierr); 1059 ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM", 1060 PCGASMGetSubKSP_GASM);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 EXTERN_C_END 1064 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "PCGASMCreateSubdomains" 1068 /*@C 1069 PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz 1070 preconditioner for a any problem on a general grid. 1071 1072 Collective 1073 1074 Input Parameters: 1075 + A - The global matrix operator 1076 - n - the number of local blocks 1077 1078 Output Parameters: 1079 . outis - the array of index sets defining the subdomains 1080 1081 Level: advanced 1082 1083 Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap 1084 from these if you use PCGASMSetLocalSubdomains() 1085 1086 In the Fortran version you must provide the array outis[] already allocated of length n. 
1087 1088 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1089 1090 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains() 1091 @*/ 1092 PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 1093 { 1094 MatPartitioning mpart; 1095 const char *prefix; 1096 PetscErrorCode (*f)(Mat,PetscBool *,MatReuse,Mat*); 1097 PetscMPIInt size; 1098 PetscInt i,j,rstart,rend,bs; 1099 PetscBool iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1100 Mat Ad = PETSC_NULL, adj; 1101 IS ispart,isnumb,*is; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1106 PetscValidPointer(outis,3); 1107 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n); 1108 1109 /* Get prefix, row distribution, and block size */ 1110 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1111 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1112 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1113 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1114 1115 /* Get diagonal block from matrix if possible */ 1116 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1117 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1118 if (f) { 1119 ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr); 1120 } else if (size == 1) { 1121 iscopy = PETSC_FALSE; Ad = A; 1122 } else { 1123 iscopy = PETSC_FALSE; Ad = PETSC_NULL; 1124 } 1125 if (Ad) { 1126 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1127 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1128 } 1129 if (Ad && n > 1) { 1130 PetscBool match,done; 1131 /* Try to setup a good matrix partitioning if available */ 1132 ierr = 
MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1133 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1134 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1135 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr); 1136 if (!match) { 1137 ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr); 1138 } 1139 if (!match) { /* assume a "good" partitioner is available */ 1140 PetscInt na,*ia,*ja; 1141 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1142 if (done) { 1143 /* Build adjacency matrix by hand. Unfortunately a call to 1144 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1145 remove the block-aij structure and we cannot expect 1146 MatPartitioning to split vertices as we need */ 1147 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1148 nnz = 0; 1149 for (i=0; i<na; i++) { /* count number of nonzeros */ 1150 len = ia[i+1] - ia[i]; 1151 row = ja + ia[i]; 1152 for (j=0; j<len; j++) { 1153 if (row[j] == i) { /* don't count diagonal */ 1154 len--; break; 1155 } 1156 } 1157 nnz += len; 1158 } 1159 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1160 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1161 nnz = 0; 1162 iia[0] = 0; 1163 for (i=0; i<na; i++) { /* fill adjacency */ 1164 cnt = 0; 1165 len = ia[i+1] - ia[i]; 1166 row = ja + ia[i]; 1167 for (j=0; j<len; j++) { 1168 if (row[j] != i) { /* if not diagonal */ 1169 jja[nnz+cnt++] = row[j]; 1170 } 1171 } 1172 nnz += cnt; 1173 iia[i+1] = nnz; 1174 } 1175 /* Partitioning of the adjacency matrix */ 1176 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1177 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1178 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1179 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1180 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 
1181 ierr = MatDestroy(adj);CHKERRQ(ierr); 1182 foundpart = PETSC_TRUE; 1183 } 1184 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1185 } 1186 ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr); 1187 } 1188 if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);} 1189 1190 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1191 *outis = is; 1192 1193 if (!foundpart) { 1194 1195 /* Partitioning by contiguous chunks of rows */ 1196 1197 PetscInt mbs = (rend-rstart)/bs; 1198 PetscInt start = rstart; 1199 for (i=0; i<n; i++) { 1200 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1201 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1202 start += count; 1203 } 1204 1205 } else { 1206 1207 /* Partitioning by adjacency of diagonal block */ 1208 1209 const PetscInt *numbering; 1210 PetscInt *count,nidx,*indices,*newidx,start=0; 1211 /* Get node count in each partition */ 1212 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1213 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1214 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1215 for (i=0; i<n; i++) count[i] *= bs; 1216 } 1217 /* Build indices from node numbering */ 1218 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1219 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1220 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1221 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1222 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1223 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1224 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1225 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1226 for (i=0; i<nidx; i++) 1227 for (j=0; j<bs; j++) 1228 newidx[i*bs+j] = indices[i]*bs + j; 1229 ierr = PetscFree(indices);CHKERRQ(ierr); 1230 nidx *= bs; 1231 indices = newidx; 1232 } 1233 /* Shift to get global indices */ 1234 for (i=0; 
i<nidx; i++) indices[i] += rstart; 1235 1236 /* Build the index sets for each block */ 1237 for (i=0; i<n; i++) { 1238 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr); 1239 ierr = ISSort(is[i]);CHKERRQ(ierr); 1240 start += count[i]; 1241 } 1242 1243 ierr = PetscFree(count); 1244 ierr = PetscFree(indices); 1245 ierr = ISDestroy(isnumb);CHKERRQ(ierr); 1246 ierr = ISDestroy(ispart);CHKERRQ(ierr); 1247 1248 } 1249 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "PCGASMDestroySubdomains" 1255 /*@C 1256 PCGASMDestroySubdomains - Destroys the index sets created with 1257 PCGASMCreateSubdomains(). Should be called after setting subdomains 1258 with PCGASMSetLocalSubdomains(). 1259 1260 Collective 1261 1262 Input Parameters: 1263 + n - the number of index sets 1264 . is - the array of index sets 1265 - is_local - the array of local index sets, can be PETSC_NULL 1266 1267 Level: advanced 1268 1269 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid 1270 1271 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains() 1272 @*/ 1273 PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1274 { 1275 PetscInt i; 1276 PetscErrorCode ierr; 1277 PetscFunctionBegin; 1278 if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1279 PetscValidPointer(is,2); 1280 for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); } 1281 ierr = PetscFree(is);CHKERRQ(ierr); 1282 if (is_local) { 1283 PetscValidPointer(is_local,3); 1284 for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); } 1285 ierr = PetscFree(is_local);CHKERRQ(ierr); 1286 } 1287 PetscFunctionReturn(0); 1288 } 1289 1290 1291 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \ 1292 { \ 1293 PetscInt first_row = first/M, last_row = last/M+1; \ 1294 /* \ 1295 Compute ylow_loc 
and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \ 1296 of the bounding box of the intersection of the subdomain with the local ownership range (local \ 1297 subdomain). \ 1298 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \ 1299 of the intersection. \ 1300 */ \ 1301 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \ 1302 *ylow_loc = PetscMax(first_row,ylow); \ 1303 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1304 *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft; \ 1305 /* yhigh_loc is the grid row above the last local subdomain element */ \ 1306 *yhigh_loc = PetscMin(last_row,yhigh); \ 1307 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \ 1308 *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright; \ 1309 /* Now compute the size of the local subdomain n. */ \ 1310 *n = 0; \ 1311 if(*ylow_loc < *yhigh_loc) { \ 1312 PetscInt width = xright-xleft; \ 1313 *n += width*(*yhigh_loc-*ylow_loc-1); \ 1314 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \ 1315 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \ 1316 }\ 1317 } 1318 1319 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "PCGASMCreateSubdomains2D" 1323 /*@ 1324 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1325 preconditioner for a two-dimensional problem on a regular grid. 1326 1327 Collective 1328 1329 Input Parameters: 1330 + M, N - the global number of mesh points in the x and y directions 1331 . Mdomains, Ndomains - the global number of subdomains in the x and y directions 1332 . dof - degrees of freedom per node 1333 - overlap - overlap in mesh lines 1334 1335 Output Parameters: 1336 + Nsub - the number of local subdomains created 1337 . 
is - array of index sets defining overlapping (if overlap > 0) subdomains
-  is_local - array of index sets defining non-overlapping subdomains

   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMSetOverlap()
@*/
PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;                          /* extent of the current subdomain in grid lines */
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc; /* horizontal subdomain limits, global and local */
  PetscInt       ystart, ylow, yhigh, ylow_loc, yhigh_loc;     /* vertical subdomain limits, global and local */
  PetscInt       x[2][2], y[2][2], n[2];                       /* index [0]: with overlap, [1]: without overlap */
  PetscInt       first, last;                                  /* this process's ownership range of pc->pmat rows */
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;

  IS             **iis;
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last); CHKERRQ(ierr);
  /* The local row range must align with dof boundaries so that grid points are not split across processes. */
  if(first%dof || last%dof) {
    SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
             "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
  }
  /*
    First pass: determine the number of subdomains with nonzero intersections with the
    local ownership range, so that the right number of ISs can be allocated.
  */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
        Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if(nidx) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
  /* NOTE(review): sizeof(IS*) is used where sizeof(IS) is meant; the sizes coincide
     because IS is itself a pointer typedef, but the expression is misleading. */
  *nsub = s;
  ierr  = PetscMalloc((*nsub)*sizeof(IS*),is); CHKERRQ(ierr);
  ierr  = PetscMalloc((*nsub)*sizeof(IS*),is_local); CHKERRQ(ierr);
  /* Second pass: revisit every subdomain and build the actual index sets. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
        Determine whether this domain intersects this processor's ownership range of pc->pmat.
        Do this twice: first for the domains with overlaps, and once without.
        During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for(q = 0; q < 2; ++q) {
        /*
          domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof; /* convert from grid points to matrix rows */
        n[q]  = nidx;
        /*
          Based on the counted number of indices in the local domain *with an overlap*,
          construct a subcommunicator of all the processors supporting this domain.
          Observe that a domain with an overlap might have nontrivial local support,
          while the domain without an overlap might not.  Hence, the decision to participate
          in the subcommunicator must be based on the domain with an overlap.
        */
        if(q == 0) {
          if(nidx) {
            color = 1;
          }
          else {
            color = MPI_UNDEFINED;
          }
          /* NOTE(review): the subcommunicator created here does not appear to be freed in this
             routine - presumably the IS created on it takes it over; confirm ownership. */
          ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
        }
        /*
          Proceed only if the number of local indices *with an overlap* is nonzero.
        */
        if(n[0]) {
          if(q == 0) {
            iis = is;
          }
          if(q == 1) {
            /*
              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
              Moreover, if the overlap is zero, the two ISs are identical.
            */
            if(overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            }
            else {
              iis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          }/* if(q == 1) */
          idx  = PETSC_NULL;
          ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          if(nidx) {
            /* Enumerate the matrix rows of the local piece of this subdomain, row by grid row. */
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              /* The first and last grid rows of the intersection may be clipped by the ownership range. */
              PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
              PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
              kk = dof*(M*jj + x0); /* matrix row of grid point (jj,x0) */
              for (ii=x0; ii<x1; ++ii) {
                for(d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          /* ISCreateGeneral takes ownership of idx (PETSC_OWN_POINTER). */
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
        }/* if(n[0]) */
      }/* for(q = 0; q < 2; ++q) */
      if(n[0]) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCGASMGetLocalSubdomains"
/*@C
   PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
   only) for the additive Schwarz preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n - the number of subdomains for this processor (default value = 1)
.
is - the index sets that define the subdomains for this processor 1525 - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL) 1526 1527 1528 Notes: 1529 The IS numbering is in the parallel, global numbering of the vector. 1530 1531 Level: advanced 1532 1533 .keywords: PC, GASM, set, local, subdomains, additive Schwarz 1534 1535 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(), 1536 PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices() 1537 @*/ 1538 PetscErrorCode PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[]) 1539 { 1540 PC_GASM *osm; 1541 PetscErrorCode ierr; 1542 PetscBool match; 1543 1544 PetscFunctionBegin; 1545 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 1546 PetscValidIntPointer(n,2); 1547 if (is) PetscValidPointer(is,3); 1548 ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr); 1549 if (!match) { 1550 if (n) *n = 0; 1551 if (is) *is = PETSC_NULL; 1552 } else { 1553 osm = (PC_GASM*)pc->data; 1554 if (n) *n = osm->n; 1555 if (is) *is = osm->is; 1556 if (is_local) *is_local = osm->is_local; 1557 } 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "PCGASMGetLocalSubmatrices" 1563 /*@C 1564 PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1565 only) for the additive Schwarz preconditioner. 1566 1567 Not Collective 1568 1569 Input Parameter: 1570 . 
pc - the preconditioner context

    Output Parameters:
+   n - the number of matrices for this processor (default value = 1)
-   mat - the matrices


    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_GASM        *gasm;
  PetscErrorCode ierr;
  PetscBool      isgasm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The subdomain matrices are created during setup, so setup must have run first. */
  if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&isgasm);CHKERRQ(ierr);
  if (isgasm) {
    gasm = (PC_GASM*)pc->data;
    if (n)   *n   = gasm->n;
    if (mat) *mat = gasm->pmat;
  } else {
    /* Some other PC type: report no submatrices. */
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  }
  PetscFunctionReturn(0);
}