1 #define PETSCKSP_DLL 2 3 /* 4 This file defines an additive Schwarz preconditioner for any Mat implementation. 5 6 Note that each processor may have any number of subdomains. But in order to 7 deal easily with the VecScatter(), we treat each processor as if it has the 8 same number of subdomains. 9 10 n - total number of true subdomains on all processors 11 n_local_true - actual number of subdomains on this processor 12 n_local = maximum over all processors of n_local_true 13 */ 14 #include "private/pcimpl.h" /*I "petscpc.h" I*/ 15 16 typedef struct { 17 PetscInt n, n_local, n_local_true; 18 PetscInt overlap; /* overlap requested by user */ 19 KSP *ksp; /* linear solvers for each block */ 20 VecScatter *restriction; /* mapping from global to subregion */ 21 VecScatter *localization; /* mapping from overlapping to non-overlapping subregion */ 22 VecScatter *prolongation; /* mapping from subregion to global */ 23 Vec *x,*y,*y_local; /* work vectors */ 24 IS *is; /* index set that defines each overlapping subdomain */ 25 IS *is_local; /* index set that defines each non-overlapping subdomain, may be NULL */ 26 Mat *mat,*pmat; /* mat is not currently used */ 27 PCASMType type; /* use reduced interpolation, restriction or both */ 28 PetscTruth type_set; /* if user set this value (so won't change it for symmetric problems) */ 29 PetscTruth same_local_solves; /* flag indicating whether all local solvers are same */ 30 } PC_ASM; 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "PCView_ASM" 34 static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer) 35 { 36 PC_ASM *osm = (PC_ASM*)pc->data; 37 PetscErrorCode ierr; 38 PetscMPIInt rank; 39 PetscInt i,bsz; 40 PetscTruth iascii,isstring; 41 PetscViewer sviewer; 42 43 44 PetscFunctionBegin; 45 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 46 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr); 47 if (iascii) { 48 if (osm->n > 0) { 49 ierr = 
PetscViewerASCIIPrintf(viewer," Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr); 50 } else { 51 ierr = PetscViewerASCIIPrintf(viewer," Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr); 52 } 53 ierr = PetscViewerASCIIPrintf(viewer," Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr); 54 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 55 if (osm->same_local_solves) { 56 if (osm->ksp) { 57 ierr = PetscViewerASCIIPrintf(viewer," Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr); 58 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 59 if (!rank) { 60 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 61 ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr); 62 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 63 } 64 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 65 } 66 } else { 67 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr); 68 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 69 ierr = PetscViewerASCIIPrintf(viewer," Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr); 70 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 71 ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 72 for (i=0; i<osm->n_local; i++) { 73 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 74 if (i < osm->n_local_true) { 75 ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr); 76 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr); 77 ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr); 78 ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr); 79 } 
80 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 81 } 82 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 83 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 84 } 85 } else if (isstring) { 86 ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr); 87 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 88 if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);} 89 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 90 } else { 91 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name); 92 } 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCASMPrintSubdomains" 98 static PetscErrorCode PCASMPrintSubdomains(PC pc) 99 { 100 PC_ASM *osm = (PC_ASM*)pc->data; 101 const char *prefix; 102 char fname[PETSC_MAX_PATH_LEN+1]; 103 PetscViewer viewer; 104 PetscInt i,j,nidx; 105 const PetscInt *idx; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 110 ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr); 111 if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); }; 112 ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr); 113 for (i=0;i<osm->n_local_true;i++) { 114 ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr); 115 ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr); 116 for (j=0; j<nidx; j++) { 117 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr); 118 } 119 ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr); 120 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); 121 if (osm->is_local) { 122 ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr); 123 ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr); 124 for (j=0; j<nidx; j++) { 125 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr); 126 } 127 ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr); 128 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr); 129 } 130 } 131 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 132 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "PCSetUp_ASM" 138 static PetscErrorCode PCSetUp_ASM(PC pc) 139 { 140 PC_ASM *osm = (PC_ASM*)pc->data; 141 PetscErrorCode ierr; 142 PetscTruth symset,flg; 143 PetscInt i,m,m_local,firstRow,lastRow; 144 PetscMPIInt size; 145 MatReuse scall = MAT_REUSE_MATRIX; 146 IS isl; 147 KSP ksp; 148 PC subpc; 149 const char *prefix,*pprefix; 150 Vec vec; 151 152 PetscFunctionBegin; 153 if (!pc->setupcalled) { 154 155 if (!osm->type_set) { 156 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 157 if (symset && flg) { osm->type = PC_ASM_BASIC; } 158 } 159 160 if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) { 161 /* no subdomains given, use one per processor */ 162 osm->n_local = osm->n_local_true = 1; 163 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 164 osm->n = size; 165 } else if (osm->n == PETSC_DECIDE) { 166 /* determine global number of subdomains */ 167 PetscInt inwork[2],outwork[2]; 168 inwork[0] = inwork[1] = osm->n_local_true; 169 ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr); 170 osm->n_local = outwork[0]; 171 osm->n = outwork[1]; 172 } 173 174 if (!osm->is){ /* create the index sets */ 175 ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr); 176 } 177 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 178 flg = PETSC_FALSE; 179 ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr); 180 if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); } 181 182 /* Extend the "overlapping" regions by a number 
of steps */ 183 ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr); 184 for (i=0; i<osm->n_local_true; i++) { 185 ierr = ISSort(osm->is[i]);CHKERRQ(ierr); 186 if (osm->is_local) { 187 ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr); 188 } 189 } 190 191 /* Create the local work vectors and scatter contexts */ 192 ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr); 193 ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->restriction);CHKERRQ(ierr); 194 if (osm->is_local) {ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->localization);CHKERRQ(ierr);} 195 ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->prolongation);CHKERRQ(ierr); 196 ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr); 197 ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y);CHKERRQ(ierr); 198 ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y_local);CHKERRQ(ierr); 199 ierr = VecGetOwnershipRange(vec, &firstRow, &lastRow);CHKERRQ(ierr); 200 for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) { 201 ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr); 202 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr); 203 ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr); 204 ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr); 205 ierr = ISDestroy(isl);CHKERRQ(ierr); 206 ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr); 207 if (osm->is_local) { 208 IS isShift; 209 const PetscInt *idx_local; 210 PetscInt *idx, j; 211 212 ierr = ISGetLocalSize(osm->is_local[i],&m_local);CHKERRQ(ierr); 213 ierr = ISGetIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr); 214 ierr = PetscMalloc(m*sizeof(PetscInt), &idx);CHKERRQ(ierr); 215 for(j = 0; j < m_local; ++j) { 216 idx[j] = idx_local[j] - firstRow; 217 } 218 ierr = ISRestoreIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr); 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,&isShift);CHKERRQ(ierr); 220 ierr = 
PetscFree(idx);CHKERRQ(ierr); 221 ierr = ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);CHKERRQ(ierr); 222 ierr = VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);CHKERRQ(ierr); 223 ierr = VecScatterCreate(osm->y[i],isShift,osm->y_local[i],isl,&osm->localization[i]);CHKERRQ(ierr); 224 ierr = ISDestroy(isShift);CHKERRQ(ierr); 225 226 ierr = VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);CHKERRQ(ierr); 227 ierr = ISDestroy(isl);CHKERRQ(ierr); 228 } else { 229 ierr = VecGetLocalSize(vec,&m_local);CHKERRQ(ierr); 230 osm->y_local[i] = osm->y[i]; 231 ierr = PetscObjectReference((PetscObject) osm->y[i]);CHKERRQ(ierr); 232 osm->prolongation[i] = osm->restriction[i]; 233 ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr); 234 } 235 } 236 if (firstRow != lastRow) SETERRQ2(PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %d != %d", firstRow, lastRow); 237 for (i=osm->n_local_true; i<osm->n_local; i++) { 238 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr); 239 ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr); 240 ierr = VecDuplicate(osm->x[i],&osm->y_local[i]);CHKERRQ(ierr); 241 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr); 242 ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr); 243 osm->prolongation[i] = osm->restriction[i]; 244 ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr); 245 ierr = ISDestroy(isl);CHKERRQ(ierr); 246 } 247 ierr = VecDestroy(vec);CHKERRQ(ierr); 248 249 /* Create the local solvers */ 250 ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr); 251 for (i=0; i<osm->n_local_true; i++) { 252 ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr); 253 ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr); 254 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr); 255 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr); 256 ierr = 
KSPGetPC(ksp,&subpc);CHKERRQ(ierr); 257 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 258 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr); 259 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr); 260 osm->ksp[i] = ksp; 261 } 262 scall = MAT_INITIAL_MATRIX; 263 264 } else { 265 /* 266 Destroy the blocks from the previous iteration 267 */ 268 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 269 ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr); 270 scall = MAT_INITIAL_MATRIX; 271 } 272 } 273 274 /* 275 Extract out the submatrices 276 */ 277 ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr); 278 if (scall == MAT_INITIAL_MATRIX) { 279 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr); 280 for (i=0; i<osm->n_local_true; i++) { 281 ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr); 282 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr); 283 } 284 } 285 286 /* Return control to the user so that the submatrices can be modified (e.g., to apply 287 different boundary conditions for the submatrices than for the global problem) */ 288 ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr); 289 290 /* 291 Loop over subdomains putting them into local ksp 292 */ 293 for (i=0; i<osm->n_local_true; i++) { 294 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr); 295 if (!pc->setupcalled) { 296 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr); 297 } 298 } 299 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "PCSetUpOnBlocks_ASM" 305 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc) 306 { 307 PC_ASM *osm = (PC_ASM*)pc->data; 308 PetscErrorCode ierr; 309 PetscInt i; 310 311 PetscFunctionBegin; 312 for (i=0; i<osm->n_local_true; i++) { 313 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr); 314 } 315 
PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "PCApply_ASM" 320 static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y) 321 { 322 PC_ASM *osm = (PC_ASM*)pc->data; 323 PetscErrorCode ierr; 324 PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true; 325 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 326 327 PetscFunctionBegin; 328 /* 329 Support for limiting the restriction or interpolation to only local 330 subdomain values (leaving the other values 0). 331 */ 332 if (!(osm->type & PC_ASM_RESTRICT)) { 333 forward = SCATTER_FORWARD_LOCAL; 334 /* have to zero the work RHS since scatter may leave some slots empty */ 335 for (i=0; i<n_local_true; i++) { 336 ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr); 337 } 338 } 339 if (!(osm->type & PC_ASM_INTERPOLATE)) { 340 reverse = SCATTER_REVERSE_LOCAL; 341 } 342 343 for (i=0; i<n_local; i++) { 344 ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr); 345 } 346 ierr = VecSet(y,0.0);CHKERRQ(ierr); 347 /* do the local solves */ 348 for (i=0; i<n_local_true; i++) { 349 ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr); 350 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 351 if (osm->localization) { 352 ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr); 353 ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr); 354 } 355 ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr); 356 } 357 /* handle the rest of the scatters that do not have local solves */ 358 for (i=n_local_true; i<n_local; i++) { 359 ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr); 360 ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr); 361 } 362 for (i=0; i<n_local; i++) { 363 ierr = 
VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "PCApplyTranspose_ASM" 370 static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y) 371 { 372 PC_ASM *osm = (PC_ASM*)pc->data; 373 PetscErrorCode ierr; 374 PetscInt i,n_local = osm->n_local,n_local_true = osm->n_local_true; 375 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE; 376 377 PetscFunctionBegin; 378 /* 379 Support for limiting the restriction or interpolation to only local 380 subdomain values (leaving the other values 0). 381 382 Note: these are reversed from the PCApply_ASM() because we are applying the 383 transpose of the three terms 384 */ 385 if (!(osm->type & PC_ASM_INTERPOLATE)) { 386 forward = SCATTER_FORWARD_LOCAL; 387 /* have to zero the work RHS since scatter may leave some slots empty */ 388 for (i=0; i<n_local_true; i++) { 389 ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr); 390 } 391 } 392 if (!(osm->type & PC_ASM_RESTRICT)) { 393 reverse = SCATTER_REVERSE_LOCAL; 394 } 395 396 for (i=0; i<n_local; i++) { 397 ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr); 398 } 399 ierr = VecSet(y,0.0);CHKERRQ(ierr); 400 /* do the local solves */ 401 for (i=0; i<n_local_true; i++) { 402 ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr); 403 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr); 404 if (osm->localization) { 405 ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr); 406 ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr); 407 } 408 ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr); 409 } 410 /* handle the rest of the scatters that do not have local solves */ 411 for (i=n_local_true; i<n_local; i++) { 412 
ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr); 413 ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr); 414 } 415 for (i=0; i<n_local; i++) { 416 ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr); 417 } 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "PCDestroy_ASM" 423 static PetscErrorCode PCDestroy_ASM(PC pc) 424 { 425 PC_ASM *osm = (PC_ASM*)pc->data; 426 PetscErrorCode ierr; 427 PetscInt i; 428 429 PetscFunctionBegin; 430 if (osm->ksp) { 431 for (i=0; i<osm->n_local_true; i++) { 432 ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr); 433 } 434 ierr = PetscFree(osm->ksp);CHKERRQ(ierr); 435 } 436 if (osm->pmat) { 437 if (osm->n_local_true > 0) { 438 ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr); 439 } 440 } 441 if (osm->restriction) { 442 for (i=0; i<osm->n_local; i++) { 443 ierr = VecScatterDestroy(osm->restriction[i]);CHKERRQ(ierr); 444 if (osm->localization) {ierr = VecScatterDestroy(osm->localization[i]);CHKERRQ(ierr);} 445 ierr = VecScatterDestroy(osm->prolongation[i]);CHKERRQ(ierr); 446 ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr); 447 ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr); 448 ierr = VecDestroy(osm->y_local[i]);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(osm->restriction);CHKERRQ(ierr); 451 if (osm->localization) {ierr = PetscFree(osm->localization);CHKERRQ(ierr);} 452 ierr = PetscFree(osm->prolongation);CHKERRQ(ierr); 453 ierr = PetscFree(osm->x);CHKERRQ(ierr); 454 ierr = PetscFree(osm->y);CHKERRQ(ierr); 455 ierr = PetscFree(osm->y_local);CHKERRQ(ierr); 456 } 457 if (osm->is) { 458 ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr); 459 } 460 ierr = PetscFree(osm);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "PCSetFromOptions_ASM" 466 static PetscErrorCode PCSetFromOptions_ASM(PC pc) 467 { 
468 PC_ASM *osm = (PC_ASM*)pc->data; 469 PetscErrorCode ierr; 470 PetscInt blocks,ovl; 471 PetscTruth symset,flg; 472 PCASMType asmtype; 473 474 PetscFunctionBegin; 475 /* set the type to symmetric if matrix is symmetric */ 476 if (!osm->type_set && pc->pmat) { 477 ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr); 478 if (symset && flg) { osm->type = PC_ASM_BASIC; } 479 } 480 ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr); 481 ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr); 482 if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); } 483 ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr); 484 if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); } 485 flg = PETSC_FALSE; 486 ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr); 487 if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); } 488 ierr = PetscOptionsTail();CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 /*------------------------------------------------------------------------------------*/ 493 494 EXTERN_C_BEGIN 495 #undef __FUNCT__ 496 #define __FUNCT__ "PCASMSetLocalSubdomains_ASM" 497 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[]) 498 { 499 PC_ASM *osm = (PC_ASM*)pc->data; 500 PetscErrorCode ierr; 501 PetscInt i; 502 503 PetscFunctionBegin; 504 if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n); 505 if (pc->setupcalled && (n != osm->n_local_true || is)) { 506 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetup()."); 507 } 508 if (!pc->setupcalled) { 509 if (is) { 510 for (i=0; i<n; i++) {ierr = 
PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);} 511 } 512 if (is_local) { 513 for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);} 514 } 515 if (osm->is) { 516 ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr); 517 } 518 osm->n_local_true = n; 519 osm->is = 0; 520 osm->is_local = 0; 521 if (is) { 522 ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr); 523 for (i=0; i<n; i++) { osm->is[i] = is[i]; } 524 } 525 if (is_local) { 526 ierr = PetscMalloc(n*sizeof(IS *),&osm->is_local);CHKERRQ(ierr); 527 for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; } 528 } 529 } 530 PetscFunctionReturn(0); 531 } 532 EXTERN_C_END 533 534 EXTERN_C_BEGIN 535 #undef __FUNCT__ 536 #define __FUNCT__ "PCASMSetTotalSubdomains_ASM" 537 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is) 538 { 539 PC_ASM *osm = (PC_ASM*)pc->data; 540 PetscErrorCode ierr; 541 PetscMPIInt rank,size; 542 PetscInt n; 543 544 PetscFunctionBegin; 545 if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N); 546 if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet."); 547 548 /* 549 Split the subdomains equally amoung all processors 550 */ 551 ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr); 552 ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr); 553 n = N/size + ((N % size) > rank); 554 if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N); 555 if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetup()."); 556 if (!pc->setupcalled) { 557 if (osm->is) { 558 ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr); 559 } 560 osm->n_local_true = n; 561 osm->is = 
0; 562 osm->is_local = 0; 563 } 564 PetscFunctionReturn(0); 565 } 566 EXTERN_C_END 567 568 EXTERN_C_BEGIN 569 #undef __FUNCT__ 570 #define __FUNCT__ "PCASMSetOverlap_ASM" 571 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl) 572 { 573 PC_ASM *osm = (PC_ASM*)pc->data; 574 575 PetscFunctionBegin; 576 if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested"); 577 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetup()."); 578 if (!pc->setupcalled) { 579 osm->overlap = ovl; 580 } 581 PetscFunctionReturn(0); 582 } 583 EXTERN_C_END 584 585 EXTERN_C_BEGIN 586 #undef __FUNCT__ 587 #define __FUNCT__ "PCASMSetType_ASM" 588 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type) 589 { 590 PC_ASM *osm = (PC_ASM*)pc->data; 591 592 PetscFunctionBegin; 593 osm->type = type; 594 osm->type_set = PETSC_TRUE; 595 PetscFunctionReturn(0); 596 } 597 EXTERN_C_END 598 599 EXTERN_C_BEGIN 600 #undef __FUNCT__ 601 #define __FUNCT__ "PCASMGetSubKSP_ASM" 602 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp) 603 { 604 PC_ASM *osm = (PC_ASM*)pc->data; 605 PetscErrorCode ierr; 606 607 PetscFunctionBegin; 608 if (osm->n_local_true < 1) { 609 SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 610 } 611 612 if (n_local) { 613 *n_local = osm->n_local_true; 614 } 615 if (first_local) { 616 ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr); 617 *first_local -= osm->n_local_true; 618 } 619 if (ksp) { 620 /* Assume that local solves are now different; not necessarily 621 true though! 
This flag is used only for PCView_ASM() */ 622 *ksp = osm->ksp; 623 osm->same_local_solves = PETSC_FALSE; 624 } 625 PetscFunctionReturn(0); 626 } 627 EXTERN_C_END 628 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "PCASMSetLocalSubdomains" 632 /*@C 633 PCASMSetLocalSubdomains - Sets the local subdomains (for this processor 634 only) for the additive Schwarz preconditioner. 635 636 Collective on PC 637 638 Input Parameters: 639 + pc - the preconditioner context 640 . n - the number of subdomains for this processor (default value = 1) 641 . is - the index sets that define the subdomains for this processor 642 (or PETSC_NULL for PETSc to determine subdomains) 643 - is_local - the index sets that define the local part of the subdomains for this processor 644 (or PETSC_NULL to use the default of 1 subdomain per process) 645 646 Notes: 647 The IS numbering is in the parallel, global numbering of the vector. 648 649 By default the ASM preconditioner uses 1 block per processor. 650 651 These index sets cannot be destroyed until after completion of the 652 linear solves for which the ASM preconditioner is being used. 653 654 Use PCASMSetTotalSubdomains() to set the subdomains for all processors. 
655 656 Level: advanced 657 658 .keywords: PC, ASM, set, local, subdomains, additive Schwarz 659 660 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(), 661 PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains() 662 @*/ 663 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[]) 664 { 665 PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]); 666 667 PetscFunctionBegin; 668 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 669 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr); 670 if (f) { 671 ierr = (*f)(pc,n,is,is_local);CHKERRQ(ierr); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "PCASMSetTotalSubdomains" 678 /*@C 679 PCASMSetTotalSubdomains - Sets the subdomains for all processor for the 680 additive Schwarz preconditioner. Either all or no processors in the 681 PC communicator must call this routine, with the same index sets. 682 683 Collective on PC 684 685 Input Parameters: 686 + pc - the preconditioner context 687 . n - the number of subdomains for all processors 688 . is - the index sets that define the subdomains for all processor 689 (or PETSC_NULL for PETSc to determine subdomains) 690 - is_local - the index sets that define the local part of the subdomains for this processor 691 (or PETSC_NULL to use the default of 1 subdomain per process) 692 693 Options Database Key: 694 To set the total number of subdomain blocks rather than specify the 695 index sets, use the option 696 . -pc_asm_blocks <blks> - Sets total blocks 697 698 Notes: 699 Currently you cannot use this to set the actual subdomains with the argument is. 700 701 By default the ASM preconditioner uses 1 block per processor. 702 703 These index sets cannot be destroyed until after completion of the 704 linear solves for which the ASM preconditioner is being used. 705 706 Use PCASMSetLocalSubdomains() to set local subdomains. 
707 708 Level: advanced 709 710 .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz 711 712 .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(), 713 PCASMCreateSubdomains2D() 714 @*/ 715 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[]) 716 { 717 PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]); 718 719 PetscFunctionBegin; 720 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 721 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr); 722 if (f) { 723 ierr = (*f)(pc,N,is,is_local);CHKERRQ(ierr); 724 } 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "PCASMSetOverlap" 730 /*@ 731 PCASMSetOverlap - Sets the overlap between a pair of subdomains for the 732 additive Schwarz preconditioner. Either all or no processors in the 733 PC communicator must call this routine. 734 735 Collective on PC 736 737 Input Parameters: 738 + pc - the preconditioner context 739 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1) 740 741 Options Database Key: 742 . -pc_asm_overlap <ovl> - Sets overlap 743 744 Notes: 745 By default the ASM preconditioner uses 1 block per processor. To use 746 multiple blocks per perocessor, see PCASMSetTotalSubdomains() and 747 PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>). 748 749 The overlap defaults to 1, so if one desires that no additional 750 overlap be computed beyond what may have been set with a call to 751 PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl 752 must be set to be 0. In particular, if one does not explicitly set 753 the subdomains an application code, then all overlap would be computed 754 internally by PETSc, and using an overlap of 0 would result in an ASM 755 variant that is equivalent to the block Jacobi preconditioner. 
756 757 Note that one can define initial index sets with any overlap via 758 PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine 759 PCASMSetOverlap() merely allows PETSc to extend that overlap further 760 if desired. 761 762 Level: intermediate 763 764 .keywords: PC, ASM, set, overlap 765 766 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(), 767 PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains() 768 @*/ 769 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl) 770 { 771 PetscErrorCode ierr,(*f)(PC,PetscInt); 772 773 PetscFunctionBegin; 774 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 775 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr); 776 if (f) { 777 ierr = (*f)(pc,ovl);CHKERRQ(ierr); 778 } 779 PetscFunctionReturn(0); 780 } 781 782 #undef __FUNCT__ 783 #define __FUNCT__ "PCASMSetType" 784 /*@ 785 PCASMSetType - Sets the type of restriction and interpolation used 786 for local problems in the additive Schwarz method. 787 788 Collective on PC 789 790 Input Parameters: 791 + pc - the preconditioner context 792 - type - variant of ASM, one of 793 .vb 794 PC_ASM_BASIC - full interpolation and restriction 795 PC_ASM_RESTRICT - full restriction, local processor interpolation 796 PC_ASM_INTERPOLATE - full interpolation, local processor restriction 797 PC_ASM_NONE - local processor restriction and interpolation 798 .ve 799 800 Options Database Key: 801 . 
-pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type 802 803 Level: intermediate 804 805 .keywords: PC, ASM, set, type 806 807 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(), 808 PCASMCreateSubdomains2D() 809 @*/ 810 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type) 811 { 812 PetscErrorCode ierr,(*f)(PC,PCASMType); 813 814 PetscFunctionBegin; 815 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 816 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr); 817 if (f) { 818 ierr = (*f)(pc,type);CHKERRQ(ierr); 819 } 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "PCASMGetSubKSP" 825 /*@C 826 PCASMGetSubKSP - Gets the local KSP contexts for all blocks on 827 this processor. 828 829 Collective on PC iff first_local is requested 830 831 Input Parameter: 832 . pc - the preconditioner context 833 834 Output Parameters: 835 + n_local - the number of blocks on this processor or PETSC_NULL 836 . first_local - the global number of the first block on this processor or PETSC_NULL, 837 all processors must request or all must pass PETSC_NULL 838 - ksp - the array of KSP contexts 839 840 Note: 841 After PCASMGetSubKSP() the array of KSPes is not to be freed 842 843 Currently for some matrix implementations only 1 block per processor 844 is supported. 845 846 You must call KSPSetUp() before calling PCASMGetSubKSP(). 
847 848 Level: advanced 849 850 .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context 851 852 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(), 853 PCASMCreateSubdomains2D(), 854 @*/ 855 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[]) 856 { 857 PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **); 858 859 PetscFunctionBegin; 860 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 861 ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr); 862 if (f) { 863 ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr); 864 } else { 865 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC"); 866 } 867 868 PetscFunctionReturn(0); 869 } 870 871 /* -------------------------------------------------------------------------------------*/ 872 /*MC 873 PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with 874 its own KSP object. 875 876 Options Database Keys: 877 + -pc_asm_truelocal - Activates PCASMSetUseTrueLocal() 878 . -pc_asm_blocks <blks> - Sets total blocks 879 . -pc_asm_overlap <ovl> - Sets overlap 880 - -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type 881 882 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you 883 will get a different convergence rate due to the default option of -pc_asm_type restrict. Use 884 -pc_asm_type basic to use the standard ASM. 885 886 Notes: Each processor can have one or more blocks, but a block cannot be shared by more 887 than one processor. Defaults to one block per processor. 888 889 To set options on the solvers for each block append -sub_ to all the KSP, and PC 890 options database keys. 
For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separate for each block call PCASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC())


   Level: beginner

   Concepts: additive Schwarz method

   References:
    An additive variant of the Schwarz alternating method for the case of many subregions
    M Dryja, OB Widlund - Courant Institute, New York University Technical report

    Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
    Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
           PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
           PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_ASM"
/* Constructor registered for the PCASM type: allocates the PC_ASM context,
   fills in the default parameters, installs the ASM implementations of the
   PC operations, and composes the type-specific setter/getter functions
   that the generic PCASMSetXXX()/PCASMGetSubKSP() wrappers dispatch to. */
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
{
  PetscErrorCode ierr;
  PC_ASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
  /* Defaults: subdomain count decided at setup, overlap of 1, and the
     restricted-ASM variant (see the PCASM manual page above). */
  osm->n                 = PETSC_DECIDE;
  osm->n_local           = 0;
  osm->n_local_true      = 0;
  osm->overlap           = 1;
  /* Sub-solvers, scatters, and work vectors are created lazily in PCSetUp_ASM */
  osm->ksp               = 0;
  osm->restriction       = 0;
  osm->localization      = 0;
  osm->prolongation      = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->y_local           = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->mat               = 0;
  osm->pmat              = 0;
  osm->type              = PC_ASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;

  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_ASM;
  pc->ops->applytranspose  = PCApplyTranspose_ASM;
  pc->ops->setup           = PCSetUp_ASM;
  pc->ops->destroy         = PCDestroy_ASM;
  pc->ops->setfromoptions  = PCSetFromOptions_ASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
  pc->ops->view            = PCView_ASM;
  pc->ops->applyrichardson = 0;

  /* Make the ASM-specific entry points reachable from the generic wrappers
     via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
                    PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
                    PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
                    PCASMSetOverlap_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
                    PCASMSetType_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
                    PCASMGetSubKSP_ASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "PCASMCreateSubdomains"
/*@C
   PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.  outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
   from these if you use PCASMSetLocalSubdomains()

   In the Fortran version you must provide the array outis[] already allocated of length n.
988 989 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid 990 991 .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains() 992 @*/ 993 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[]) 994 { 995 MatPartitioning mpart; 996 const char *prefix; 997 PetscErrorCode (*f)(Mat,PetscTruth*,MatReuse,Mat*); 998 PetscMPIInt size; 999 PetscInt i,j,rstart,rend,bs; 1000 PetscTruth iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE; 1001 Mat Ad = PETSC_NULL, adj; 1002 IS ispart,isnumb,*is; 1003 PetscErrorCode ierr; 1004 1005 PetscFunctionBegin; 1006 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 1007 PetscValidPointer(outis,3); 1008 if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);} 1009 1010 /* Get prefix, row distribution, and block size */ 1011 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr); 1012 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1013 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 1014 if (rstart/bs*bs != rstart || rend/bs*bs != rend) { 1015 SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs); 1016 } 1017 /* Get diagonal block from matrix if possible */ 1018 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1019 ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr); 1020 if (f) { 1021 ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr); 1022 } else if (size == 1) { 1023 iscopy = PETSC_FALSE; Ad = A; 1024 } else { 1025 iscopy = PETSC_FALSE; Ad = PETSC_NULL; 1026 } 1027 if (Ad) { 1028 ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 1029 if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);} 1030 } 1031 if (Ad && n > 1) { 1032 PetscTruth match,done; 1033 /* Try to setup a good matrix partitioning if available */ 1034 ierr = 
MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr); 1035 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr); 1036 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr); 1037 ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr); 1038 if (!match) { 1039 ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr); 1040 } 1041 if (!match) { /* assume a "good" partitioner is available */ 1042 PetscInt na,*ia,*ja; 1043 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1044 if (done) { 1045 /* Build adjacency matrix by hand. Unfortunately a call to 1046 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will 1047 remove the block-aij structure and we cannot expect 1048 MatPartitioning to split vertices as we need */ 1049 PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0; 1050 nnz = 0; 1051 for (i=0; i<na; i++) { /* count number of nonzeros */ 1052 len = ia[i+1] - ia[i]; 1053 row = ja + ia[i]; 1054 for (j=0; j<len; j++) { 1055 if (row[j] == i) { /* don't count diagonal */ 1056 len--; break; 1057 } 1058 } 1059 nnz += len; 1060 } 1061 ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr); 1062 ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr); 1063 nnz = 0; 1064 iia[0] = 0; 1065 for (i=0; i<na; i++) { /* fill adjacency */ 1066 cnt = 0; 1067 len = ia[i+1] - ia[i]; 1068 row = ja + ia[i]; 1069 for (j=0; j<len; j++) { 1070 if (row[j] != i) { /* if not diagonal */ 1071 jja[nnz+cnt++] = row[j]; 1072 } 1073 } 1074 nnz += cnt; 1075 iia[i+1] = nnz; 1076 } 1077 /* Partitioning of the adjacency matrix */ 1078 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr); 1079 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr); 1080 ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr); 1081 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr); 1082 ierr = 
ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr); 1083 ierr = MatDestroy(adj);CHKERRQ(ierr); 1084 foundpart = PETSC_TRUE; 1085 } 1086 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr); 1087 } 1088 ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr); 1089 } 1090 if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);} 1091 1092 ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr); 1093 *outis = is; 1094 1095 if (!foundpart) { 1096 1097 /* Partitioning by contiguous chunks of rows */ 1098 1099 PetscInt mbs = (rend-rstart)/bs; 1100 PetscInt start = rstart; 1101 for (i=0; i<n; i++) { 1102 PetscInt count = (mbs/n + ((mbs % n) > i)) * bs; 1103 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr); 1104 start += count; 1105 } 1106 1107 } else { 1108 1109 /* Partitioning by adjacency of diagonal block */ 1110 1111 const PetscInt *numbering; 1112 PetscInt *count,nidx,*indices,*newidx,start=0; 1113 /* Get node count in each partition */ 1114 ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr); 1115 ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr); 1116 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1117 for (i=0; i<n; i++) count[i] *= bs; 1118 } 1119 /* Build indices from node numbering */ 1120 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr); 1121 ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr); 1122 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */ 1123 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr); 1124 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr); 1125 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr); 1126 if (isbaij && bs > 1) { /* adjust for the block-aij case */ 1127 ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr); 1128 for (i=0; i<nidx; i++) 1129 for (j=0; j<bs; j++) 1130 newidx[i*bs+j] = indices[i]*bs + j; 1131 ierr = PetscFree(indices);CHKERRQ(ierr); 1132 nidx *= bs; 1133 indices = newidx; 
1134 } 1135 /* Shift to get global indices */ 1136 for (i=0; i<nidx; i++) indices[i] += rstart; 1137 1138 /* Build the index sets for each block */ 1139 for (i=0; i<n; i++) { 1140 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr); 1141 ierr = ISSort(is[i]);CHKERRQ(ierr); 1142 start += count[i]; 1143 } 1144 1145 ierr = PetscFree(count); 1146 ierr = PetscFree(indices); 1147 ierr = ISDestroy(isnumb);CHKERRQ(ierr); 1148 ierr = ISDestroy(ispart);CHKERRQ(ierr); 1149 1150 } 1151 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "PCASMDestroySubdomains" 1157 /*@C 1158 PCASMDestroySubdomains - Destroys the index sets created with 1159 PCASMCreateSubdomains(). Should be called after setting subdomains 1160 with PCASMSetLocalSubdomains(). 1161 1162 Collective 1163 1164 Input Parameters: 1165 + n - the number of index sets 1166 . is - the array of index sets 1167 - is_local - the array of local index sets, can be PETSC_NULL 1168 1169 Level: advanced 1170 1171 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid 1172 1173 .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains() 1174 @*/ 1175 PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) 1176 { 1177 PetscInt i; 1178 PetscErrorCode ierr; 1179 PetscFunctionBegin; 1180 if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n); 1181 PetscValidPointer(is,2); 1182 for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); } 1183 ierr = PetscFree(is);CHKERRQ(ierr); 1184 if (is_local) { 1185 PetscValidPointer(is_local,3); 1186 for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); } 1187 ierr = PetscFree(is_local);CHKERRQ(ierr); 1188 } 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "PCASMCreateSubdomains2D" 1194 /*@ 1195 PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz 1196 preconditioner for a 
two-dimensional problem on a regular grid. 1197 1198 Not Collective 1199 1200 Input Parameters: 1201 + m, n - the number of mesh points in the x and y directions 1202 . M, N - the number of subdomains in the x and y directions 1203 . dof - degrees of freedom per node 1204 - overlap - overlap in mesh lines 1205 1206 Output Parameters: 1207 + Nsub - the number of subdomains created 1208 - is - the array of index sets defining the subdomains 1209 1210 Note: 1211 Presently PCAMSCreateSubdomains2d() is valid only for sequential 1212 preconditioners. More general related routines are 1213 PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains(). 1214 1215 Level: advanced 1216 1217 .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid 1218 1219 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(), 1220 PCASMSetOverlap() 1221 @*/ 1222 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is) 1223 { 1224 PetscInt i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter; 1225 PetscErrorCode ierr; 1226 PetscInt nidx,*idx,loc,ii,jj,count; 1227 1228 PetscFunctionBegin; 1229 if (dof != 1) SETERRQ(PETSC_ERR_SUP," "); 1230 1231 *Nsub = N*M; 1232 ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr); 1233 ystart = 0; 1234 loc_outter = 0; 1235 for (i=0; i<N; i++) { 1236 height = n/N + ((n % N) > i); /* height of subdomain */ 1237 if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n"); 1238 yleft = ystart - overlap; if (yleft < 0) yleft = 0; 1239 yright = ystart + height + overlap; if (yright > n) yright = n; 1240 xstart = 0; 1241 for (j=0; j<M; j++) { 1242 width = m/M + ((m % M) > j); /* width of subdomain */ 1243 if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m"); 1244 xleft = xstart - overlap; if (xleft < 0) xleft = 0; 1245 xright = xstart + 
width + overlap; if (xright > m) xright = m; 1246 nidx = (xright - xleft)*(yright - yleft); 1247 ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr); 1248 loc = 0; 1249 for (ii=yleft; ii<yright; ii++) { 1250 count = m*ii + xleft; 1251 for (jj=xleft; jj<xright; jj++) { 1252 idx[loc++] = count++; 1253 } 1254 } 1255 ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr); 1256 ierr = PetscFree(idx);CHKERRQ(ierr); 1257 xstart += width; 1258 } 1259 ystart += height; 1260 } 1261 for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "PCASMGetLocalSubdomains" 1267 /*@C 1268 PCASMGetLocalSubdomains - Gets the local subdomains (for this processor 1269 only) for the additive Schwarz preconditioner. 1270 1271 Collective on PC 1272 1273 Input Parameter: 1274 . pc - the preconditioner context 1275 1276 Output Parameters: 1277 + n - the number of subdomains for this processor (default value = 1) 1278 . is - the index sets that define the subdomains for this processor 1279 - is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL) 1280 1281 1282 Notes: 1283 The IS numbering is in the parallel, global numbering of the vector. 
1284 1285 Level: advanced 1286 1287 .keywords: PC, ASM, set, local, subdomains, additive Schwarz 1288 1289 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(), 1290 PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices() 1291 @*/ 1292 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[]) 1293 { 1294 PC_ASM *osm; 1295 PetscErrorCode ierr; 1296 PetscTruth match; 1297 1298 PetscFunctionBegin; 1299 PetscValidHeaderSpecific(pc,PC_COOKIE,1); 1300 PetscValidIntPointer(n,2); 1301 if (is) PetscValidPointer(is,3); 1302 ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr); 1303 if (!match) { 1304 if (n) *n = 0; 1305 if (is) *is = PETSC_NULL; 1306 } else { 1307 osm = (PC_ASM*)pc->data; 1308 if (n) *n = osm->n_local_true; 1309 if (is) *is = osm->is; 1310 if (is_local) *is_local = osm->is_local; 1311 } 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "PCASMGetLocalSubmatrices" 1317 /*@C 1318 PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1319 only) for the additive Schwarz preconditioner. 1320 1321 Collective on PC 1322 1323 Input Parameter: 1324 . 
pc - the preconditioner context

   Output Parameters:
+  n - the number of matrices for this processor (default value = 1)
-  mat - the matrices


   Level: advanced

.keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi

.seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
{
  PC_ASM         *osm;
  PetscErrorCode ierr;
  PetscTruth     isasm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  PetscValidIntPointer(n,2);
  if (mat) PetscValidPointer(mat,3);
  /* The submatrices only exist once the preconditioner has been set up */
  if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
  ierr = PetscTypeCompare((PetscObject)pc,PCASM,&isasm);CHKERRQ(ierr);
  if (isasm) {
    /* Report the true local block count and the subdomain matrices */
    osm = (PC_ASM*)pc->data;
    if (n)   *n   = osm->n_local_true;
    if (mat) *mat = osm->pmat;
  } else {
    /* Not an ASM preconditioner: no local submatrices to report */
    if (n)   *n   = 0;
    if (mat) *mat = PETSC_NULL;
  }
  PetscFunctionReturn(0);
}