1 /* 2 This file defines an additive Schwarz preconditioner for any Mat implementation. 3 4 Note that each processor may have any number of subdomains. But in order to 5 deal easily with the VecScatter(), we treat each processor as if it has the 6 same number of subdomains. 7 8 n - total number of true subdomains on all processors 9 n_local_true - actual number of subdomains on this processor 10 n_local = maximum over all processors of n_local_true 11 */ 12 13 #include <petsc/private/pcasmimpl.h> /*I "petscpc.h" I*/ 14 #include "petsc/private/matimpl.h" 15 16 static PetscErrorCode PCView_ASM(PC pc, PetscViewer viewer) { 17 PC_ASM *osm = (PC_ASM *)pc->data; 18 PetscMPIInt rank; 19 PetscInt i, bsz; 20 PetscBool iascii, isstring; 21 PetscViewer sviewer; 22 PetscViewerFormat format; 23 const char *prefix; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 27 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 28 if (iascii) { 29 char overlaps[256] = "user-defined overlap", blocks[256] = "total subdomain blocks not yet set"; 30 if (osm->overlap >= 0) PetscCall(PetscSNPrintf(overlaps, sizeof(overlaps), "amount of overlap = %" PetscInt_FMT, osm->overlap)); 31 if (osm->n > 0) PetscCall(PetscSNPrintf(blocks, sizeof(blocks), "total subdomain blocks = %" PetscInt_FMT, osm->n)); 32 PetscCall(PetscViewerASCIIPrintf(viewer, " %s, %s\n", blocks, overlaps)); 33 PetscCall(PetscViewerASCIIPrintf(viewer, " restriction/interpolation type - %s\n", PCASMTypes[osm->type])); 34 if (osm->dm_subdomains) PetscCall(PetscViewerASCIIPrintf(viewer, " Additive Schwarz: using DM to define subdomains\n")); 35 if (osm->loctype != PC_COMPOSITE_ADDITIVE) PetscCall(PetscViewerASCIIPrintf(viewer, " Additive Schwarz: local solve composition type - %s\n", PCCompositeTypes[osm->loctype])); 36 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 37 PetscCall(PetscViewerGetFormat(viewer, &format)); 
38 if (format != PETSC_VIEWER_ASCII_INFO_DETAIL) { 39 if (osm->ksp) { 40 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for first block is in the following KSP and PC objects on rank 0:\n")); 41 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 42 PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all blocks\n", prefix ? prefix : "")); 43 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 44 if (rank == 0) { 45 PetscCall(PetscViewerASCIIPushTab(viewer)); 46 PetscCall(KSPView(osm->ksp[0], sviewer)); 47 PetscCall(PetscViewerASCIIPopTab(viewer)); 48 } 49 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 50 } 51 } else { 52 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 53 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] number of local blocks = %" PetscInt_FMT "\n", (int)rank, osm->n_local_true)); 54 PetscCall(PetscViewerFlush(viewer)); 55 PetscCall(PetscViewerASCIIPrintf(viewer, " Local solver information for each block is in the following KSP and PC objects:\n")); 56 PetscCall(PetscViewerASCIIPushTab(viewer)); 57 PetscCall(PetscViewerASCIIPrintf(viewer, "- - - - - - - - - - - - - - - - - -\n")); 58 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 59 for (i = 0; i < osm->n_local_true; i++) { 60 PetscCall(ISGetLocalSize(osm->is[i], &bsz)); 61 PetscCall(PetscViewerASCIISynchronizedPrintf(sviewer, "[%d] local block number %" PetscInt_FMT ", size = %" PetscInt_FMT "\n", (int)rank, i, bsz)); 62 PetscCall(KSPView(osm->ksp[i], sviewer)); 63 PetscCall(PetscViewerASCIISynchronizedPrintf(sviewer, "- - - - - - - - - - - - - - - - - -\n")); 64 } 65 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 66 PetscCall(PetscViewerASCIIPopTab(viewer)); 67 PetscCall(PetscViewerFlush(viewer)); 68 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 69 } 70 } else if (isstring) { 71 
PetscCall(PetscViewerStringSPrintf(viewer, " blocks=%" PetscInt_FMT ", overlap=%" PetscInt_FMT ", type=%s", osm->n, osm->overlap, PCASMTypes[osm->type])); 72 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 73 if (osm->ksp) PetscCall(KSPView(osm->ksp[0], sviewer)); 74 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 static PetscErrorCode PCASMPrintSubdomains(PC pc) { 80 PC_ASM *osm = (PC_ASM *)pc->data; 81 const char *prefix; 82 char fname[PETSC_MAX_PATH_LEN + 1]; 83 PetscViewer viewer, sviewer; 84 char *s; 85 PetscInt i, j, nidx; 86 const PetscInt *idx; 87 PetscMPIInt rank, size; 88 89 PetscFunctionBegin; 90 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 91 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 92 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 93 PetscCall(PetscOptionsGetString(NULL, prefix, "-pc_asm_print_subdomains", fname, sizeof(fname), NULL)); 94 if (fname[0] == 0) PetscCall(PetscStrcpy(fname, "stdout")); 95 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc), fname, &viewer)); 96 for (i = 0; i < osm->n_local; i++) { 97 if (i < osm->n_local_true) { 98 PetscCall(ISGetLocalSize(osm->is[i], &nidx)); 99 PetscCall(ISGetIndices(osm->is[i], &idx)); 100 /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/ 101 #define len 16 * (nidx + 1) + 512 102 PetscCall(PetscMalloc1(len, &s)); 103 PetscCall(PetscViewerStringOpen(PETSC_COMM_SELF, s, len, &sviewer)); 104 #undef len 105 PetscCall(PetscViewerStringSPrintf(sviewer, "[%d:%d] Subdomain %" PetscInt_FMT " with overlap:\n", rank, size, i)); 106 for (j = 0; j < nidx; j++) PetscCall(PetscViewerStringSPrintf(sviewer, "%" PetscInt_FMT " ", idx[j])); 107 PetscCall(ISRestoreIndices(osm->is[i], &idx)); 108 PetscCall(PetscViewerStringSPrintf(sviewer, "\n")); 109 PetscCall(PetscViewerDestroy(&sviewer)); 110 
PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 111 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s", s)); 112 PetscCall(PetscViewerFlush(viewer)); 113 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 114 PetscCall(PetscFree(s)); 115 if (osm->is_local) { 116 /* Print to a string viewer; no more than 15 characters per index plus 512 char for the header.*/ 117 #define len 16 * (nidx + 1) + 512 118 PetscCall(PetscMalloc1(len, &s)); 119 PetscCall(PetscViewerStringOpen(PETSC_COMM_SELF, s, len, &sviewer)); 120 #undef len 121 PetscCall(PetscViewerStringSPrintf(sviewer, "[%d:%d] Subdomain %" PetscInt_FMT " without overlap:\n", rank, size, i)); 122 PetscCall(ISGetLocalSize(osm->is_local[i], &nidx)); 123 PetscCall(ISGetIndices(osm->is_local[i], &idx)); 124 for (j = 0; j < nidx; j++) PetscCall(PetscViewerStringSPrintf(sviewer, "%" PetscInt_FMT " ", idx[j])); 125 PetscCall(ISRestoreIndices(osm->is_local[i], &idx)); 126 PetscCall(PetscViewerStringSPrintf(sviewer, "\n")); 127 PetscCall(PetscViewerDestroy(&sviewer)); 128 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 129 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s", s)); 130 PetscCall(PetscViewerFlush(viewer)); 131 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 132 PetscCall(PetscFree(s)); 133 } 134 } else { 135 /* Participate in collective viewer calls. */ 136 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 137 PetscCall(PetscViewerFlush(viewer)); 138 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 139 /* Assume either all ranks have is_local or none do. 
*/ 140 if (osm->is_local) { 141 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 142 PetscCall(PetscViewerFlush(viewer)); 143 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 144 } 145 } 146 } 147 PetscCall(PetscViewerFlush(viewer)); 148 PetscCall(PetscViewerDestroy(&viewer)); 149 PetscFunctionReturn(0); 150 } 151 152 static PetscErrorCode PCSetUp_ASM(PC pc) { 153 PC_ASM *osm = (PC_ASM *)pc->data; 154 PetscBool flg; 155 PetscInt i, m, m_local; 156 MatReuse scall = MAT_REUSE_MATRIX; 157 IS isl; 158 KSP ksp; 159 PC subpc; 160 const char *prefix, *pprefix; 161 Vec vec; 162 DM *domain_dm = NULL; 163 164 PetscFunctionBegin; 165 if (!pc->setupcalled) { 166 PetscInt m; 167 168 /* Note: if subdomains have been set either via PCASMSetTotalSubdomains() or via PCASMSetLocalSubdomains(), osm->n_local_true will not be PETSC_DECIDE */ 169 if (osm->n_local_true == PETSC_DECIDE) { 170 /* no subdomains given */ 171 /* try pc->dm first, if allowed */ 172 if (osm->dm_subdomains && pc->dm) { 173 PetscInt num_domains, d; 174 char **domain_names; 175 IS *inner_domain_is, *outer_domain_is; 176 PetscCall(DMCreateDomainDecomposition(pc->dm, &num_domains, &domain_names, &inner_domain_is, &outer_domain_is, &domain_dm)); 177 osm->overlap = -1; /* We do not want to increase the overlap of the IS. 
178 A future improvement of this code might allow one to use 179 DM-defined subdomains and also increase the overlap, 180 but that is not currently supported */ 181 if (num_domains) PetscCall(PCASMSetLocalSubdomains(pc, num_domains, outer_domain_is, inner_domain_is)); 182 for (d = 0; d < num_domains; ++d) { 183 if (domain_names) PetscCall(PetscFree(domain_names[d])); 184 if (inner_domain_is) PetscCall(ISDestroy(&inner_domain_is[d])); 185 if (outer_domain_is) PetscCall(ISDestroy(&outer_domain_is[d])); 186 } 187 PetscCall(PetscFree(domain_names)); 188 PetscCall(PetscFree(inner_domain_is)); 189 PetscCall(PetscFree(outer_domain_is)); 190 } 191 if (osm->n_local_true == PETSC_DECIDE) { 192 /* still no subdomains; use one subdomain per processor */ 193 osm->n_local_true = 1; 194 } 195 } 196 { /* determine the global and max number of subdomains */ 197 struct { 198 PetscInt max, sum; 199 } inwork, outwork; 200 PetscMPIInt size; 201 202 inwork.max = osm->n_local_true; 203 inwork.sum = osm->n_local_true; 204 PetscCall(MPIU_Allreduce(&inwork, &outwork, 1, MPIU_2INT, MPIU_MAXSUM_OP, PetscObjectComm((PetscObject)pc))); 205 osm->n_local = outwork.max; 206 osm->n = outwork.sum; 207 208 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 209 if (outwork.max == 1 && outwork.sum == size) { 210 /* osm->n_local_true = 1 on all processes, set this option may enable use of optimized MatCreateSubMatrices() implementation */ 211 PetscCall(MatSetOption(pc->pmat, MAT_SUBMAT_SINGLEIS, PETSC_TRUE)); 212 } 213 } 214 if (!osm->is) { /* create the index sets */ 215 PetscCall(PCASMCreateSubdomains(pc->pmat, osm->n_local_true, &osm->is)); 216 } 217 if (osm->n_local_true > 1 && !osm->is_local) { 218 PetscCall(PetscMalloc1(osm->n_local_true, &osm->is_local)); 219 for (i = 0; i < osm->n_local_true; i++) { 220 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 221 PetscCall(ISDuplicate(osm->is[i], &osm->is_local[i])); 222 PetscCall(ISCopy(osm->is[i], 
osm->is_local[i])); 223 } else { 224 PetscCall(PetscObjectReference((PetscObject)osm->is[i])); 225 osm->is_local[i] = osm->is[i]; 226 } 227 } 228 } 229 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 230 if (osm->overlap > 0) { 231 /* Extend the "overlapping" regions by a number of steps */ 232 PetscCall(MatIncreaseOverlap(pc->pmat, osm->n_local_true, osm->is, osm->overlap)); 233 } 234 if (osm->sort_indices) { 235 for (i = 0; i < osm->n_local_true; i++) { 236 PetscCall(ISSort(osm->is[i])); 237 if (osm->is_local) PetscCall(ISSort(osm->is_local[i])); 238 } 239 } 240 flg = PETSC_FALSE; 241 PetscCall(PetscOptionsHasName(NULL, prefix, "-pc_asm_print_subdomains", &flg)); 242 if (flg) PetscCall(PCASMPrintSubdomains(pc)); 243 if (!osm->ksp) { 244 /* Create the local solvers */ 245 PetscCall(PetscMalloc1(osm->n_local_true, &osm->ksp)); 246 if (domain_dm) PetscCall(PetscInfo(pc, "Setting up ASM subproblems using the embedded DM\n")); 247 for (i = 0; i < osm->n_local_true; i++) { 248 PetscCall(KSPCreate(PETSC_COMM_SELF, &ksp)); 249 PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure)); 250 PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)ksp)); 251 PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1)); 252 PetscCall(KSPSetType(ksp, KSPPREONLY)); 253 PetscCall(KSPGetPC(ksp, &subpc)); 254 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 255 PetscCall(KSPSetOptionsPrefix(ksp, prefix)); 256 PetscCall(KSPAppendOptionsPrefix(ksp, "sub_")); 257 if (domain_dm) { 258 PetscCall(KSPSetDM(ksp, domain_dm[i])); 259 PetscCall(KSPSetDMActive(ksp, PETSC_FALSE)); 260 PetscCall(DMDestroy(&domain_dm[i])); 261 } 262 osm->ksp[i] = ksp; 263 } 264 if (domain_dm) PetscCall(PetscFree(domain_dm)); 265 } 266 267 PetscCall(ISConcatenate(PETSC_COMM_SELF, osm->n_local_true, osm->is, &osm->lis)); 268 PetscCall(ISSortRemoveDups(osm->lis)); 269 PetscCall(ISGetLocalSize(osm->lis, &m)); 270 271 scall = MAT_INITIAL_MATRIX; 272 } else { 273 /* 274 Destroy the blocks from the 
previous iteration 275 */ 276 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 277 PetscCall(MatDestroyMatrices(osm->n_local_true, &osm->pmat)); 278 scall = MAT_INITIAL_MATRIX; 279 } 280 } 281 282 /* Destroy previous submatrices of a different type than pc->pmat since MAT_REUSE_MATRIX won't work in that case */ 283 if (scall == MAT_REUSE_MATRIX && osm->sub_mat_type) { 284 if (osm->n_local_true > 0) PetscCall(MatDestroySubMatrices(osm->n_local_true, &osm->pmat)); 285 scall = MAT_INITIAL_MATRIX; 286 } 287 288 /* 289 Extract out the submatrices 290 */ 291 PetscCall(MatCreateSubMatrices(pc->pmat, osm->n_local_true, osm->is, osm->is, scall, &osm->pmat)); 292 if (scall == MAT_INITIAL_MATRIX) { 293 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)pc->pmat, &pprefix)); 294 for (i = 0; i < osm->n_local_true; i++) { 295 PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)osm->pmat[i])); 296 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i], pprefix)); 297 } 298 } 299 300 /* Convert the types of the submatrices (if needbe) */ 301 if (osm->sub_mat_type) { 302 for (i = 0; i < osm->n_local_true; i++) PetscCall(MatConvert(osm->pmat[i], osm->sub_mat_type, MAT_INPLACE_MATRIX, &(osm->pmat[i]))); 303 } 304 305 if (!pc->setupcalled) { 306 VecType vtype; 307 308 /* Create the local work vectors (from the local matrices) and scatter contexts */ 309 PetscCall(MatCreateVecs(pc->pmat, &vec, NULL)); 310 311 PetscCheck(!osm->is_local || (osm->type != PC_ASM_INTERPOLATE && osm->type != PC_ASM_NONE), PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot use interpolate or none PCASMType if is_local was provided to PCASMSetLocalSubdomains()"); 312 if (osm->is_local && osm->type == PC_ASM_RESTRICT && osm->loctype == PC_COMPOSITE_ADDITIVE) PetscCall(PetscMalloc1(osm->n_local_true, &osm->lprolongation)); 313 PetscCall(PetscMalloc1(osm->n_local_true, &osm->lrestriction)); 314 PetscCall(PetscMalloc1(osm->n_local_true, &osm->x)); 315 PetscCall(PetscMalloc1(osm->n_local_true, 
&osm->y)); 316 317 PetscCall(ISGetLocalSize(osm->lis, &m)); 318 PetscCall(ISCreateStride(PETSC_COMM_SELF, m, 0, 1, &isl)); 319 PetscCall(MatGetVecType(osm->pmat[0], &vtype)); 320 PetscCall(VecCreate(PETSC_COMM_SELF, &osm->lx)); 321 PetscCall(VecSetSizes(osm->lx, m, m)); 322 PetscCall(VecSetType(osm->lx, vtype)); 323 PetscCall(VecDuplicate(osm->lx, &osm->ly)); 324 PetscCall(VecScatterCreate(vec, osm->lis, osm->lx, isl, &osm->restriction)); 325 PetscCall(ISDestroy(&isl)); 326 327 for (i = 0; i < osm->n_local_true; ++i) { 328 ISLocalToGlobalMapping ltog; 329 IS isll; 330 const PetscInt *idx_is; 331 PetscInt *idx_lis, nout; 332 333 PetscCall(ISGetLocalSize(osm->is[i], &m)); 334 PetscCall(MatCreateVecs(osm->pmat[i], &osm->x[i], NULL)); 335 PetscCall(VecDuplicate(osm->x[i], &osm->y[i])); 336 337 /* generate a scatter from ly to y[i] picking all the overlapping is[i] entries */ 338 PetscCall(ISLocalToGlobalMappingCreateIS(osm->lis, <og)); 339 PetscCall(ISGetLocalSize(osm->is[i], &m)); 340 PetscCall(ISGetIndices(osm->is[i], &idx_is)); 341 PetscCall(PetscMalloc1(m, &idx_lis)); 342 PetscCall(ISGlobalToLocalMappingApply(ltog, IS_GTOLM_DROP, m, idx_is, &nout, idx_lis)); 343 PetscCheck(nout == m, PETSC_COMM_SELF, PETSC_ERR_PLIB, "is not a subset of lis"); 344 PetscCall(ISRestoreIndices(osm->is[i], &idx_is)); 345 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx_lis, PETSC_OWN_POINTER, &isll)); 346 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 347 PetscCall(ISCreateStride(PETSC_COMM_SELF, m, 0, 1, &isl)); 348 PetscCall(VecScatterCreate(osm->ly, isll, osm->y[i], isl, &osm->lrestriction[i])); 349 PetscCall(ISDestroy(&isll)); 350 PetscCall(ISDestroy(&isl)); 351 if (osm->lprolongation) { /* generate a scatter from y[i] to ly picking only the the non-overlapping is_local[i] entries */ 352 ISLocalToGlobalMapping ltog; 353 IS isll, isll_local; 354 const PetscInt *idx_local; 355 PetscInt *idx1, *idx2, nout; 356 357 PetscCall(ISGetLocalSize(osm->is_local[i], &m_local)); 358 
PetscCall(ISGetIndices(osm->is_local[i], &idx_local)); 359 360 PetscCall(ISLocalToGlobalMappingCreateIS(osm->is[i], <og)); 361 PetscCall(PetscMalloc1(m_local, &idx1)); 362 PetscCall(ISGlobalToLocalMappingApply(ltog, IS_GTOLM_DROP, m_local, idx_local, &nout, idx1)); 363 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 364 PetscCheck(nout == m_local, PETSC_COMM_SELF, PETSC_ERR_PLIB, "is_local not a subset of is"); 365 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m_local, idx1, PETSC_OWN_POINTER, &isll)); 366 367 PetscCall(ISLocalToGlobalMappingCreateIS(osm->lis, <og)); 368 PetscCall(PetscMalloc1(m_local, &idx2)); 369 PetscCall(ISGlobalToLocalMappingApply(ltog, IS_GTOLM_DROP, m_local, idx_local, &nout, idx2)); 370 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 371 PetscCheck(nout == m_local, PETSC_COMM_SELF, PETSC_ERR_PLIB, "is_local not a subset of lis"); 372 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m_local, idx2, PETSC_OWN_POINTER, &isll_local)); 373 374 PetscCall(ISRestoreIndices(osm->is_local[i], &idx_local)); 375 PetscCall(VecScatterCreate(osm->y[i], isll, osm->ly, isll_local, &osm->lprolongation[i])); 376 377 PetscCall(ISDestroy(&isll)); 378 PetscCall(ISDestroy(&isll_local)); 379 } 380 } 381 PetscCall(VecDestroy(&vec)); 382 } 383 384 if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE) { 385 IS *cis; 386 PetscInt c; 387 388 PetscCall(PetscMalloc1(osm->n_local_true, &cis)); 389 for (c = 0; c < osm->n_local_true; ++c) cis[c] = osm->lis; 390 PetscCall(MatCreateSubMatrices(pc->pmat, osm->n_local_true, osm->is, cis, scall, &osm->lmats)); 391 PetscCall(PetscFree(cis)); 392 } 393 394 /* Return control to the user so that the submatrices can be modified (e.g., to apply 395 different boundary conditions for the submatrices than for the global problem) */ 396 PetscCall(PCModifySubMatrices(pc, osm->n_local_true, osm->is, osm->is, osm->pmat, pc->modifysubmatricesP)); 397 398 /* 399 Loop over subdomains putting them into local ksp 400 */ 401 
PetscCall(KSPGetOptionsPrefix(osm->ksp[0], &prefix)); 402 for (i = 0; i < osm->n_local_true; i++) { 403 PetscCall(KSPSetOperators(osm->ksp[i], osm->pmat[i], osm->pmat[i])); 404 PetscCall(MatSetOptionsPrefix(osm->pmat[i], prefix)); 405 if (!pc->setupcalled) PetscCall(KSPSetFromOptions(osm->ksp[i])); 406 } 407 PetscFunctionReturn(0); 408 } 409 410 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc) { 411 PC_ASM *osm = (PC_ASM *)pc->data; 412 PetscInt i; 413 KSPConvergedReason reason; 414 415 PetscFunctionBegin; 416 for (i = 0; i < osm->n_local_true; i++) { 417 PetscCall(KSPSetUp(osm->ksp[i])); 418 PetscCall(KSPGetConvergedReason(osm->ksp[i], &reason)); 419 if (reason == KSP_DIVERGED_PC_FAILED) pc->failedreason = PC_SUBPC_ERROR; 420 } 421 PetscFunctionReturn(0); 422 } 423 424 static PetscErrorCode PCApply_ASM(PC pc, Vec x, Vec y) { 425 PC_ASM *osm = (PC_ASM *)pc->data; 426 PetscInt i, n_local_true = osm->n_local_true; 427 ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE; 428 429 PetscFunctionBegin; 430 /* 431 support for limiting the restriction or interpolation to only local 432 subdomain values (leaving the other values 0). 
433 */ 434 if (!(osm->type & PC_ASM_RESTRICT)) { 435 forward = SCATTER_FORWARD_LOCAL; 436 /* have to zero the work RHS since scatter may leave some slots empty */ 437 PetscCall(VecSet(osm->lx, 0.0)); 438 } 439 if (!(osm->type & PC_ASM_INTERPOLATE)) reverse = SCATTER_REVERSE_LOCAL; 440 441 if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE || osm->loctype == PC_COMPOSITE_ADDITIVE) { 442 /* zero the global and the local solutions */ 443 PetscCall(VecSet(y, 0.0)); 444 PetscCall(VecSet(osm->ly, 0.0)); 445 446 /* copy the global RHS to local RHS including the ghost nodes */ 447 PetscCall(VecScatterBegin(osm->restriction, x, osm->lx, INSERT_VALUES, forward)); 448 PetscCall(VecScatterEnd(osm->restriction, x, osm->lx, INSERT_VALUES, forward)); 449 450 /* restrict local RHS to the overlapping 0-block RHS */ 451 PetscCall(VecScatterBegin(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward)); 452 PetscCall(VecScatterEnd(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward)); 453 454 /* do the local solves */ 455 for (i = 0; i < n_local_true; ++i) { 456 /* solve the overlapping i-block */ 457 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, osm->ksp[i], osm->x[i], osm->y[i], 0)); 458 PetscCall(KSPSolve(osm->ksp[i], osm->x[i], osm->y[i])); 459 PetscCall(KSPCheckSolve(osm->ksp[i], pc, osm->y[i])); 460 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, osm->ksp[i], osm->x[i], osm->y[i], 0)); 461 462 if (osm->lprolongation) { /* interpolate the non-overlapping i-block solution to the local solution (only for restrictive additive) */ 463 PetscCall(VecScatterBegin(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward)); 464 PetscCall(VecScatterEnd(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward)); 465 } else { /* interpolate the overlapping i-block solution to the local solution */ 466 PetscCall(VecScatterBegin(osm->lrestriction[i], osm->y[i], osm->ly, ADD_VALUES, reverse)); 467 PetscCall(VecScatterEnd(osm->lrestriction[i], osm->y[i], osm->ly, 
ADD_VALUES, reverse)); 468 } 469 470 if (i < n_local_true - 1) { 471 /* restrict local RHS to the overlapping (i+1)-block RHS */ 472 PetscCall(VecScatterBegin(osm->lrestriction[i + 1], osm->lx, osm->x[i + 1], INSERT_VALUES, forward)); 473 PetscCall(VecScatterEnd(osm->lrestriction[i + 1], osm->lx, osm->x[i + 1], INSERT_VALUES, forward)); 474 475 if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE) { 476 /* update the overlapping (i+1)-block RHS using the current local solution */ 477 PetscCall(MatMult(osm->lmats[i + 1], osm->ly, osm->y[i + 1])); 478 PetscCall(VecAXPBY(osm->x[i + 1], -1., 1., osm->y[i + 1])); 479 } 480 } 481 } 482 /* add the local solution to the global solution including the ghost nodes */ 483 PetscCall(VecScatterBegin(osm->restriction, osm->ly, y, ADD_VALUES, reverse)); 484 PetscCall(VecScatterEnd(osm->restriction, osm->ly, y, ADD_VALUES, reverse)); 485 } else SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Invalid local composition type: %s", PCCompositeTypes[osm->loctype]); 486 PetscFunctionReturn(0); 487 } 488 489 static PetscErrorCode PCMatApply_ASM(PC pc, Mat X, Mat Y) { 490 PC_ASM *osm = (PC_ASM *)pc->data; 491 Mat Z, W; 492 Vec x; 493 PetscInt i, m, N; 494 ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE; 495 496 PetscFunctionBegin; 497 PetscCheck(osm->n_local_true <= 1, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented"); 498 /* 499 support for limiting the restriction or interpolation to only local 500 subdomain values (leaving the other values 0). 
501 */ 502 if (!(osm->type & PC_ASM_RESTRICT)) { 503 forward = SCATTER_FORWARD_LOCAL; 504 /* have to zero the work RHS since scatter may leave some slots empty */ 505 PetscCall(VecSet(osm->lx, 0.0)); 506 } 507 if (!(osm->type & PC_ASM_INTERPOLATE)) reverse = SCATTER_REVERSE_LOCAL; 508 PetscCall(VecGetLocalSize(osm->x[0], &m)); 509 PetscCall(MatGetSize(X, NULL, &N)); 510 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, m, N, NULL, &Z)); 511 if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE || osm->loctype == PC_COMPOSITE_ADDITIVE) { 512 /* zero the global and the local solutions */ 513 PetscCall(MatZeroEntries(Y)); 514 PetscCall(VecSet(osm->ly, 0.0)); 515 516 for (i = 0; i < N; ++i) { 517 PetscCall(MatDenseGetColumnVecRead(X, i, &x)); 518 /* copy the global RHS to local RHS including the ghost nodes */ 519 PetscCall(VecScatterBegin(osm->restriction, x, osm->lx, INSERT_VALUES, forward)); 520 PetscCall(VecScatterEnd(osm->restriction, x, osm->lx, INSERT_VALUES, forward)); 521 PetscCall(MatDenseRestoreColumnVecRead(X, i, &x)); 522 523 PetscCall(MatDenseGetColumnVecWrite(Z, i, &x)); 524 /* restrict local RHS to the overlapping 0-block RHS */ 525 PetscCall(VecScatterBegin(osm->lrestriction[0], osm->lx, x, INSERT_VALUES, forward)); 526 PetscCall(VecScatterEnd(osm->lrestriction[0], osm->lx, x, INSERT_VALUES, forward)); 527 PetscCall(MatDenseRestoreColumnVecWrite(Z, i, &x)); 528 } 529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, m, N, NULL, &W)); 530 /* solve the overlapping 0-block */ 531 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, osm->ksp[0], Z, W, 0)); 532 PetscCall(KSPMatSolve(osm->ksp[0], Z, W)); 533 PetscCall(KSPCheckSolve(osm->ksp[0], pc, NULL)); 534 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, osm->ksp[0], Z, W, 0)); 535 PetscCall(MatDestroy(&Z)); 536 537 for (i = 0; i < N; ++i) { 538 PetscCall(VecSet(osm->ly, 0.0)); 539 PetscCall(MatDenseGetColumnVecRead(W, i, &x)); 540 if (osm->lprolongation) { /* interpolate the non-overlapping 0-block solution to the local solution 
(only for restrictive additive) */ 541 PetscCall(VecScatterBegin(osm->lprolongation[0], x, osm->ly, ADD_VALUES, forward)); 542 PetscCall(VecScatterEnd(osm->lprolongation[0], x, osm->ly, ADD_VALUES, forward)); 543 } else { /* interpolate the overlapping 0-block solution to the local solution */ 544 PetscCall(VecScatterBegin(osm->lrestriction[0], x, osm->ly, ADD_VALUES, reverse)); 545 PetscCall(VecScatterEnd(osm->lrestriction[0], x, osm->ly, ADD_VALUES, reverse)); 546 } 547 PetscCall(MatDenseRestoreColumnVecRead(W, i, &x)); 548 549 PetscCall(MatDenseGetColumnVecWrite(Y, i, &x)); 550 /* add the local solution to the global solution including the ghost nodes */ 551 PetscCall(VecScatterBegin(osm->restriction, osm->ly, x, ADD_VALUES, reverse)); 552 PetscCall(VecScatterEnd(osm->restriction, osm->ly, x, ADD_VALUES, reverse)); 553 PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &x)); 554 } 555 PetscCall(MatDestroy(&W)); 556 } else SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Invalid local composition type: %s", PCCompositeTypes[osm->loctype]); 557 PetscFunctionReturn(0); 558 } 559 560 static PetscErrorCode PCApplyTranspose_ASM(PC pc, Vec x, Vec y) { 561 PC_ASM *osm = (PC_ASM *)pc->data; 562 PetscInt i, n_local_true = osm->n_local_true; 563 ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE; 564 565 PetscFunctionBegin; 566 /* 567 Support for limiting the restriction or interpolation to only local 568 subdomain values (leaving the other values 0). 
569 570 Note: these are reversed from the PCApply_ASM() because we are applying the 571 transpose of the three terms 572 */ 573 574 if (!(osm->type & PC_ASM_INTERPOLATE)) { 575 forward = SCATTER_FORWARD_LOCAL; 576 /* have to zero the work RHS since scatter may leave some slots empty */ 577 PetscCall(VecSet(osm->lx, 0.0)); 578 } 579 if (!(osm->type & PC_ASM_RESTRICT)) reverse = SCATTER_REVERSE_LOCAL; 580 581 /* zero the global and the local solutions */ 582 PetscCall(VecSet(y, 0.0)); 583 PetscCall(VecSet(osm->ly, 0.0)); 584 585 /* Copy the global RHS to local RHS including the ghost nodes */ 586 PetscCall(VecScatterBegin(osm->restriction, x, osm->lx, INSERT_VALUES, forward)); 587 PetscCall(VecScatterEnd(osm->restriction, x, osm->lx, INSERT_VALUES, forward)); 588 589 /* Restrict local RHS to the overlapping 0-block RHS */ 590 PetscCall(VecScatterBegin(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward)); 591 PetscCall(VecScatterEnd(osm->lrestriction[0], osm->lx, osm->x[0], INSERT_VALUES, forward)); 592 593 /* do the local solves */ 594 for (i = 0; i < n_local_true; ++i) { 595 /* solve the overlapping i-block */ 596 PetscCall(PetscLogEventBegin(PC_ApplyOnBlocks, osm->ksp[i], osm->x[i], osm->y[i], 0)); 597 PetscCall(KSPSolveTranspose(osm->ksp[i], osm->x[i], osm->y[i])); 598 PetscCall(KSPCheckSolve(osm->ksp[i], pc, osm->y[i])); 599 PetscCall(PetscLogEventEnd(PC_ApplyOnBlocks, osm->ksp[i], osm->x[i], osm->y[i], 0)); 600 601 if (osm->lprolongation) { /* interpolate the non-overlapping i-block solution to the local solution */ 602 PetscCall(VecScatterBegin(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward)); 603 PetscCall(VecScatterEnd(osm->lprolongation[i], osm->y[i], osm->ly, ADD_VALUES, forward)); 604 } else { /* interpolate the overlapping i-block solution to the local solution */ 605 PetscCall(VecScatterBegin(osm->lrestriction[i], osm->y[i], osm->ly, ADD_VALUES, reverse)); 606 PetscCall(VecScatterEnd(osm->lrestriction[i], osm->y[i], osm->ly, 
ADD_VALUES, reverse)); 607 } 608 609 if (i < n_local_true - 1) { 610 /* Restrict local RHS to the overlapping (i+1)-block RHS */ 611 PetscCall(VecScatterBegin(osm->lrestriction[i + 1], osm->lx, osm->x[i + 1], INSERT_VALUES, forward)); 612 PetscCall(VecScatterEnd(osm->lrestriction[i + 1], osm->lx, osm->x[i + 1], INSERT_VALUES, forward)); 613 } 614 } 615 /* Add the local solution to the global solution including the ghost nodes */ 616 PetscCall(VecScatterBegin(osm->restriction, osm->ly, y, ADD_VALUES, reverse)); 617 PetscCall(VecScatterEnd(osm->restriction, osm->ly, y, ADD_VALUES, reverse)); 618 PetscFunctionReturn(0); 619 } 620 621 static PetscErrorCode PCReset_ASM(PC pc) { 622 PC_ASM *osm = (PC_ASM *)pc->data; 623 PetscInt i; 624 625 PetscFunctionBegin; 626 if (osm->ksp) { 627 for (i = 0; i < osm->n_local_true; i++) PetscCall(KSPReset(osm->ksp[i])); 628 } 629 if (osm->pmat) { 630 if (osm->n_local_true > 0) PetscCall(MatDestroySubMatrices(osm->n_local_true, &osm->pmat)); 631 } 632 if (osm->lrestriction) { 633 PetscCall(VecScatterDestroy(&osm->restriction)); 634 for (i = 0; i < osm->n_local_true; i++) { 635 PetscCall(VecScatterDestroy(&osm->lrestriction[i])); 636 if (osm->lprolongation) PetscCall(VecScatterDestroy(&osm->lprolongation[i])); 637 PetscCall(VecDestroy(&osm->x[i])); 638 PetscCall(VecDestroy(&osm->y[i])); 639 } 640 PetscCall(PetscFree(osm->lrestriction)); 641 if (osm->lprolongation) PetscCall(PetscFree(osm->lprolongation)); 642 PetscCall(PetscFree(osm->x)); 643 PetscCall(PetscFree(osm->y)); 644 } 645 PetscCall(PCASMDestroySubdomains(osm->n_local_true, osm->is, osm->is_local)); 646 PetscCall(ISDestroy(&osm->lis)); 647 PetscCall(VecDestroy(&osm->lx)); 648 PetscCall(VecDestroy(&osm->ly)); 649 if (osm->loctype == PC_COMPOSITE_MULTIPLICATIVE) PetscCall(MatDestroyMatrices(osm->n_local_true, &osm->lmats)); 650 651 PetscCall(PetscFree(osm->sub_mat_type)); 652 653 osm->is = NULL; 654 osm->is_local = NULL; 655 PetscFunctionReturn(0); 656 } 657 658 static 
PetscErrorCode PCDestroy_ASM(PC pc) { 659 PC_ASM *osm = (PC_ASM *)pc->data; 660 PetscInt i; 661 662 PetscFunctionBegin; 663 PetscCall(PCReset_ASM(pc)); 664 if (osm->ksp) { 665 for (i = 0; i < osm->n_local_true; i++) PetscCall(KSPDestroy(&osm->ksp[i])); 666 PetscCall(PetscFree(osm->ksp)); 667 } 668 PetscCall(PetscFree(pc->data)); 669 670 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetLocalSubdomains_C", NULL)); 671 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetTotalSubdomains_C", NULL)); 672 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetOverlap_C", NULL)); 673 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetType_C", NULL)); 674 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetType_C", NULL)); 675 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetLocalType_C", NULL)); 676 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetLocalType_C", NULL)); 677 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetSortIndices_C", NULL)); 678 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetSubKSP_C", NULL)); 679 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetSubMatType_C", NULL)); 680 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetSubMatType_C", NULL)); 681 PetscFunctionReturn(0); 682 } 683 684 static PetscErrorCode PCSetFromOptions_ASM(PC pc, PetscOptionItems *PetscOptionsObject) { 685 PC_ASM *osm = (PC_ASM *)pc->data; 686 PetscInt blocks, ovl; 687 PetscBool flg; 688 PCASMType asmtype; 689 PCCompositeType loctype; 690 char sub_mat_type[256]; 691 692 PetscFunctionBegin; 693 PetscOptionsHeadBegin(PetscOptionsObject, "Additive Schwarz options"); 694 PetscCall(PetscOptionsBool("-pc_asm_dm_subdomains", "Use DMCreateDomainDecomposition() to define subdomains", "PCASMSetDMSubdomains", osm->dm_subdomains, &osm->dm_subdomains, &flg)); 695 PetscCall(PetscOptionsInt("-pc_asm_blocks", "Number of subdomains", "PCASMSetTotalSubdomains", 
osm->n, &blocks, &flg)); 696 if (flg) { 697 PetscCall(PCASMSetTotalSubdomains(pc, blocks, NULL, NULL)); 698 osm->dm_subdomains = PETSC_FALSE; 699 } 700 PetscCall(PetscOptionsInt("-pc_asm_local_blocks", "Number of local subdomains", "PCASMSetLocalSubdomains", osm->n_local_true, &blocks, &flg)); 701 if (flg) { 702 PetscCall(PCASMSetLocalSubdomains(pc, blocks, NULL, NULL)); 703 osm->dm_subdomains = PETSC_FALSE; 704 } 705 PetscCall(PetscOptionsInt("-pc_asm_overlap", "Number of grid points overlap", "PCASMSetOverlap", osm->overlap, &ovl, &flg)); 706 if (flg) { 707 PetscCall(PCASMSetOverlap(pc, ovl)); 708 osm->dm_subdomains = PETSC_FALSE; 709 } 710 flg = PETSC_FALSE; 711 PetscCall(PetscOptionsEnum("-pc_asm_type", "Type of restriction/extension", "PCASMSetType", PCASMTypes, (PetscEnum)osm->type, (PetscEnum *)&asmtype, &flg)); 712 if (flg) PetscCall(PCASMSetType(pc, asmtype)); 713 flg = PETSC_FALSE; 714 PetscCall(PetscOptionsEnum("-pc_asm_local_type", "Type of local solver composition", "PCASMSetLocalType", PCCompositeTypes, (PetscEnum)osm->loctype, (PetscEnum *)&loctype, &flg)); 715 if (flg) PetscCall(PCASMSetLocalType(pc, loctype)); 716 PetscCall(PetscOptionsFList("-pc_asm_sub_mat_type", "Subsolve Matrix Type", "PCASMSetSubMatType", MatList, NULL, sub_mat_type, 256, &flg)); 717 if (flg) PetscCall(PCASMSetSubMatType(pc, sub_mat_type)); 718 PetscOptionsHeadEnd(); 719 PetscFunctionReturn(0); 720 } 721 722 /*------------------------------------------------------------------------------------*/ 723 724 static PetscErrorCode PCASMSetLocalSubdomains_ASM(PC pc, PetscInt n, IS is[], IS is_local[]) { 725 PC_ASM *osm = (PC_ASM *)pc->data; 726 PetscInt i; 727 728 PetscFunctionBegin; 729 PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Each process must have 1 or more blocks, n = %" PetscInt_FMT, n); 730 PetscCheck(!pc->setupcalled || (n == osm->n_local_true && !is), PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "PCASMSetLocalSubdomains() should be called 
before calling PCSetUp()."); 731 732 if (!pc->setupcalled) { 733 if (is) { 734 for (i = 0; i < n; i++) PetscCall(PetscObjectReference((PetscObject)is[i])); 735 } 736 if (is_local) { 737 for (i = 0; i < n; i++) PetscCall(PetscObjectReference((PetscObject)is_local[i])); 738 } 739 PetscCall(PCASMDestroySubdomains(osm->n_local_true, osm->is, osm->is_local)); 740 741 osm->n_local_true = n; 742 osm->is = NULL; 743 osm->is_local = NULL; 744 if (is) { 745 PetscCall(PetscMalloc1(n, &osm->is)); 746 for (i = 0; i < n; i++) osm->is[i] = is[i]; 747 /* Flag indicating that the user has set overlapping subdomains so PCASM should not increase their size. */ 748 osm->overlap = -1; 749 } 750 if (is_local) { 751 PetscCall(PetscMalloc1(n, &osm->is_local)); 752 for (i = 0; i < n; i++) osm->is_local[i] = is_local[i]; 753 if (!is) { 754 PetscCall(PetscMalloc1(osm->n_local_true, &osm->is)); 755 for (i = 0; i < osm->n_local_true; i++) { 756 if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */ 757 PetscCall(ISDuplicate(osm->is_local[i], &osm->is[i])); 758 PetscCall(ISCopy(osm->is_local[i], osm->is[i])); 759 } else { 760 PetscCall(PetscObjectReference((PetscObject)osm->is_local[i])); 761 osm->is[i] = osm->is_local[i]; 762 } 763 } 764 } 765 } 766 } 767 PetscFunctionReturn(0); 768 } 769 770 static PetscErrorCode PCASMSetTotalSubdomains_ASM(PC pc, PetscInt N, IS *is, IS *is_local) { 771 PC_ASM *osm = (PC_ASM *)pc->data; 772 PetscMPIInt rank, size; 773 PetscInt n; 774 775 PetscFunctionBegin; 776 PetscCheck(N >= 1, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Number of total blocks must be > 0, N = %" PetscInt_FMT, N); 777 PetscCheck(!is && !is_local, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet."); 778 779 /* 780 Split the subdomains equally among all processors 781 */ 782 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 783 
PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 784 n = N / size + ((N % size) > rank); 785 PetscCheck(n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Process %d must have at least one block: total processors %d total blocks %" PetscInt_FMT, (int)rank, (int)size, N); 786 PetscCheck(!pc->setupcalled || n == osm->n_local_true, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PCASMSetTotalSubdomains() should be called before PCSetUp()."); 787 if (!pc->setupcalled) { 788 PetscCall(PCASMDestroySubdomains(osm->n_local_true, osm->is, osm->is_local)); 789 790 osm->n_local_true = n; 791 osm->is = NULL; 792 osm->is_local = NULL; 793 } 794 PetscFunctionReturn(0); 795 } 796 797 static PetscErrorCode PCASMSetOverlap_ASM(PC pc, PetscInt ovl) { 798 PC_ASM *osm = (PC_ASM *)pc->data; 799 800 PetscFunctionBegin; 801 PetscCheck(ovl >= 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap value requested"); 802 PetscCheck(!pc->setupcalled || ovl == osm->overlap, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "PCASMSetOverlap() should be called before PCSetUp()."); 803 if (!pc->setupcalled) osm->overlap = ovl; 804 PetscFunctionReturn(0); 805 } 806 807 static PetscErrorCode PCASMSetType_ASM(PC pc, PCASMType type) { 808 PC_ASM *osm = (PC_ASM *)pc->data; 809 810 PetscFunctionBegin; 811 osm->type = type; 812 osm->type_set = PETSC_TRUE; 813 PetscFunctionReturn(0); 814 } 815 816 static PetscErrorCode PCASMGetType_ASM(PC pc, PCASMType *type) { 817 PC_ASM *osm = (PC_ASM *)pc->data; 818 819 PetscFunctionBegin; 820 *type = osm->type; 821 PetscFunctionReturn(0); 822 } 823 824 static PetscErrorCode PCASMSetLocalType_ASM(PC pc, PCCompositeType type) { 825 PC_ASM *osm = (PC_ASM *)pc->data; 826 827 PetscFunctionBegin; 828 PetscCheck(type == PC_COMPOSITE_ADDITIVE || type == PC_COMPOSITE_MULTIPLICATIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Only supports additive or multiplicative as the local type"); 829 osm->loctype = type; 830 
PetscFunctionReturn(0); 831 } 832 833 static PetscErrorCode PCASMGetLocalType_ASM(PC pc, PCCompositeType *type) { 834 PC_ASM *osm = (PC_ASM *)pc->data; 835 836 PetscFunctionBegin; 837 *type = osm->loctype; 838 PetscFunctionReturn(0); 839 } 840 841 static PetscErrorCode PCASMSetSortIndices_ASM(PC pc, PetscBool doSort) { 842 PC_ASM *osm = (PC_ASM *)pc->data; 843 844 PetscFunctionBegin; 845 osm->sort_indices = doSort; 846 PetscFunctionReturn(0); 847 } 848 849 static PetscErrorCode PCASMGetSubKSP_ASM(PC pc, PetscInt *n_local, PetscInt *first_local, KSP **ksp) { 850 PC_ASM *osm = (PC_ASM *)pc->data; 851 852 PetscFunctionBegin; 853 PetscCheck(osm->n_local_true >= 1, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here"); 854 855 if (n_local) *n_local = osm->n_local_true; 856 if (first_local) { 857 PetscCallMPI(MPI_Scan(&osm->n_local_true, first_local, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 858 *first_local -= osm->n_local_true; 859 } 860 if (ksp) *ksp = osm->ksp; 861 PetscFunctionReturn(0); 862 } 863 864 static PetscErrorCode PCASMGetSubMatType_ASM(PC pc, MatType *sub_mat_type) { 865 PC_ASM *osm = (PC_ASM *)pc->data; 866 867 PetscFunctionBegin; 868 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 869 PetscValidPointer(sub_mat_type, 2); 870 *sub_mat_type = osm->sub_mat_type; 871 PetscFunctionReturn(0); 872 } 873 874 static PetscErrorCode PCASMSetSubMatType_ASM(PC pc, MatType sub_mat_type) { 875 PC_ASM *osm = (PC_ASM *)pc->data; 876 877 PetscFunctionBegin; 878 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 879 PetscCall(PetscFree(osm->sub_mat_type)); 880 PetscCall(PetscStrallocpy(sub_mat_type, (char **)&osm->sub_mat_type)); 881 PetscFunctionReturn(0); 882 } 883 884 /*@C 885 PCASMSetLocalSubdomains - Sets the local subdomains (for this processor only) for the additive Schwarz preconditioner. 
   Collective on pc

   Input Parameters:
+  pc - the preconditioner context
.  n - the number of subdomains for this processor (default value = 1)
.  is - the index set that defines the subdomains for this processor
         (or NULL for PETSc to determine subdomains)
-  is_local - the index sets that define the local part of the subdomains for this processor, not used unless PCASMType is PC_ASM_RESTRICT
         (or NULL to not provide these)

   Options Database Key:
   To set the total number of subdomain blocks rather than specify the
   index sets, use the option
.  -pc_asm_local_blocks <blks> - Sets local blocks

   Notes:
   The IS numbering is in the parallel, global numbering of the vector for both is and is_local

   By default the ASM preconditioner uses 1 block per processor.

   Use PCASMSetTotalSubdomains() to set the subdomains for all processors.

   If is_local is provided and PCASMType is PC_ASM_RESTRICT then the solution only over the is_local region is interpolated
   back to form the global solution (this is the standard restricted additive Schwarz method)

   If the is_local is provided and PCASMType is PC_ASM_INTERPOLATE or PC_ASM_NONE then an error is generated since there is
   no code to handle that case.

   Level: advanced

.seealso: `PCASMSetTotalSubdomains()`, `PCASMSetOverlap()`, `PCASMGetSubKSP()`,
          `PCASMCreateSubdomains2D()`, `PCASMGetLocalSubdomains()`, `PCASMType`, `PCASMSetType()`
@*/
PetscErrorCode PCASMSetLocalSubdomains(PC pc, PetscInt n, IS is[], IS is_local[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscTryMethod(pc, "PCASMSetLocalSubdomains_C", (PC, PetscInt, IS[], IS[]), (pc, n, is, is_local));
  PetscFunctionReturn(0);
}

/*@C
   PCASMSetTotalSubdomains - Sets the subdomains for all processors for the
   additive Schwarz preconditioner.  Either all or no processors in the
   PC communicator must call this routine, with the same index sets.

   Collective on pc

   Input Parameters:
+  pc - the preconditioner context
.  N  - the number of subdomains for all processors
.  is - the index sets that define the subdomains for all processors
        (or NULL to ask PETSc to determine the subdomains)
-  is_local - the index sets that define the local part of the subdomains for this processor
        (or NULL to not provide this information)

   Options Database Key:
   To set the total number of subdomain blocks rather than specify the
   index sets, use the option
.  -pc_asm_blocks <blks> - Sets total blocks

   Notes:
   Currently you cannot use this to set the actual subdomains with the argument is or is_local.

   By default the ASM preconditioner uses 1 block per processor.

   These index sets cannot be destroyed until after completion of the
   linear solves for which the ASM preconditioner is being used.

   Use PCASMSetLocalSubdomains() to set local subdomains.

   The IS numbering is in the parallel, global numbering of the vector for both is and is_local

   Level: advanced

.seealso: `PCASMSetLocalSubdomains()`, `PCASMSetOverlap()`, `PCASMGetSubKSP()`,
          `PCASMCreateSubdomains2D()`
@*/
PetscErrorCode PCASMSetTotalSubdomains(PC pc, PetscInt N, IS is[], IS is_local[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscTryMethod(pc, "PCASMSetTotalSubdomains_C", (PC, PetscInt, IS[], IS[]), (pc, N, is, is_local));
  PetscFunctionReturn(0);
}

/*@
   PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
   additive Schwarz preconditioner.  Either all or no processors in the
   PC communicator must call this routine.

   Logically Collective on pc

   Input Parameters:
+  pc  - the preconditioner context
-  ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

   Options Database Key:
.  -pc_asm_overlap <ovl> - Sets overlap

   Notes:
   By default the ASM preconditioner uses 1 block per processor.  To use
   multiple blocks per processor, see PCASMSetTotalSubdomains() and
   PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

   The overlap defaults to 1, so if one desires that no additional
   overlap be computed beyond what may have been set with a call to
   PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
   must be set to be 0.  In particular, if one does not explicitly set
   the subdomains in an application code, then all overlap would be computed
   internally by PETSc, and using an overlap of 0 would result in an ASM
   variant that is equivalent to the block Jacobi preconditioner.

   The default algorithm used by PETSc to increase overlap is fast, but not scalable,
   use the option -mat_increase_overlap_scalable when the problem and number of processes is large.

   Note that one can define initial index sets with any overlap via
   PCASMSetLocalSubdomains(); the routine
   PCASMSetOverlap() merely allows PETSc to extend that overlap further
   if desired.

   Level: intermediate

.seealso: `PCASMSetTotalSubdomains()`, `PCASMSetLocalSubdomains()`, `PCASMGetSubKSP()`,
          `PCASMCreateSubdomains2D()`, `PCASMGetLocalSubdomains()`, `MatIncreaseOverlap()`
@*/
PetscErrorCode PCASMSetOverlap(PC pc, PetscInt ovl) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveInt(pc, ovl, 2);
  PetscTryMethod(pc, "PCASMSetOverlap_C", (PC, PetscInt), (pc, ovl));
  PetscFunctionReturn(0);
}

/*@
   PCASMSetType - Sets the type of restriction and interpolation used
   for local problems in the additive Schwarz method.

   Logically Collective on pc

   Input Parameters:
+  pc   - the preconditioner context
-  type - variant of ASM, one of
.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation (default)
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

   Options Database Key:
.  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

   Notes:
   if the is_local arguments are passed to PCASMSetLocalSubdomains() then they are used when PC_ASM_RESTRICT has been selected
   to limit the local processor interpolation

   Level: intermediate

.seealso: `PCASMSetTotalSubdomains()`, `PCASMSetTotalSubdomains()`, `PCASMGetSubKSP()`,
          `PCASMCreateSubdomains2D()`, `PCASMType`, `PCASMSetLocalType()`, `PCASMGetLocalType()`
@*/
PetscErrorCode PCASMSetType(PC pc, PCASMType type) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(pc, type, 2);
  PetscTryMethod(pc, "PCASMSetType_C", (PC, PCASMType), (pc, type));
  PetscFunctionReturn(0);
}

/*@
   PCASMGetType - Gets the type of restriction and interpolation used
   for local problems in the additive Schwarz method.

   Logically Collective on pc

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  type - variant of ASM, one of

.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

   Options Database Key:
.  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

   Level: intermediate

.seealso: `PCASMSetTotalSubdomains()`, `PCASMSetTotalSubdomains()`, `PCASMGetSubKSP()`,
          `PCASMCreateSubdomains2D()`, `PCASMType`, `PCASMSetType()`, `PCASMSetLocalType()`, `PCASMGetLocalType()`
@*/
PetscErrorCode PCASMGetType(PC pc, PCASMType *type) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscUseMethod(pc, "PCASMGetType_C", (PC, PCASMType *), (pc, type));
  PetscFunctionReturn(0);
}

/*@
  PCASMSetLocalType - Sets the type of composition used for local problems in the additive Schwarz method.

  Logically Collective on pc

  Input Parameters:
+ pc   - the preconditioner context
- type - type of composition, one of
.vb
  PC_COMPOSITE_ADDITIVE       - local additive combination
  PC_COMPOSITE_MULTIPLICATIVE - local multiplicative combination
.ve

  Options Database Key:
. -pc_asm_local_type [additive,multiplicative] - Sets local solver composition type

  Level: intermediate

.seealso: `PCASMSetType()`, `PCASMGetType()`, `PCASMGetLocalType()`, `PCASM`, `PCASMType`, `PCASMSetType()`, `PCASMGetType()`, `PCCompositeType`
@*/
PetscErrorCode PCASMSetLocalType(PC pc, PCCompositeType type) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(pc, type, 2);
  PetscTryMethod(pc, "PCASMSetLocalType_C", (PC, PCCompositeType), (pc, type));
  PetscFunctionReturn(0);
}

/*@
  PCASMGetLocalType - Gets the type of composition used for local problems in the additive Schwarz method.

  Logically Collective on pc

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. type - type of composition, one of
.vb
  PC_COMPOSITE_ADDITIVE       - local additive combination
  PC_COMPOSITE_MULTIPLICATIVE - local multiplicative combination
.ve

  Options Database Key:
. -pc_asm_local_type [additive,multiplicative] - Sets local solver composition type

  Level: intermediate

.seealso: `PCASMSetType()`, `PCASMGetType()`, `PCASMSetLocalType()`, `PCASMCreate()`, `PCASMType`, `PCASMSetType()`, `PCASMGetType()`, `PCCompositeType`
@*/
PetscErrorCode PCASMGetLocalType(PC pc, PCCompositeType *type) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidPointer(type, 2);
  PetscUseMethod(pc, "PCASMGetLocalType_C", (PC, PCCompositeType *), (pc, type));
  PetscFunctionReturn(0);
}

/*@
   PCASMSetSortIndices - Determines whether subdomain indices are sorted.

   Logically Collective on pc

   Input Parameters:
+  pc     - the preconditioner context
-  doSort - sort the subdomain indices

   Level: intermediate

.seealso: `PCASMSetLocalSubdomains()`, `PCASMSetTotalSubdomains()`, `PCASMGetSubKSP()`,
          `PCASMCreateSubdomains2D()`
@*/
PetscErrorCode PCASMSetSortIndices(PC pc, PetscBool doSort) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveBool(pc, doSort, 2);
  PetscTryMethod(pc, "PCASMSetSortIndices_C", (PC, PetscBool), (pc, doSort));
  PetscFunctionReturn(0);
}

/*@C
   PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on pc iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local     - the number of blocks on this processor or NULL
.  first_local - the global number of the first block on this processor or NULL,
                 all processors must request or all must pass NULL
-  ksp         - the array of KSP contexts

   Note:
   After PCASMGetSubKSP() the array of KSPes is not to be freed.

   You must call KSPSetUp() before calling PCASMGetSubKSP().

   Fortran note:
   The output argument 'ksp' must be an array of sufficient length or PETSC_NULL_KSP. The latter can be used to learn the necessary length.

   Level: advanced

.seealso: `PCASMSetTotalSubdomains()`, `PCASMSetTotalSubdomains()`, `PCASMSetOverlap()`,
          `PCASMCreateSubdomains2D()`,
@*/
PetscErrorCode PCASMGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscUseMethod(pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
   PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
           its own KSP object.

   Options Database Keys:
+  -pc_asm_blocks <blks>                          - Sets total blocks
.  -pc_asm_overlap <ovl>                          - Sets overlap
.  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type, default is restrict
-  -pc_asm_local_type [additive, multiplicative]  - Sets ASM type, default is additive

     IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
      will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
      -pc_asm_type basic to use the standard ASM.

   Notes:
   Each processor can have one or more blocks, but a block cannot be shared by more
   than one processor. Use PCGASM for subdomains shared by multiple processes.
   Defaults to one block per processor.

   To set options on the solvers for each block append -sub_ to all the KSP, and PC
   options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

   To set the options on the solvers separate for each block call PCASMGetSubKSP()
   and set the options directly on the resulting KSP object (you can access its PC
   with KSPGetPC())

   Level: beginner

   References:
+  * - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
     Courant Institute, New York University Technical report
-  * - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
     Cambridge University Press.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
          `PCBJACOBI`, `PCASMGetSubKSP()`, `PCASMSetLocalSubdomains()`, `PCASMType`, `PCASMGetType()`, `PCASMSetLocalType()`, `PCASMGetLocalType()`
          `PCASMSetTotalSubdomains()`, `PCSetModifySubMatrices()`, `PCASMSetOverlap()`, `PCASMSetType()`, `PCCompositeType`

M*/

/* Constructor for the PCASM preconditioner: allocates the PC_ASM context with its
   defaults (one block per process, overlap 1, restricted ASM, additive local
   composition, sorted indices), installs the function table, and composes the
   PCASMSetXXX/PCASMGetXXX methods so the public wrappers can dispatch to them. */
PETSC_EXTERN PetscErrorCode PCCreate_ASM(PC pc) {
  PC_ASM *osm;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(pc, &osm));

  /* default state; subdomains, work vectors, and submatrices are built lazily in PCSetUp_ASM() */
  osm->n             = PETSC_DECIDE;
  osm->n_local       = 0;
  osm->n_local_true  = PETSC_DECIDE;
  osm->overlap       = 1;
  osm->ksp           = NULL;
  osm->restriction   = NULL;
  osm->lprolongation = NULL;
  osm->lrestriction  = NULL;
  osm->x             = NULL;
  osm->y             = NULL;
  osm->is            = NULL;
  osm->is_local      = NULL;
  osm->mat           = NULL;
  osm->pmat          = NULL;
  osm->type          = PC_ASM_RESTRICT;
  osm->loctype       = PC_COMPOSITE_ADDITIVE;
  osm->sort_indices  = PETSC_TRUE;
  osm->dm_subdomains = PETSC_FALSE;
  osm->sub_mat_type  = NULL;

  pc->data                 = (void *)osm;
  pc->ops->apply           = PCApply_ASM;
  pc->ops->matapply        = PCMatApply_ASM;
  pc->ops->applytranspose  = PCApplyTranspose_ASM;
  pc->ops->setup           = PCSetUp_ASM;
  pc->ops->reset           = PCReset_ASM;
  pc->ops->destroy         = PCDestroy_ASM;
  pc->ops->setfromoptions  = PCSetFromOptions_ASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_ASM;
  pc->ops->view            = PCView_ASM;
  pc->ops->applyrichardson = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetLocalSubdomains_C", PCASMSetLocalSubdomains_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetTotalSubdomains_C", PCASMSetTotalSubdomains_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetOverlap_C", PCASMSetOverlap_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetType_C", PCASMSetType_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetType_C", PCASMGetType_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetLocalType_C", PCASMSetLocalType_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetLocalType_C", PCASMGetLocalType_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetSortIndices_C", PCASMSetSortIndices_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetSubKSP_C", PCASMGetSubKSP_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMGetSubMatType_C", PCASMGetSubMatType_ASM));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCASMSetSubMatType_C", PCASMSetSubMatType_ASM));
  PetscFunctionReturn(0);
}

/*@C
   PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.

   Collective

   Input Parameters:
+  A - The global matrix operator
-  n - the number of local blocks

   Output Parameters:
.
   outis - the array of index sets defining the subdomains

   Level: advanced

   Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
   from these if you use PCASMSetLocalSubdomains()

   In the Fortran version you must provide the array outis[] already allocated of length n.

.seealso: `PCASMSetLocalSubdomains()`, `PCASMDestroySubdomains()`
@*/
PetscErrorCode PCASMCreateSubdomains(Mat A, PetscInt n, IS *outis[]) {
  MatPartitioning mpart;
  const char     *prefix;
  PetscInt        i, j, rstart, rend, bs;
  PetscBool       hasop, isbaij = PETSC_FALSE, foundpart = PETSC_FALSE;
  Mat             Ad = NULL, adj;
  IS              ispart, isnumb, *is;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(outis, 3);
  PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of local blocks must be > 0, n = %" PetscInt_FMT, n);

  /* Get prefix, row distribution, and block size */
  PetscCall(MatGetOptionsPrefix(A, &prefix));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(MatGetBlockSize(A, &bs));
  /* local row range must be block-aligned so subdomains can respect the block structure */
  PetscCheck(rstart / bs * bs == rstart && rend / bs * bs == rend, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "bad row distribution [%" PetscInt_FMT ",%" PetscInt_FMT ") for matrix block size %" PetscInt_FMT, rstart, rend, bs);

  /* Get diagonal block from matrix if possible */
  PetscCall(MatHasOperation(A, MATOP_GET_DIAGONAL_BLOCK, &hasop));
  if (hasop) PetscCall(MatGetDiagonalBlock(A, &Ad));
  if (Ad) {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)Ad, MATSEQBAIJ, &isbaij));
    if (!isbaij) PetscCall(PetscObjectBaseTypeCompare((PetscObject)Ad, MATSEQSBAIJ, &isbaij));
  }
  if (Ad && n > 1) {
    PetscBool match, done;
    /* Try to setup a good matrix partitioning if available */
    PetscCall(MatPartitioningCreate(PETSC_COMM_SELF, &mpart));
    PetscCall(PetscObjectSetOptionsPrefix((PetscObject)mpart, prefix));
    PetscCall(MatPartitioningSetFromOptions(mpart));
    PetscCall(PetscObjectTypeCompare((PetscObject)mpart, MATPARTITIONINGCURRENT, &match));
    if (!match) PetscCall(PetscObjectTypeCompare((PetscObject)mpart, MATPARTITIONINGSQUARE, &match));
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt        na;
      const PetscInt *ia, *ja;
      PetscCall(MatGetRowIJ(Ad, 0, PETSC_TRUE, isbaij, &na, &ia, &ja, &done));
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt        i, j, len, nnz, cnt, *iia = NULL, *jja = NULL;
        const PetscInt *row;
        nnz = 0;
        for (i = 0; i < na; i++) { /* count number of nonzeros */
          len = ia[i + 1] - ia[i];
          row = ja + ia[i];
          for (j = 0; j < len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--;
              break;
            }
          }
          nnz += len;
        }
        PetscCall(PetscMalloc1(na + 1, &iia));
        PetscCall(PetscMalloc1(nnz, &jja));
        nnz    = 0;
        iia[0] = 0;
        for (i = 0; i < na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i + 1] - ia[i];
          row = ja + ia[i];
          for (j = 0; j < len; j++) {
            if (row[j] != i) { /* if not diagonal */
              jja[nnz + cnt++] = row[j];
            }
          }
          nnz += cnt;
          iia[i + 1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        PetscCall(MatCreateMPIAdj(PETSC_COMM_SELF, na, na, iia, jja, NULL, &adj));
        PetscCall(MatPartitioningSetAdjacency(mpart, adj));
        PetscCall(MatPartitioningSetNParts(mpart, n));
        PetscCall(MatPartitioningApply(mpart, &ispart));
        PetscCall(ISPartitioningToNumbering(ispart, &isnumb));
        PetscCall(MatDestroy(&adj));
        foundpart = PETSC_TRUE;
      }
      PetscCall(MatRestoreRowIJ(Ad, 0, PETSC_TRUE, isbaij, &na, &ia, &ja, &done));
    }
    PetscCall(MatPartitioningDestroy(&mpart));
  }

  PetscCall(PetscMalloc1(n, &is));
  *outis = is;

  if (!foundpart) {
    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend - rstart) / bs;
    PetscInt start = rstart;
    for (i = 0; i < n; i++) {
      /* first (mbs % n) chunks get one extra block row each */
      PetscInt count = (mbs / n + ((mbs % n) > i)) * bs;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, count, start, 1, &is[i]));
      start += count;
    }

  } else {
    /* Partitioning by adjacency of diagonal block */

    const PetscInt *numbering;
    PetscInt       *count, nidx, *indices, *newidx, start = 0;
    /* Get node count in each partition */
    PetscCall(PetscMalloc1(n, &count));
    PetscCall(ISPartitioningCount(ispart, n, count));
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i = 0; i < n; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    PetscCall(ISGetLocalSize(isnumb, &nidx));
    PetscCall(PetscMalloc1(nidx, &indices));
    for (i = 0; i < nidx; i++) indices[i] = i; /* needs to be initialized */
    PetscCall(ISGetIndices(isnumb, &numbering));
    PetscCall(PetscSortIntWithPermutation(nidx, numbering, indices));
    PetscCall(ISRestoreIndices(isnumb, &numbering));
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      PetscCall(PetscMalloc1(nidx * bs, &newidx));
      for (i = 0; i < nidx; i++) {
        for (j = 0; j < bs; j++) newidx[i * bs + j] = indices[i] * bs + j;
      }
      PetscCall(PetscFree(indices));
      nidx *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i = 0; i < nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i = 0; i < n; i++) {
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count[i], &indices[start], PETSC_COPY_VALUES, &is[i]));
      PetscCall(ISSort(is[i]));
      start += count[i];
    }

    PetscCall(PetscFree(count));
    PetscCall(PetscFree(indices));
    PetscCall(ISDestroy(&isnumb));
    PetscCall(ISDestroy(&ispart));
  }
  PetscFunctionReturn(0);
}

/*@C
   PCASMDestroySubdomains - Destroys the index sets created with
   PCASMCreateSubdomains(). Should be called after setting subdomains
   with PCASMSetLocalSubdomains().

   Collective

   Input Parameters:
+  n - the number of index sets
.  is - the array of index sets
-  is_local - the array of local index sets, can be NULL

   Level: advanced

.seealso: `PCASMCreateSubdomains()`, `PCASMSetLocalSubdomains()`
@*/
PetscErrorCode PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[]) {
  PetscInt i;

  PetscFunctionBegin;
  if (n <= 0) PetscFunctionReturn(0);
  if (is) {
    PetscValidPointer(is, 2);
    for (i = 0; i < n; i++) PetscCall(ISDestroy(&is[i]));
    PetscCall(PetscFree(is));
  }
  if (is_local) {
    PetscValidPointer(is_local, 3);
    for (i = 0; i < n; i++) PetscCall(ISDestroy(&is_local[i]));
    PetscCall(PetscFree(is_local));
  }
  PetscFunctionReturn(0);
}

/*@
   PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
   preconditioner for a two-dimensional problem on a regular grid.

   Not Collective

   Input Parameters:
+  m       - the number of mesh points in the x direction
.  n       - the number of mesh points in the y direction
.  M       - the number of subdomains in the x direction
.  N       - the number of subdomains in the y direction
.  dof     - degrees of freedom per node
-  overlap - overlap in mesh lines

   Output Parameters:
+  Nsub - the number of subdomains created
.
is - array of index sets defining overlapping (if overlap > 0) subdomains 1515 - is_local - array of index sets defining non-overlapping subdomains 1516 1517 Note: 1518 Presently PCAMSCreateSubdomains2d() is valid only for sequential 1519 preconditioners. More general related routines are 1520 PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains(). 1521 1522 Level: advanced 1523 1524 .seealso: `PCASMSetTotalSubdomains()`, `PCASMSetLocalSubdomains()`, `PCASMGetSubKSP()`, 1525 `PCASMSetOverlap()` 1526 @*/ 1527 PetscErrorCode PCASMCreateSubdomains2D(PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt dof, PetscInt overlap, PetscInt *Nsub, IS **is, IS **is_local) { 1528 PetscInt i, j, height, width, ystart, xstart, yleft, yright, xleft, xright, loc_outer; 1529 PetscInt nidx, *idx, loc, ii, jj, count; 1530 1531 PetscFunctionBegin; 1532 PetscCheck(dof == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "dof must be 1"); 1533 1534 *Nsub = N * M; 1535 PetscCall(PetscMalloc1(*Nsub, is)); 1536 PetscCall(PetscMalloc1(*Nsub, is_local)); 1537 ystart = 0; 1538 loc_outer = 0; 1539 for (i = 0; i < N; i++) { 1540 height = n / N + ((n % N) > i); /* height of subdomain */ 1541 PetscCheck(height >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many N subdomains for mesh dimension n"); 1542 yleft = ystart - overlap; 1543 if (yleft < 0) yleft = 0; 1544 yright = ystart + height + overlap; 1545 if (yright > n) yright = n; 1546 xstart = 0; 1547 for (j = 0; j < M; j++) { 1548 width = m / M + ((m % M) > j); /* width of subdomain */ 1549 PetscCheck(width >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many M subdomains for mesh dimension m"); 1550 xleft = xstart - overlap; 1551 if (xleft < 0) xleft = 0; 1552 xright = xstart + width + overlap; 1553 if (xright > m) xright = m; 1554 nidx = (xright - xleft) * (yright - yleft); 1555 PetscCall(PetscMalloc1(nidx, &idx)); 1556 loc = 0; 1557 for (ii = yleft; ii < yright; ii++) { 1558 count = m * ii + xleft; 1559 for (jj = xleft; jj < xright; jj++) 
idx[loc++] = count++; 1560 } 1561 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nidx, idx, PETSC_COPY_VALUES, (*is) + loc_outer)); 1562 if (overlap == 0) { 1563 PetscCall(PetscObjectReference((PetscObject)(*is)[loc_outer])); 1564 1565 (*is_local)[loc_outer] = (*is)[loc_outer]; 1566 } else { 1567 for (loc = 0, ii = ystart; ii < ystart + height; ii++) { 1568 for (jj = xstart; jj < xstart + width; jj++) idx[loc++] = m * ii + jj; 1569 } 1570 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, loc, idx, PETSC_COPY_VALUES, *is_local + loc_outer)); 1571 } 1572 PetscCall(PetscFree(idx)); 1573 xstart += width; 1574 loc_outer++; 1575 } 1576 ystart += height; 1577 } 1578 for (i = 0; i < *Nsub; i++) PetscCall(ISSort((*is)[i])); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 /*@C 1583 PCASMGetLocalSubdomains - Gets the local subdomains (for this processor 1584 only) for the additive Schwarz preconditioner. 1585 1586 Not Collective 1587 1588 Input Parameter: 1589 . pc - the preconditioner context 1590 1591 Output Parameters: 1592 + n - if requested, the number of subdomains for this processor (default value = 1) 1593 . is - if requested, the index sets that define the subdomains for this processor 1594 - is_local - if requested, the index sets that define the local part of the subdomains for this processor (can be NULL) 1595 1596 Notes: 1597 The IS numbering is in the parallel, global numbering of the vector. 
1598 1599 Level: advanced 1600 1601 .seealso: `PCASMSetTotalSubdomains()`, `PCASMSetOverlap()`, `PCASMGetSubKSP()`, 1602 `PCASMCreateSubdomains2D()`, `PCASMSetLocalSubdomains()`, `PCASMGetLocalSubmatrices()` 1603 @*/ 1604 PetscErrorCode PCASMGetLocalSubdomains(PC pc, PetscInt *n, IS *is[], IS *is_local[]) { 1605 PC_ASM *osm = (PC_ASM *)pc->data; 1606 PetscBool match; 1607 1608 PetscFunctionBegin; 1609 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1610 if (n) PetscValidIntPointer(n, 2); 1611 if (is) PetscValidPointer(is, 3); 1612 if (is_local) PetscValidPointer(is_local, 4); 1613 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &match)); 1614 PetscCheck(match, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "PC is not a PCASM"); 1615 if (n) *n = osm->n_local_true; 1616 if (is) *is = osm->is; 1617 if (is_local) *is_local = osm->is_local; 1618 PetscFunctionReturn(0); 1619 } 1620 1621 /*@C 1622 PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor 1623 only) for the additive Schwarz preconditioner. 1624 1625 Not Collective 1626 1627 Input Parameter: 1628 . pc - the preconditioner context 1629 1630 Output Parameters: 1631 + n - if requested, the number of matrices for this processor (default value = 1) 1632 - mat - if requested, the matrices 1633 1634 Level: advanced 1635 1636 Notes: 1637 Call after PCSetUp() (or KSPSetUp()) but before PCApply() and before PCSetUpOnBlocks()) 1638 1639 Usually one would use PCSetModifySubMatrices() to change the submatrices in building the preconditioner. 
1640 1641 .seealso: `PCASMSetTotalSubdomains()`, `PCASMSetOverlap()`, `PCASMGetSubKSP()`, 1642 `PCASMCreateSubdomains2D()`, `PCASMSetLocalSubdomains()`, `PCASMGetLocalSubdomains()`, `PCSetModifySubMatrices()` 1643 @*/ 1644 PetscErrorCode PCASMGetLocalSubmatrices(PC pc, PetscInt *n, Mat *mat[]) { 1645 PC_ASM *osm; 1646 PetscBool match; 1647 1648 PetscFunctionBegin; 1649 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1650 if (n) PetscValidIntPointer(n, 2); 1651 if (mat) PetscValidPointer(mat, 3); 1652 PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call after KSPSetUp() or PCSetUp()."); 1653 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &match)); 1654 if (!match) { 1655 if (n) *n = 0; 1656 if (mat) *mat = NULL; 1657 } else { 1658 osm = (PC_ASM *)pc->data; 1659 if (n) *n = osm->n_local_true; 1660 if (mat) *mat = osm->pmat; 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 /*@ 1666 PCASMSetDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible. 1667 1668 Logically Collective 1669 1670 Input Parameters: 1671 + pc - the preconditioner 1672 - flg - boolean indicating whether to use subdomains defined by the DM 1673 1674 Options Database Key: 1675 . -pc_asm_dm_subdomains <bool> - use subdomains defined by the DM 1676 1677 Level: intermediate 1678 1679 Notes: 1680 PCASMSetTotalSubdomains() and PCASMSetOverlap() take precedence over PCASMSetDMSubdomains(), 1681 so setting either of the first two effectively turns the latter off. 
1682 1683 .seealso: `PCASMGetDMSubdomains()`, `PCASMSetTotalSubdomains()`, `PCASMSetOverlap()` 1684 `PCASMCreateSubdomains2D()`, `PCASMSetLocalSubdomains()`, `PCASMGetLocalSubdomains()` 1685 @*/ 1686 PetscErrorCode PCASMSetDMSubdomains(PC pc, PetscBool flg) { 1687 PC_ASM *osm = (PC_ASM *)pc->data; 1688 PetscBool match; 1689 1690 PetscFunctionBegin; 1691 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1692 PetscValidLogicalCollectiveBool(pc, flg, 2); 1693 PetscCheck(!pc->setupcalled, ((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Not for a setup PC."); 1694 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &match)); 1695 if (match) osm->dm_subdomains = flg; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 /*@ 1700 PCASMGetDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible. 1701 Not Collective 1702 1703 Input Parameter: 1704 . pc - the preconditioner 1705 1706 Output Parameter: 1707 . flg - boolean indicating whether to use subdomains defined by the DM 1708 1709 Level: intermediate 1710 1711 .seealso: `PCASMSetDMSubdomains()`, `PCASMSetTotalSubdomains()`, `PCASMSetOverlap()` 1712 `PCASMCreateSubdomains2D()`, `PCASMSetLocalSubdomains()`, `PCASMGetLocalSubdomains()` 1713 @*/ 1714 PetscErrorCode PCASMGetDMSubdomains(PC pc, PetscBool *flg) { 1715 PC_ASM *osm = (PC_ASM *)pc->data; 1716 PetscBool match; 1717 1718 PetscFunctionBegin; 1719 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1720 PetscValidBoolPointer(flg, 2); 1721 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &match)); 1722 if (match) *flg = osm->dm_subdomains; 1723 else *flg = PETSC_FALSE; 1724 PetscFunctionReturn(0); 1725 } 1726 1727 /*@ 1728 PCASMGetSubMatType - Gets the matrix type used for ASM subsolves, as a string. 1729 1730 Not Collective 1731 1732 Input Parameter: 1733 . pc - the PC 1734 1735 Output Parameter: 1736 . 
-pc_asm_sub_mat_type - name of matrix type 1737 1738 Level: advanced 1739 1740 .seealso: `PCASMSetSubMatType()`, `PCASM`, `PCSetType()`, `VecSetType()`, `MatType`, `Mat` 1741 @*/ 1742 PetscErrorCode PCASMGetSubMatType(PC pc, MatType *sub_mat_type) { 1743 PetscFunctionBegin; 1744 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1745 PetscTryMethod(pc, "PCASMGetSubMatType_C", (PC, MatType *), (pc, sub_mat_type)); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 /*@ 1750 PCASMSetSubMatType - Set the type of matrix used for ASM subsolves 1751 1752 Collective on Mat 1753 1754 Input Parameters: 1755 + pc - the PC object 1756 - sub_mat_type - matrix type 1757 1758 Options Database Key: 1759 . -pc_asm_sub_mat_type <sub_mat_type> - Sets the matrix type used for subsolves, for example, seqaijviennacl. If you specify a base name like aijviennacl, the corresponding sequential type is assumed. 1760 1761 Notes: 1762 See "${PETSC_DIR}/include/petscmat.h" for available types 1763 1764 Level: advanced 1765 1766 .seealso: `PCASMGetSubMatType()`, `PCASM`, `PCSetType()`, `VecSetType()`, `MatType`, `Mat` 1767 @*/ 1768 PetscErrorCode PCASMSetSubMatType(PC pc, MatType sub_mat_type) { 1769 PetscFunctionBegin; 1770 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 1771 PetscTryMethod(pc, "PCASMSetSubMatType_C", (PC, MatType), (pc, sub_mat_type)); 1772 PetscFunctionReturn(0); 1773 } 1774