1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (no preallocation) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1334 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1335 PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 1336 //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1337 //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1338 //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1339 1340 /* Defaults to identity */ 1341 { 1342 Vec w; 1343 const PetscScalar *wa; 1344 1345 PetscCall(MatCreateVecs(T, &w, NULL)); 1346 PetscCall(VecSetLocalToGlobalMapping(w, al2g)); 1347 PetscCall(VecSet(w, 1.0)); 1348 for (i = 0; i < nee; i++) { 1349 const PetscInt *idxs; 1350 PetscInt nl; 1351 1352 PetscCall(ISGetLocalSize(eedges[i], &nl)); 1353 PetscCall(ISGetIndices(eedges[i], &idxs)); 1354 PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES)); 1355 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1356 } 1357 PetscCall(VecAssemblyBegin(w)); 1358 PetscCall(VecAssemblyEnd(w)); 1359 PetscCall(VecGetArrayRead(w, &wa)); 1360 for (i = T->rmap->rstart; i < T->rmap->rend; i++) 1361 if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1362 PetscCall(VecRestoreArrayRead(w, &wa)); 1363 PetscCall(VecDestroy(&w)); 1364 } 1365 1366 /* Create discrete gradient for the coarser level if needed */ 1367 PetscCall(MatDestroy(&pcbddc->nedcG)); 1368 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1369 if (pcbddc->current_level < pcbddc->max_levels) { 1370 ISLocalToGlobalMapping cel2g, cvl2g; 1371 IS wis, gwis; 1372 PetscInt cnv, cne; 1373 1374 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1375 if (fl2g) { 1376 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1377 } else { 1378 PetscCall(PetscObjectReference((PetscObject)wis)); 1379 pcbddc->nedclocal = wis; 1380 } 1381 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1382 PetscCall(ISDestroy(&wis)); 1383 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1384 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1385 PetscCall(ISDestroy(&wis)); 1386 PetscCall(ISDestroy(&gwis)); 1387 1388 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1389 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1390 PetscCall(ISDestroy(&wis)); 1391 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1392 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1393 PetscCall(ISDestroy(&wis)); 1394 PetscCall(ISDestroy(&gwis)); 1395 1396 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1397 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1398 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1399 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1400 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1401 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1402 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1403 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1404 } 1405 1406 MatNullSpace nnsp; 1407 PetscBool nnsp_has_const = PETSC_FALSE; 1408 const Vec *nnsp_vecs = NULL; 1409 PetscInt nnsp_nvecs = 0; 1410 VecScatter nnsp_vscat = NULL; 1411 PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp)); 1412 if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs)); 1413 if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */ 1414 IS allextcols, gallextcols, galleedges, is_E_to_zero; 1415 Vec E, V; 1416 PetscInt *eedgesidxs; 1417 const PetscScalar *evals; 1418 1419 PetscCall(MatCreateVecs(pc->pmat, &E, NULL)); 1420 PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL)); 1421 PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols)); 1422 cum = 0; 1423 for (i = 0; i < nee; i++) { 1424 PetscInt j; 1425 1426 PetscCall(ISGetLocalSize(eedges[i], &j)); 1427 PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized edge %" PetscInt_FMT, i); 1428 cum += j - 1; 1429 } 1430 PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs)); 1431 cum = 0; 1432 for (i = 0; i < nee; i++) { 1433 const PetscInt *idxs; 1434 PetscInt j; 1435 1436 PetscCall(ISGetLocalSize(eedges[i], &j)); 1437 PetscCall(ISGetIndices(eedges[i], &idxs)); 1438 PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */ 1439 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1440 cum += j - 1; 1441 } 1442 PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs)); 1443 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges)); 1444 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols)); 1445 PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat)); 1446 PetscCall(ISDestroy(&allextcols)); 1447 PetscCall(ISDestroy(&gallextcols)); 1448 PetscCall(ISDestroy(&galleedges)); 1449 1450 /* identify dofs we must zero if importing user-defined near nullspace from pmat */ 1451 PetscCall(VecSet(E, 1.0)); 1452 PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES)); 1453 PetscCall(VecAssemblyBegin(E)); 1454 PetscCall(VecAssemblyEnd(E)); 1455 PetscCall(VecGetArrayRead(E, &evals)); 1456 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) 1457 if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart; 1458 PetscCall(VecRestoreArrayRead(E, &evals)); 1459 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero)); 1460 PetscCall(PetscFree(eedgesidxs)); 1461 1462 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V)); 1463 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E)); 1464 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero)); 1465 PetscCall(ISDestroy(&is_E_to_zero)); 1466 PetscCall(VecDestroy(&V)); 1467 PetscCall(VecDestroy(&E)); 1468 } 1469 #if defined(PRINT_GDET) 1470 inc = 0; 1471 lev = pcbddc->current_level; 1472 #endif 1473 1474 /* Insert values in the change of basis matrix */ 1475 for (i = 0; i < nee; i++) { 1476 Mat Gins = NULL, GKins = NULL; 1477 IS cornersis = NULL; 1478 PetscScalar cvals[2]; 1479 1480 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1481 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1482 if (Gins && GKins) { 1483 const PetscScalar *data; 1484 const PetscInt *rows, *cols; 1485 PetscInt nrh, nch, nrc, ncc; 1486 1487 PetscCall(ISGetIndices(eedges[i], &cols)); 1488 /* H1 */ 1489 PetscCall(ISGetIndices(extrows[i], &rows)); 1490 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1491 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1492 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1493 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1494 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1495 /* complement */ 1496 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1497 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1498 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1499 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1500 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1501 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1502 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1503 1504 /* coarse discrete gradient */ 1505 if (pcbddc->nedcG) { 1506 PetscInt cols[2]; 1507 1508 cols[0] = 2 * i; 1509 cols[1] = 2 * i + 1; 1510 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1511 } 1512 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1513 } 1514 PetscCall(ISDestroy(&extrows[i])); 1515 PetscCall(ISDestroy(&extcols[i])); 1516 PetscCall(ISDestroy(&cornersis)); 1517 PetscCall(MatDestroy(&Gins)); 1518 PetscCall(MatDestroy(&GKins)); 1519 } 1520 1521 /* import edge constraints */ 1522 if (nnsp_vscat) { 1523 Vec V, E, *quadvecs; 1524 PetscInt nvecs, nvecs_orth; 1525 MatNullSpace onnsp = NULL; 1526 PetscBool onnsp_has_const = PETSC_FALSE; 1527 const Vec *onnsp_vecs = NULL; 1528 PetscInt onnsp_nvecs = 0, new_nnsp_nvecs, old_nnsp_nvecs; 1529 IS is_E_to_zero; 1530 1531 /* import nearnullspace from preconditioning matrix if user-defined */ 1532 PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp)); 1533 if (onnsp) { 1534 PetscBool isinternal; 1535 1536 PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal)); 1537 if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs)); 1538 } 1539 new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0); 1540 old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0); 1541 nvecs = old_nnsp_nvecs + new_nnsp_nvecs; 1542 PetscCall(PetscMalloc1(nvecs, &quadvecs)); 1543 1544 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V)); 1545 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E)); 1546 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero)); 1547 for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i])); 1548 cum = 0; 1549 if (nnsp_has_const) { 1550 PetscCall(VecSet(V, 1.0)); 1551 PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD)); 1552 PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD)); 1553 cum = 1; 1554 } 1555 for (i = 0; i < nnsp_nvecs; i++) { 1556 PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD)); 1557 PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD)); 1558 } 1559 1560 /* Now add old nnsp if present */ 1561 cum = 0; 1562 if (onnsp_has_const) { 1563 PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0)); 1564 PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0)); 1565 cum = 1; 1566 } 1567 for (i = 0; i < onnsp_nvecs; i++) { 1568 PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs])); 1569 PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0)); 1570 } 1571 nvecs_orth = nvecs; 1572 PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs)); 1573 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp)); 1574 for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i])); 1575 PetscCall(PetscFree(quadvecs)); 1576 PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp")); 1577 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1578 PetscCall(MatNullSpaceDestroy(&nnsp)); 1579 } 1580 PetscCall(VecScatterDestroy(&nnsp_vscat)); 1581 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1582 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1583 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1584 1585 /* Start assembling */ 1586 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1587 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1588 1589 /* Free */ 1590 if (fl2g) { 1591 PetscCall(ISDestroy(&primals)); 1592 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1593 PetscCall(PetscFree(eedges)); 1594 } 1595 1596 /* hack mat_graph with primal dofs on the coarse edges */ 1597 { 1598 PCBDDCGraph graph = pcbddc->mat_graph; 1599 PetscInt *oqueue = graph->queue; 1600 PetscInt *ocptr = graph->cptr; 1601 PetscInt ncc, *idxs; 1602 1603 /* find first primal edge */ 1604 if (pcbddc->nedclocal) { 1605 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1606 } else { 1607 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1608 idxs = cedges; 1609 } 1610 cum = 0; 1611 while (cum < nee && cedges[cum] < 0) cum++; 1612 1613 /* adapt connected components */ 1614 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1615 graph->cptr[0] = 0; 1616 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1617 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1618 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1619 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1620 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1621 ncc++; 1622 lc--; 1623 cum++; 1624 while (cum < nee && cedges[cum] < 0) cum++; 1625 } 1626 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1627 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1628 ncc++; 1629 } 1630 graph->ncc = ncc; 1631 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1632 PetscCall(PetscFree2(ocptr, oqueue)); 1633 } 1634 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1635 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1636 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1637 1638 PetscCall(ISDestroy(&nedfieldlocal)); 1639 PetscCall(PetscFree(extrow)); 1640 PetscCall(PetscFree2(work, rwork)); 1641 PetscCall(PetscFree(corners)); 1642 PetscCall(PetscFree(cedges)); 1643 PetscCall(PetscFree(extrows)); 1644 PetscCall(PetscFree(extcols)); 1645 PetscCall(MatDestroy(&lG)); 1646 1647 /* Complete assembling */ 1648 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1650 if (pcbddc->nedcG) { 1651 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1653 } 1654 1655 PetscCall(ISDestroy(&elements_corners)); 1656 1657 /* set change of basis */ 1658 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1659 PetscCall(MatDestroy(&T)); 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 /* the near-null space of BDDC carries information on quadrature weights, 1664 and these can be collinear -> so cheat with MatNullSpaceCreate 1665 and create a suitable set of basis vectors first */ 1666 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1667 { 1668 PetscInt i; 1669 1670 PetscFunctionBegin; 1671 for (i = 0; i < nvecs; i++) { 1672 PetscInt first, last; 1673 1674 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1675 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1676 if (i >= first && i < last) { 1677 PetscScalar *data; 1678 PetscCall(VecGetArray(quad_vecs[i], &data)); 1679 if (!has_const) { 1680 data[i - first] = 1.; 1681 } else { 1682 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1683 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1684 } 1685 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1686 } 1687 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1688 } 1689 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1690 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1691 PetscInt first, last; 1692 PetscCall(VecLockReadPop(quad_vecs[i])); 1693 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1694 if (i >= first && i < last) { 1695 PetscScalar *data; 1696 PetscCall(VecGetArray(quad_vecs[i], &data)); 1697 if (!has_const) { 1698 data[i - first] = 0.; 1699 } else { 1700 data[2 * i - first] = 0.; 1701 data[2 * i - first + 1] = 0.; 1702 } 1703 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1704 } 1705 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1706 PetscCall(VecLockReadPush(quad_vecs[i])); 1707 } 1708 PetscFunctionReturn(PETSC_SUCCESS); 1709 } 1710 1711 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1712 { 1713 Mat loc_divudotp; 1714 Vec p, v, quad_vec; 1715 ISLocalToGlobalMapping map; 1716 PetscScalar *array; 1717 1718 PetscFunctionBegin; 1719 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1720 if (!transpose) { 1721 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1722 } else { 1723 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1724 } 1725 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1726 PetscCall(VecLockReadPop(quad_vec)); 1727 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1728 1729 /* compute local quad vec */ 1730 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1731 if (!transpose) { 1732 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1733 } else { 1734 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1735 } 1736 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1737 PetscCall(VecSet(p, 1.)); 1738 if (!transpose) { 1739 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1740 } else { 1741 PetscCall(MatMult(loc_divudotp, p, v)); 1742 } 1743 PetscCall(VecDestroy(&p)); 1744 if (vl2l) { 1745 Mat lA; 1746 VecScatter sc; 1747 Vec vins; 1748 1749 PetscCall(MatISGetLocalMat(A, &lA)); 1750 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1751 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1752 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1753 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1754 PetscCall(VecScatterDestroy(&sc)); 1755 PetscCall(VecDestroy(&v)); 1756 v = vins; 1757 } 1758 1759 /* mask summation of interface values */ 1760 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1761 const PetscInt *degree; 1762 PetscSF msf; 1763 1764 PetscCall(VecGetLocalSize(v, &n)); 1765 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1766 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1767 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1768 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1769 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1770 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1771 for (PetscInt i = 0, c = 0; i < nr; i++) { 1772 mmask[c] = 1; 1773 c += degree[i]; 1774 } 1775 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1776 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1777 PetscCall(VecGetArray(v, &array)); 1778 for (PetscInt i = 0; i < n; i++) { 1779 array[i] *= mask[i]; 1780 idxs[i] = i; 1781 } 1782 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1783 PetscCall(VecRestoreArray(v, &array)); 1784 PetscCall(PetscFree3(mmask, mask, idxs)); 1785 PetscCall(VecDestroy(&v)); 1786 PetscCall(VecAssemblyBegin(quad_vec)); 1787 PetscCall(VecAssemblyEnd(quad_vec)); 1788 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1789 PetscCall(VecLockReadPush(quad_vec)); 1790 PetscCall(VecDestroy(&quad_vec)); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1795 { 1796 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1797 1798 PetscFunctionBegin; 1799 if (primalv) { 1800 if (pcbddc->user_primal_vertices_local) { 1801 IS list[2], newp; 1802 1803 list[0] = primalv; 1804 list[1] = pcbddc->user_primal_vertices_local; 1805 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1806 PetscCall(ISSortRemoveDups(newp)); 1807 PetscCall(ISDestroy(&list[1])); 1808 pcbddc->user_primal_vertices_local = newp; 1809 } else { 1810 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1811 } 1812 } 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1817 { 1818 PetscInt f, *comp = (PetscInt *)ctx; 1819 1820 PetscFunctionBegin; 1821 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1826 { 1827 Vec local, global; 1828 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1829 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1830 PetscBool monolithic = PETSC_FALSE; 1831 1832 PetscFunctionBegin; 1833 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1834 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1835 PetscOptionsEnd(); 1836 /* need to convert from global to local topology information and remove references to information in global ordering */ 1837 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1838 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1839 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1840 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1841 if (monolithic) { /* just get block size to properly compute vertices */ 1842 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1843 goto boundary; 1844 } 1845 1846 if (pcbddc->user_provided_isfordofs) { 1847 if (pcbddc->n_ISForDofs) { 1848 PetscInt i; 1849 1850 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1851 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1852 PetscInt bs; 1853 1854 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1855 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1856 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1857 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1858 } 1859 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1860 pcbddc->n_ISForDofs = 0; 1861 PetscCall(PetscFree(pcbddc->ISForDofs)); 1862 } 1863 } else { 1864 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1865 DM dm; 1866 1867 PetscCall(MatGetDM(pc->pmat, &dm)); 1868 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1869 if (dm) { 1870 IS *fields; 1871 PetscInt nf, i; 1872 1873 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1874 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1875 for (i = 0; i < nf; i++) { 1876 PetscInt bs; 1877 1878 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1879 PetscCall(ISGetBlockSize(fields[i], &bs)); 1880 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1881 PetscCall(ISDestroy(&fields[i])); 1882 } 1883 PetscCall(PetscFree(fields)); 1884 pcbddc->n_ISForDofsLocal = nf; 1885 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1886 PetscContainer c; 1887 1888 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1889 if (c) { 1890 MatISLocalFields lf; 1891 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1892 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1893 } else { /* fallback, create the default fields if bs > 1 */ 1894 PetscInt i, n = matis->A->rmap->n; 1895 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1896 if (i > 1) { 1897 pcbddc->n_ISForDofsLocal = i; 1898 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1899 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1900 } 1901 } 1902 } 1903 } else { 1904 PetscInt i; 1905 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1906 } 1907 } 1908 1909 boundary: 1910 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1911 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1912 } else if (pcbddc->DirichletBoundariesLocal) { 1913 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1914 } 1915 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1916 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1917 } else if (pcbddc->NeumannBoundariesLocal) { 1918 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1919 } 1920 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1921 PetscCall(VecDestroy(&global)); 1922 PetscCall(VecDestroy(&local)); 1923 /* detect local disconnected subdomains if requested or needed */ 1924 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1925 IS primalv = NULL; 1926 PetscInt nel; 1927 PetscBool filter = pcbddc->detect_disconnected_filter; 1928 1929 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1930 PetscCall(PetscFree(pcbddc->local_subs)); 1931 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1932 if (matis->allow_repeated && nel) { 1933 const PetscInt *elsizes; 1934 1935 pcbddc->n_local_subs = nel; 1936 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1937 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1938 for (PetscInt i = 0, c = 0; i < nel; i++) { 1939 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1940 c += elsizes[i]; 1941 } 1942 } else { 1943 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1944 } 1945 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1946 PetscCall(ISDestroy(&primalv)); 1947 } 1948 /* early stage corner detection */ 1949 { 1950 DM dm; 1951 1952 PetscCall(MatGetDM(pc->pmat, &dm)); 1953 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1954 if (dm) { 1955 PetscBool isda; 1956 1957 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1958 if (isda) { 1959 ISLocalToGlobalMapping l2l; 1960 IS corners; 1961 Mat lA; 1962 PetscBool gl, lo; 1963 1964 { 1965 Vec cvec; 1966 const PetscScalar *coords; 1967 PetscInt dof, n, cdim; 1968 PetscBool memc = PETSC_TRUE; 1969 1970 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1971 PetscCall(DMGetCoordinates(dm, &cvec)); 1972 PetscCall(VecGetLocalSize(cvec, &n)); 1973 PetscCall(VecGetBlockSize(cvec, &cdim)); 1974 n /= cdim; 1975 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1976 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1977 PetscCall(VecGetArrayRead(cvec, &coords)); 1978 #if defined(PETSC_USE_COMPLEX) 1979 memc = PETSC_FALSE; 1980 #endif 1981 if (dof != 1) memc = PETSC_FALSE; 1982 if (memc) { 1983 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1984 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1985 PetscReal *bcoords = pcbddc->mat_graph->coords; 1986 PetscInt i, b, d; 1987 1988 for (i = 0; i < n; i++) { 1989 for (b = 0; b < dof; b++) { 1990 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1991 } 1992 } 1993 } 1994 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1995 pcbddc->mat_graph->cdim = cdim; 1996 pcbddc->mat_graph->cnloc = dof * n; 1997 pcbddc->mat_graph->cloc = PETSC_FALSE; 1998 } 1999 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 2000 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 2001 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 2002 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 2003 lo = (PetscBool)(l2l && corners); 2004 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2005 if (gl) { /* From PETSc's DMDA */ 2006 const PetscInt *idx; 2007 PetscInt dof, bs, *idxout, n; 2008 2009 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 2010 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 2011 PetscCall(ISGetLocalSize(corners, &n)); 2012 PetscCall(ISGetIndices(corners, &idx)); 2013 if (bs == dof) { 2014 PetscCall(PetscMalloc1(n, &idxout)); 2015 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 2016 } else { /* the original DMDA local-to-local map have been modified */ 2017 PetscInt i, d; 2018 2019 PetscCall(PetscMalloc1(dof * n, &idxout)); 2020 for (i = 0; i < n; i++) 2021 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 2022 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 2023 2024 bs = 1; 2025 n *= dof; 2026 } 2027 PetscCall(ISRestoreIndices(corners, &idx)); 2028 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 2029 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 2030 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 2031 PetscCall(ISDestroy(&corners)); 2032 pcbddc->corner_selected = PETSC_TRUE; 2033 pcbddc->corner_selection = PETSC_TRUE; 2034 } 2035 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 2036 } 2037 } 2038 } 2039 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 2040 DM dm; 2041 2042 PetscCall(MatGetDM(pc->pmat, &dm)); 2043 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2044 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 2045 Vec vcoords; 2046 PetscSection section; 2047 PetscReal *coords; 2048 PetscInt d, cdim, nl, nf, **ctxs; 2049 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 2050 /* debug coordinates */ 2051 PetscViewer viewer; 2052 PetscBool flg; 2053 PetscViewerFormat format; 2054 const char *prefix; 2055 2056 PetscCall(DMGetCoordinateDim(dm, &cdim)); 2057 PetscCall(DMGetLocalSection(dm, §ion)); 2058 PetscCall(PetscSectionGetNumFields(section, &nf)); 2059 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 2060 PetscCall(VecGetLocalSize(vcoords, &nl)); 2061 PetscCall(PetscMalloc1(nl * cdim, &coords)); 2062 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 2063 PetscCall(PetscMalloc1(nf, &ctxs[0])); 2064 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 2065 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 2066 2067 /* debug coordinates */ 2068 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 2069 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 2070 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 2071 for (d = 0; d < cdim; d++) { 2072 PetscInt i; 2073 const PetscScalar *v; 2074 char name[16]; 2075 2076 for (i = 0; i < nf; i++) ctxs[i][0] = d; 2077 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d)); 2078 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 2079 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 2080 if (flg) PetscCall(VecView(vcoords, viewer)); 2081 PetscCall(VecGetArrayRead(vcoords, &v)); 2082 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 2083 PetscCall(VecRestoreArrayRead(vcoords, &v)); 2084 } 2085 PetscCall(VecDestroy(&vcoords)); 2086 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 2087 PetscCall(PetscFree(coords)); 2088 PetscCall(PetscFree(ctxs[0])); 2089 PetscCall(PetscFree2(funcs, ctxs)); 2090 if (flg) { 2091 PetscCall(PetscViewerPopFormat(viewer)); 2092 PetscCall(PetscViewerDestroy(&viewer)); 2093 } 2094 } 2095 } 2096 PetscFunctionReturn(PETSC_SUCCESS); 2097 } 2098 2099 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 2100 { 2101 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2102 IS nis; 2103 const PetscInt *idxs; 2104 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 2105 2106 PetscFunctionBegin; 2107 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 2108 if (mop == MPI_LAND) { 2109 /* init rootdata with true */ 2110 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 2111 } else { 2112 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 2113 } 2114 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 2115 PetscCall(ISGetLocalSize(*is, &nd)); 2116 PetscCall(ISGetIndices(*is, &idxs)); 2117 for (i = 0; i < nd; i++) 2118 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 2119 PetscCall(ISRestoreIndices(*is, &idxs)); 2120 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2121 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2122 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2123 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2124 if (mop == MPI_LAND) { 2125 PetscCall(PetscMalloc1(nd, &nidxs)); 2126 } else { 2127 PetscCall(PetscMalloc1(n, &nidxs)); 2128 } 2129 for (i = 0, nnd = 0; i < n; i++) 2130 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2131 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2132 PetscCall(ISDestroy(is)); 2133 *is = nis; 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2138 { 2139 PC_IS *pcis = (PC_IS *)pc->data; 2140 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2141 2142 PetscFunctionBegin; 2143 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2144 if (pcbddc->ChangeOfBasisMatrix) { 2145 Vec swap; 2146 2147 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2148 swap = pcbddc->work_change; 2149 pcbddc->work_change = r; 2150 r = swap; 2151 } 2152 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2153 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2154 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2155 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2156 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2157 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2158 PetscCall(VecSet(z, 0.)); 2159 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2160 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2161 if (pcbddc->ChangeOfBasisMatrix) { 2162 pcbddc->work_change = r; 2163 PetscCall(VecCopy(z, pcbddc->work_change)); 2164 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2165 } 2166 PetscFunctionReturn(PETSC_SUCCESS); 2167 } 2168 2169 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2170 { 2171 PCBDDCBenignMatMult_ctx ctx; 2172 PetscBool apply_right, apply_left, reset_x; 2173 2174 PetscFunctionBegin; 2175 PetscCall(MatShellGetContext(A, &ctx)); 2176 if (transpose) { 2177 apply_right = ctx->apply_left; 2178 apply_left = ctx->apply_right; 2179 } else { 2180 apply_right = ctx->apply_right; 2181 apply_left = ctx->apply_left; 2182 } 2183 reset_x = PETSC_FALSE; 2184 if (apply_right) { 2185 const PetscScalar *ax; 2186 PetscInt nl, i; 2187 2188 PetscCall(VecGetLocalSize(x, &nl)); 2189 PetscCall(VecGetArrayRead(x, &ax)); 2190 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2191 PetscCall(VecRestoreArrayRead(x, &ax)); 2192 for (i = 0; i < ctx->benign_n; i++) { 2193 PetscScalar sum, val; 2194 const PetscInt *idxs; 2195 PetscInt nz, j; 2196 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2197 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2198 sum = 0.; 2199 if (ctx->apply_p0) { 2200 val = ctx->work[idxs[nz - 1]]; 2201 for (j = 0; j < nz - 1; j++) { 2202 sum += ctx->work[idxs[j]]; 2203 ctx->work[idxs[j]] += val; 2204 } 2205 } else { 2206 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2207 } 2208 ctx->work[idxs[nz - 1]] -= sum; 2209 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2210 } 2211 PetscCall(VecPlaceArray(x, ctx->work)); 2212 reset_x = PETSC_TRUE; 2213 } 2214 if (transpose) { 2215 PetscCall(MatMultTranspose(ctx->A, x, y)); 2216 } else { 2217 PetscCall(MatMult(ctx->A, x, y)); 2218 } 2219 if (reset_x) PetscCall(VecResetArray(x)); 2220 if (apply_left) { 2221 PetscScalar *ay; 2222 PetscInt i; 2223 2224 PetscCall(VecGetArray(y, &ay)); 2225 for (i = 0; i < ctx->benign_n; i++) { 2226 PetscScalar sum, val; 2227 const PetscInt *idxs; 2228 PetscInt nz, j; 2229 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2230 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2231 val = -ay[idxs[nz - 1]]; 2232 if (ctx->apply_p0) { 2233 sum = 0.; 2234 for (j = 0; j < nz - 1; j++) { 2235 sum += ay[idxs[j]]; 2236 ay[idxs[j]] += val; 2237 } 2238 ay[idxs[nz - 1]] += sum; 2239 } else { 2240 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2241 ay[idxs[nz - 1]] = 0.; 2242 } 2243 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2244 } 2245 PetscCall(VecRestoreArray(y, &ay)); 2246 } 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2251 { 2252 PetscFunctionBegin; 2253 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2258 { 2259 PetscFunctionBegin; 2260 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2261 PetscFunctionReturn(PETSC_SUCCESS); 2262 } 2263 2264 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2265 { 2266 PC_IS *pcis = (PC_IS *)pc->data; 2267 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2268 PCBDDCBenignMatMult_ctx ctx; 2269 2270 PetscFunctionBegin; 2271 if (!restore) { 2272 Mat A_IB, A_BI; 2273 PetscScalar *work; 2274 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2275 2276 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2277 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2278 PetscCall(PetscMalloc1(pcis->n, &work)); 2279 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2280 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2281 PetscCall(MatSetType(A_IB, MATSHELL)); 2282 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private)); 2283 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private)); 2284 PetscCall(PetscNew(&ctx)); 2285 PetscCall(MatShellSetContext(A_IB, ctx)); 2286 ctx->apply_left = PETSC_TRUE; 2287 ctx->apply_right = PETSC_FALSE; 2288 ctx->apply_p0 = PETSC_FALSE; 2289 ctx->benign_n = pcbddc->benign_n; 2290 if (reuse) { 2291 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2292 ctx->free = PETSC_FALSE; 2293 } else { /* TODO: could be optimized for successive solves */ 2294 ISLocalToGlobalMapping N_to_D; 2295 PetscInt i; 2296 2297 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2298 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2299 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2300 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2301 ctx->free = PETSC_TRUE; 2302 } 2303 ctx->A = pcis->A_IB; 2304 ctx->work = work; 2305 PetscCall(MatSetUp(A_IB)); 2306 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2307 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2308 pcis->A_IB = A_IB; 2309 2310 /* A_BI as A_IB^T */ 2311 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2312 pcbddc->benign_original_mat = pcis->A_BI; 2313 pcis->A_BI = A_BI; 2314 } else { 2315 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2316 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2317 PetscCall(MatDestroy(&pcis->A_IB)); 2318 pcis->A_IB = ctx->A; 2319 ctx->A = NULL; 2320 PetscCall(MatDestroy(&pcis->A_BI)); 2321 pcis->A_BI = pcbddc->benign_original_mat; 2322 pcbddc->benign_original_mat = NULL; 2323 if (ctx->free) { 2324 PetscInt i; 2325 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2326 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2327 } 2328 PetscCall(PetscFree(ctx->work)); 2329 PetscCall(PetscFree(ctx)); 2330 } 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 /* used just in bddc debug mode */ 2335 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2336 { 2337 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2338 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2339 Mat An; 2340 2341 PetscFunctionBegin; 2342 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2343 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2344 if (is1) { 2345 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2346 PetscCall(MatDestroy(&An)); 2347 } else { 2348 *B = An; 2349 } 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } 2352 2353 /* TODO: add reuse flag */ 2354 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2355 { 2356 Mat Bt; 2357 PetscScalar *a, *bdata; 2358 const PetscInt *ii, *ij; 2359 PetscInt m, n, i, nnz, *bii, *bij; 2360 PetscBool flg_row; 2361 2362 PetscFunctionBegin; 2363 PetscCall(MatGetSize(A, &n, &m)); 2364 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2365 PetscCall(MatSeqAIJGetArray(A, &a)); 2366 nnz = n; 2367 for (i = 0; i < ii[n]; i++) { 2368 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2369 } 2370 PetscCall(PetscMalloc1(n + 1, &bii)); 2371 PetscCall(PetscMalloc1(nnz, &bij)); 2372 PetscCall(PetscMalloc1(nnz, &bdata)); 2373 nnz = 0; 2374 bii[0] = 0; 2375 for (i = 0; i < n; i++) { 2376 PetscInt j; 2377 for (j = ii[i]; j < ii[i + 1]; j++) { 2378 PetscScalar entry = a[j]; 2379 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2380 bij[nnz] = ij[j]; 2381 bdata[nnz] = entry; 2382 nnz++; 2383 } 2384 } 2385 bii[i + 1] = nnz; 2386 } 2387 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2388 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2389 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2390 { 2391 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2392 b->free_a = PETSC_TRUE; 2393 b->free_ij = PETSC_TRUE; 2394 } 2395 if (*B == A) PetscCall(MatDestroy(&A)); 2396 *B = Bt; 2397 PetscFunctionReturn(PETSC_SUCCESS); 2398 } 2399 2400 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2401 { 2402 Mat B = NULL; 2403 DM dm; 2404 IS is_dummy, *cc_n; 2405 ISLocalToGlobalMapping l2gmap_dummy; 2406 PCBDDCGraph graph; 2407 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2408 PetscInt i, n; 2409 PetscInt *xadj, *adjncy; 2410 PetscBool isplex = PETSC_FALSE; 2411 2412 PetscFunctionBegin; 2413 if (ncc) *ncc = 0; 2414 if (cc) *cc = NULL; 2415 if (primalv) *primalv = NULL; 2416 PetscCall(PCBDDCGraphCreate(&graph)); 2417 PetscCall(MatGetDM(pc->pmat, &dm)); 2418 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2419 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2420 if (filter) isplex = PETSC_FALSE; 2421 2422 if (isplex) { /* this code has been modified from plexpartition.c */ 2423 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2424 PetscInt *adj = NULL; 2425 IS cellNumbering; 2426 const PetscInt *cellNum; 2427 PetscBool useCone, useClosure; 2428 PetscSection section; 2429 PetscSegBuffer adjBuffer; 2430 PetscSF sfPoint; 2431 2432 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2433 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2434 PetscCall(DMGetPointSF(dm, &sfPoint)); 2435 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2436 /* Build adjacency graph via a section/segbuffer */ 2437 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2438 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2439 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2440 /* Always use FVM adjacency to create partitioner graph */ 2441 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2442 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2443 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2444 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2445 for (n = 0, p = pStart; p < pEnd; p++) { 2446 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2447 if (nroots > 0) { 2448 if (cellNum[p] < 0) continue; 2449 } 2450 adjSize = PETSC_DETERMINE; 2451 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2452 for (a = 0; a < adjSize; ++a) { 2453 const PetscInt point = adj[a]; 2454 if (pStart <= point && point < pEnd) { 2455 PetscInt *PETSC_RESTRICT pBuf; 2456 PetscCall(PetscSectionAddDof(section, p, 1)); 2457 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2458 *pBuf = point; 2459 } 2460 } 2461 n++; 2462 } 2463 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2464 /* Derive CSR graph from section/segbuffer */ 2465 PetscCall(PetscSectionSetUp(section)); 2466 PetscCall(PetscSectionGetStorageSize(section, &size)); 2467 PetscCall(PetscMalloc1(n + 1, &xadj)); 2468 for (idx = 0, p = pStart; p < pEnd; p++) { 2469 if (nroots > 0) { 2470 if (cellNum[p] < 0) continue; 2471 } 2472 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2473 } 2474 xadj[n] = size; 2475 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2476 /* Clean up */ 2477 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2478 PetscCall(PetscSectionDestroy(§ion)); 2479 PetscCall(PetscFree(adj)); 2480 graph->xadj = xadj; 2481 graph->adjncy = adjncy; 2482 } else { 2483 Mat A; 2484 PetscBool isseqaij, flg_row; 2485 2486 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2487 if (!A->rmap->N || !A->cmap->N) { 2488 PetscCall(PCBDDCGraphDestroy(&graph)); 2489 PetscFunctionReturn(PETSC_SUCCESS); 2490 } 2491 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2492 if (!isseqaij && filter) { 2493 PetscBool isseqdense; 2494 2495 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2496 if (!isseqdense) { 2497 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2498 } else { /* TODO: rectangular case and LDA */ 2499 PetscScalar *array; 2500 PetscReal chop = 1.e-6; 2501 2502 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2503 PetscCall(MatDenseGetArray(B, &array)); 2504 PetscCall(MatGetSize(B, &n, NULL)); 2505 for (i = 0; i < n; i++) { 2506 PetscInt j; 2507 for (j = i + 1; j < n; j++) { 2508 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2509 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2510 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2511 } 2512 } 2513 PetscCall(MatDenseRestoreArray(B, &array)); 2514 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2515 } 2516 } else { 2517 PetscCall(PetscObjectReference((PetscObject)A)); 2518 B = A; 2519 } 2520 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2521 2522 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2523 if (filter) { 2524 PetscScalar *data; 2525 PetscInt j, cum; 2526 2527 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2528 PetscCall(MatSeqAIJGetArray(B, &data)); 2529 cum = 0; 2530 for (i = 0; i < n; i++) { 2531 PetscInt t; 2532 2533 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2534 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2535 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2536 } 2537 t = xadj_filtered[i]; 2538 xadj_filtered[i] = cum; 2539 cum += t; 2540 } 2541 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2542 graph->xadj = xadj_filtered; 2543 graph->adjncy = adjncy_filtered; 2544 } else { 2545 graph->xadj = xadj; 2546 graph->adjncy = adjncy; 2547 } 2548 } 2549 /* compute local connected components using PCBDDCGraph */ 2550 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2551 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2552 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2553 PetscCall(ISDestroy(&is_dummy)); 2554 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2555 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2556 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2557 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2558 2559 /* partial clean up */ 2560 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2561 if (B) { 2562 PetscBool flg_row; 2563 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2564 PetscCall(MatDestroy(&B)); 2565 } 2566 if (isplex) { 2567 PetscCall(PetscFree(xadj)); 2568 PetscCall(PetscFree(adjncy)); 2569 } 2570 2571 /* get back data */ 2572 if (isplex) { 2573 if (ncc) *ncc = graph->ncc; 2574 if (cc || primalv) { 2575 Mat A; 2576 PetscBT btv, btvt, btvc; 2577 PetscSection subSection; 2578 PetscInt *ids, cum, cump, *cids, *pids; 2579 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2580 2581 PetscCall(DMGetDimension(dm, &dim)); 2582 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2583 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2584 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2585 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2586 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2587 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2588 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2589 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2590 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2591 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2592 2593 /* First see if we find corners for the subdomains, i.e. a vertex 2594 shared by at least dim subdomain boundary faces. This does not 2595 cover all the possible cases with simplices but it is enough 2596 for tensor cells */ 2597 if (vStart != fStart && dim <= 3) { 2598 for (PetscInt c = cStart; c < cEnd; c++) { 2599 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2600 const PetscInt *faces; 2601 2602 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2603 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2604 PetscCall(DMPlexGetCone(dm, c, &faces)); 2605 for (PetscInt f = 0; f < nf; f++) { 2606 PetscInt nc, ff; 2607 2608 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2609 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2610 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2611 } 2612 if (cnt >= mcnt) { 2613 PetscInt size, *closure = NULL; 2614 2615 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2616 for (PetscInt k = 0; k < 2 * size; k += 2) { 2617 PetscInt v = closure[k]; 2618 if (v >= vStart && v < vEnd) { 2619 PetscInt vsize, *vclosure = NULL; 2620 2621 cnt = 0; 2622 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2623 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2624 PetscInt f = vclosure[vk]; 2625 if (f >= fStart && f < fEnd) { 2626 PetscInt nc, ff; 2627 PetscBool valid = PETSC_FALSE; 2628 2629 for (PetscInt fk = 0; fk < nf; fk++) 2630 if (f == cfaces[fk]) valid = PETSC_TRUE; 2631 if (!valid) continue; 2632 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2633 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2634 if (nc == 1 && f == ff) cnt++; 2635 } 2636 } 2637 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2638 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2639 } 2640 } 2641 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2642 } 2643 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2644 } 2645 } 2646 2647 cids[0] = 0; 2648 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2649 PetscInt j; 2650 2651 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2652 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2653 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2654 2655 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2656 for (k = 0; k < 2 * size; k += 2) { 2657 PetscInt s, pp, p = closure[k], off, dof, cdof; 2658 2659 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2660 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2661 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2662 for (s = 0; s < dof - cdof; s++) { 2663 if (PetscBTLookupSet(btvt, off + s)) continue; 2664 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2665 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2666 else pids[cump++] = off + s; /* cross-vertex */ 2667 } 2668 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2669 if (pp != p) { 2670 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2671 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2672 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2673 for (s = 0; s < dof - cdof; s++) { 2674 if (PetscBTLookupSet(btvt, off + s)) continue; 2675 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2676 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2677 else pids[cump++] = off + s; /* cross-vertex */ 2678 } 2679 } 2680 } 2681 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2682 } 2683 cids[i + 1] = cum; 2684 /* mark dofs as already assigned */ 2685 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2686 } 2687 if (cc) { 2688 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2689 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2690 *cc = cc_n; 2691 } 2692 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2693 PetscCall(PetscFree3(ids, cids, pids)); 2694 PetscCall(PetscBTDestroy(&btv)); 2695 PetscCall(PetscBTDestroy(&btvt)); 2696 PetscCall(PetscBTDestroy(&btvc)); 2697 PetscCall(DMDestroy(&dm)); 2698 } 2699 } else { 2700 if (ncc) *ncc = graph->ncc; 2701 if (cc) { 2702 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2703 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2704 *cc = cc_n; 2705 } 2706 } 2707 /* clean up graph */ 2708 graph->xadj = NULL; 2709 graph->adjncy = NULL; 2710 PetscCall(PCBDDCGraphDestroy(&graph)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2715 { 2716 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2717 PC_IS *pcis = (PC_IS *)pc->data; 2718 IS dirIS = NULL; 2719 PetscInt i; 2720 2721 PetscFunctionBegin; 2722 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2723 if (zerodiag) { 2724 Mat A; 2725 Vec vec3_N; 2726 PetscScalar *vals; 2727 const PetscInt *idxs; 2728 PetscInt nz, *count; 2729 2730 /* p0 */ 2731 PetscCall(VecSet(pcis->vec1_N, 0.)); 2732 PetscCall(PetscMalloc1(pcis->n, &vals)); 2733 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2734 PetscCall(ISGetIndices(zerodiag, &idxs)); 2735 for (i = 0; i < nz; i++) vals[i] = 1.; 2736 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2737 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2738 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2739 /* v_I */ 2740 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2741 for (i = 0; i < nz; i++) vals[i] = 0.; 2742 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2743 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2744 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2745 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2746 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2747 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2748 if (dirIS) { 2749 PetscInt n; 2750 2751 PetscCall(ISGetLocalSize(dirIS, &n)); 2752 PetscCall(ISGetIndices(dirIS, &idxs)); 2753 for (i = 0; i < n; i++) vals[i] = 0.; 2754 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2755 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2756 } 2757 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2758 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2759 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2760 PetscCall(VecSet(vec3_N, 0.)); 2761 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2762 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2763 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2764 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2765 PetscCall(PetscFree(vals)); 2766 PetscCall(VecDestroy(&vec3_N)); 2767 2768 /* there should not be any pressure dofs lying on the interface */ 2769 PetscCall(PetscCalloc1(pcis->n, &count)); 2770 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2771 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2772 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2773 PetscCall(ISGetIndices(zerodiag, &idxs)); 2774 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2775 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2776 PetscCall(PetscFree(count)); 2777 } 2778 PetscCall(ISDestroy(&dirIS)); 2779 2780 /* check PCBDDCBenignGetOrSetP0 */ 2781 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2782 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2783 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2784 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2785 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2786 for (i = 0; i < pcbddc->benign_n; i++) { 2787 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2788 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2789 } 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2794 { 2795 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2796 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2797 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2798 PetscInt nz, n, benign_n, bsp = 1; 2799 PetscInt *interior_dofs, n_interior_dofs, nneu; 2800 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2801 2802 PetscFunctionBegin; 2803 if (reuse) goto project_b0; 2804 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2805 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2806 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2807 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2808 has_null_pressures = PETSC_TRUE; 2809 have_null = PETSC_TRUE; 2810 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2811 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2812 Checks if all the pressure dofs in each subdomain have a zero diagonal 2813 If not, a change of basis on pressures is not needed 2814 since the local Schur complements are already SPD 2815 */ 2816 if (pcbddc->n_ISForDofsLocal) { 2817 IS iP = NULL; 2818 PetscInt p, *pp; 2819 PetscBool flg, blocked = PETSC_FALSE; 2820 2821 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2822 n = pcbddc->n_ISForDofsLocal; 2823 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2824 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2825 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL)); 2826 PetscOptionsEnd(); 2827 if (!flg) { 2828 n = 1; 2829 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2830 } 2831 2832 bsp = 0; 2833 for (p = 0; p < n; p++) { 2834 PetscInt bs = 1; 2835 2836 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2837 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2838 bsp += bs; 2839 } 2840 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2841 bsp = 0; 2842 for (p = 0; p < n; p++) { 2843 const PetscInt *idxs; 2844 PetscInt b, bs = 1, npl, *bidxs; 2845 2846 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2847 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2848 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2849 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2850 for (b = 0; b < bs; b++) { 2851 PetscInt i; 2852 2853 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2854 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2855 bsp++; 2856 } 2857 PetscCall(PetscFree(bidxs)); 2858 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2859 } 2860 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2861 2862 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2863 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2864 if (iP) { 2865 IS newpressures; 2866 2867 PetscCall(ISDifference(pressures, iP, &newpressures)); 2868 PetscCall(ISDestroy(&pressures)); 2869 pressures = newpressures; 2870 } 2871 PetscCall(ISSorted(pressures, &sorted)); 2872 if (!sorted) PetscCall(ISSort(pressures)); 2873 PetscCall(PetscFree(pp)); 2874 } 2875 2876 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2877 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2878 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2879 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2880 PetscCall(ISSorted(zerodiag, &sorted)); 2881 if (!sorted) PetscCall(ISSort(zerodiag)); 2882 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2883 zerodiag_save = zerodiag; 2884 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2885 if (!nz) { 2886 if (n) have_null = PETSC_FALSE; 2887 has_null_pressures = PETSC_FALSE; 2888 PetscCall(ISDestroy(&zerodiag)); 2889 } 2890 recompute_zerodiag = PETSC_FALSE; 2891 2892 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2893 zerodiag_subs = NULL; 2894 benign_n = 0; 2895 n_interior_dofs = 0; 2896 interior_dofs = NULL; 2897 nneu = 0; 2898 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2899 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2900 if (checkb) { /* need to compute interior nodes */ 2901 PetscInt n, i; 2902 PetscInt *count; 2903 ISLocalToGlobalMapping mapping; 2904 2905 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2906 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2907 PetscCall(PetscMalloc1(n, &interior_dofs)); 2908 for (i = 0; i < n; i++) 2909 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2910 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2911 } 2912 if (has_null_pressures) { 2913 IS *subs; 2914 PetscInt nsubs, i, j, nl; 2915 const PetscInt *idxs; 2916 PetscScalar *array; 2917 Vec *work; 2918 2919 subs = pcbddc->local_subs; 2920 nsubs = pcbddc->n_local_subs; 2921 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2922 if (checkb) { 2923 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2924 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2925 PetscCall(ISGetIndices(zerodiag, &idxs)); 2926 /* work[0] = 1_p */ 2927 PetscCall(VecSet(work[0], 0.)); 2928 PetscCall(VecGetArray(work[0], &array)); 2929 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2930 PetscCall(VecRestoreArray(work[0], &array)); 2931 /* work[0] = 1_v */ 2932 PetscCall(VecSet(work[1], 1.)); 2933 PetscCall(VecGetArray(work[1], &array)); 2934 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2935 PetscCall(VecRestoreArray(work[1], &array)); 2936 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2937 } 2938 2939 if (nsubs > 1 || bsp > 1) { 2940 IS *is; 2941 PetscInt b, totb; 2942 2943 totb = bsp; 2944 is = bsp > 1 ? bzerodiag : &zerodiag; 2945 nsubs = PetscMax(nsubs, 1); 2946 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2947 for (b = 0; b < totb; b++) { 2948 for (i = 0; i < nsubs; i++) { 2949 ISLocalToGlobalMapping l2g; 2950 IS t_zerodiag_subs; 2951 PetscInt nl; 2952 2953 if (subs) { 2954 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2955 } else { 2956 IS tis; 2957 2958 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2959 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2960 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2961 PetscCall(ISDestroy(&tis)); 2962 } 2963 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2964 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2965 if (nl) { 2966 PetscBool valid = PETSC_TRUE; 2967 2968 if (checkb) { 2969 PetscCall(VecSet(matis->x, 0)); 2970 PetscCall(ISGetLocalSize(subs[i], &nl)); 2971 PetscCall(ISGetIndices(subs[i], &idxs)); 2972 PetscCall(VecGetArray(matis->x, &array)); 2973 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2974 PetscCall(VecRestoreArray(matis->x, &array)); 2975 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2976 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2977 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2978 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2979 PetscCall(VecGetArray(matis->y, &array)); 2980 for (j = 0; j < n_interior_dofs; j++) { 2981 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2982 valid = PETSC_FALSE; 2983 break; 2984 } 2985 } 2986 PetscCall(VecRestoreArray(matis->y, &array)); 2987 } 2988 if (valid && nneu) { 2989 const PetscInt *idxs; 2990 PetscInt nzb; 2991 2992 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2993 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2994 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2995 if (nzb) valid = PETSC_FALSE; 2996 } 2997 if (valid && pressures) { 2998 IS t_pressure_subs, tmp; 2999 PetscInt i1, i2; 3000 3001 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 3002 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 3003 PetscCall(ISGetLocalSize(tmp, &i1)); 3004 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 3005 if (i2 != i1) valid = PETSC_FALSE; 3006 PetscCall(ISDestroy(&t_pressure_subs)); 3007 PetscCall(ISDestroy(&tmp)); 3008 } 3009 if (valid) { 3010 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 3011 benign_n++; 3012 } else recompute_zerodiag = PETSC_TRUE; 3013 } 3014 PetscCall(ISDestroy(&t_zerodiag_subs)); 3015 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 3016 } 3017 } 3018 } else { /* there's just one subdomain (or zero if they have not been detected */ 3019 PetscBool valid = PETSC_TRUE; 3020 3021 if (nneu) valid = PETSC_FALSE; 3022 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 3023 if (valid && checkb) { 3024 PetscCall(MatMult(matis->A, work[0], matis->x)); 3025 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 3026 PetscCall(VecGetArray(matis->x, &array)); 3027 for (j = 0; j < n_interior_dofs; j++) { 3028 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 3029 valid = PETSC_FALSE; 3030 break; 3031 } 3032 } 3033 PetscCall(VecRestoreArray(matis->x, &array)); 3034 } 3035 if (valid) { 3036 benign_n = 1; 3037 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 3038 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 3039 zerodiag_subs[0] = zerodiag; 3040 } 3041 } 3042 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 3043 } 3044 PetscCall(PetscFree(interior_dofs)); 3045 3046 if (!benign_n) { 3047 PetscInt n; 3048 3049 PetscCall(ISDestroy(&zerodiag)); 3050 recompute_zerodiag = PETSC_FALSE; 3051 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3052 if (n) have_null = PETSC_FALSE; 3053 } 3054 3055 /* final check for null pressures */ 3056 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 3057 3058 if (recompute_zerodiag) { 3059 PetscCall(ISDestroy(&zerodiag)); 3060 if (benign_n == 1) { 3061 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 3062 zerodiag = zerodiag_subs[0]; 3063 } else { 3064 PetscInt i, nzn, *new_idxs; 3065 3066 nzn = 0; 3067 for (i = 0; i < benign_n; i++) { 3068 PetscInt ns; 3069 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 3070 nzn += ns; 3071 } 3072 PetscCall(PetscMalloc1(nzn, &new_idxs)); 3073 nzn = 0; 3074 for (i = 0; i < benign_n; i++) { 3075 PetscInt ns, *idxs; 3076 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 3077 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 3078 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 3079 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 3080 nzn += ns; 3081 } 3082 PetscCall(PetscSortInt(nzn, new_idxs)); 3083 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 3084 } 3085 have_null = PETSC_FALSE; 3086 } 3087 3088 /* determines if the coarse solver will be singular or not */ 3089 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 3090 3091 /* Prepare matrix to compute no-net-flux */ 3092 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 3093 Mat A, loc_divudotp; 3094 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 3095 IS row, col, isused = NULL; 3096 PetscInt M, N, n, st, n_isused; 3097 3098 if (pressures) { 3099 isused = pressures; 3100 } else { 3101 isused = zerodiag_save; 3102 } 3103 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 3104 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 3105 PetscCall(MatGetLocalSize(A, &n, NULL)); 3106 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 3107 n_isused = 0; 3108 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 3109 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 3110 st = st - n_isused; 3111 if (n) { 3112 const PetscInt *gidxs; 3113 3114 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 3115 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 3116 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 3117 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3118 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 3119 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 3120 } else { 3121 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3122 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3123 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3124 } 3125 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3126 PetscCall(ISGetSize(row, &M)); 3127 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3128 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3129 PetscCall(ISDestroy(&row)); 3130 PetscCall(ISDestroy(&col)); 3131 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3132 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3133 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3134 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3135 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3136 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3137 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3138 PetscCall(MatDestroy(&loc_divudotp)); 3139 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3140 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3141 } 3142 PetscCall(ISDestroy(&zerodiag_save)); 3143 PetscCall(ISDestroy(&pressures)); 3144 if (bzerodiag) { 3145 PetscInt i; 3146 3147 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3148 PetscCall(PetscFree(bzerodiag)); 3149 } 3150 pcbddc->benign_n = benign_n; 3151 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3152 3153 /* determines if the problem has subdomains with 0 pressure block */ 3154 have_null = (PetscBool)(!!pcbddc->benign_n); 3155 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3156 3157 project_b0: 3158 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3159 /* change of basis and p0 dofs */ 3160 if (pcbddc->benign_n) { 3161 PetscInt i, s, *nnz; 3162 3163 /* local change of basis for pressures */ 3164 PetscCall(MatDestroy(&pcbddc->benign_change)); 3165 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3166 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3167 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3168 PetscCall(PetscMalloc1(n, &nnz)); 3169 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3170 for (i = 0; i < pcbddc->benign_n; i++) { 3171 const PetscInt *idxs; 3172 PetscInt nzs, j; 3173 3174 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3175 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3176 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3177 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3178 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3179 } 3180 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3181 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3182 PetscCall(PetscFree(nnz)); 3183 /* set identity by default */ 3184 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3185 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3186 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3187 /* set change on pressures */ 3188 for (s = 0; s < pcbddc->benign_n; s++) { 3189 PetscScalar *array; 3190 const PetscInt *idxs; 3191 PetscInt nzs; 3192 3193 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3194 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3195 for (i = 0; i < nzs - 1; i++) { 3196 PetscScalar vals[2]; 3197 PetscInt cols[2]; 3198 3199 cols[0] = idxs[i]; 3200 cols[1] = idxs[nzs - 1]; 3201 vals[0] = 1.; 3202 vals[1] = 1.; 3203 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3204 } 3205 PetscCall(PetscMalloc1(nzs, &array)); 3206 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3207 array[nzs - 1] = 1.; 3208 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3209 /* store local idxs for p0 */ 3210 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3211 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3212 PetscCall(PetscFree(array)); 3213 } 3214 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3215 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3216 3217 /* project if needed */ 3218 if (pcbddc->benign_change_explicit) { 3219 Mat M; 3220 3221 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3222 PetscCall(MatDestroy(&pcbddc->local_mat)); 3223 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3224 PetscCall(MatDestroy(&M)); 3225 } 3226 /* store global idxs for p0 */ 3227 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3228 } 3229 *zerodiaglocal = zerodiag; 3230 PetscFunctionReturn(PETSC_SUCCESS); 3231 } 3232 3233 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3234 { 3235 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3236 PetscScalar *array; 3237 3238 PetscFunctionBegin; 3239 if (!pcbddc->benign_sf) { 3240 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3241 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3242 } 3243 if (get) { 3244 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3245 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3246 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3247 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3248 } else { 3249 PetscCall(VecGetArray(v, &array)); 3250 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3251 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3252 PetscCall(VecRestoreArray(v, &array)); 3253 } 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3258 { 3259 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3260 3261 PetscFunctionBegin; 3262 /* TODO: add error checking 3263 - avoid nested pop (or push) calls. 3264 - cannot push before pop. 3265 - cannot call this if pcbddc->local_mat is NULL 3266 */ 3267 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3268 if (pop) { 3269 if (pcbddc->benign_change_explicit) { 3270 IS is_p0; 3271 MatReuse reuse; 3272 3273 /* extract B_0 */ 3274 reuse = MAT_INITIAL_MATRIX; 3275 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3276 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3277 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3278 /* remove rows and cols from local problem */ 3279 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3280 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3281 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3282 PetscCall(ISDestroy(&is_p0)); 3283 } else { 3284 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3285 PetscScalar *vals; 3286 PetscInt i, n, *idxs_ins; 3287 3288 PetscCall(VecGetLocalSize(matis->y, &n)); 3289 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3290 if (!pcbddc->benign_B0) { 3291 PetscInt *nnz; 3292 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3293 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3294 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3295 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3296 for (i = 0; i < pcbddc->benign_n; i++) { 3297 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3298 nnz[i] = n - nnz[i]; 3299 } 3300 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3301 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3302 PetscCall(PetscFree(nnz)); 3303 } 3304 3305 for (i = 0; i < pcbddc->benign_n; i++) { 3306 PetscScalar *array; 3307 PetscInt *idxs, j, nz, cum; 3308 3309 PetscCall(VecSet(matis->x, 0.)); 3310 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3311 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3312 for (j = 0; j < nz; j++) vals[j] = 1.; 3313 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3314 PetscCall(VecAssemblyBegin(matis->x)); 3315 PetscCall(VecAssemblyEnd(matis->x)); 3316 PetscCall(VecSet(matis->y, 0.)); 3317 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3318 PetscCall(VecGetArray(matis->y, &array)); 3319 cum = 0; 3320 for (j = 0; j < n; j++) { 3321 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3322 vals[cum] = array[j]; 3323 idxs_ins[cum] = j; 3324 cum++; 3325 } 3326 } 3327 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3328 PetscCall(VecRestoreArray(matis->y, &array)); 3329 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3330 } 3331 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3332 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3333 PetscCall(PetscFree2(idxs_ins, vals)); 3334 } 3335 } else { /* push */ 3336 3337 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3338 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3339 PetscScalar *B0_vals; 3340 PetscInt *B0_cols, B0_ncol; 3341 3342 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3343 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3344 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3345 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3346 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3347 } 3348 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3349 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3355 { 3356 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3357 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3358 PetscBLASInt B_neigs, B_ierr, B_lwork; 3359 PetscBLASInt *B_iwork, *B_ifail; 3360 PetscScalar *work, lwork; 3361 PetscScalar *St, *S, *eigv; 3362 PetscScalar *Sarray, *Starray; 3363 PetscReal *eigs, thresh, lthresh, uthresh; 3364 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3365 PetscBool allocated_S_St, upart; 3366 #if defined(PETSC_USE_COMPLEX) 3367 PetscReal *rwork; 3368 #endif 3369 3370 PetscFunctionBegin; 3371 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3372 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3373 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3374 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3375 sub_schurs->is_posdef); 3376 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3377 3378 if (pcbddc->dbg_flag) { 3379 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3380 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3381 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3382 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3383 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3384 } 3385 3386 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3387 3388 /* max size of subsets */ 3389 mss = 0; 3390 for (i = 0; i < sub_schurs->n_subs; i++) { 3391 PetscInt subset_size; 3392 3393 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3394 mss = PetscMax(mss, subset_size); 3395 } 3396 3397 /* min/max and threshold */ 3398 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3399 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3400 nmax = PetscMax(nmin, nmax); 3401 allocated_S_St = PETSC_FALSE; 3402 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3403 allocated_S_St = PETSC_TRUE; 3404 } 3405 3406 /* allocate lapack workspace */ 3407 cum = cum2 = 0; 3408 maxneigs = 0; 3409 for (i = 0; i < sub_schurs->n_subs; i++) { 3410 PetscInt n, subset_size; 3411 3412 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3413 n = PetscMin(subset_size, nmax); 3414 cum += subset_size; 3415 cum2 += subset_size * n; 3416 maxneigs = PetscMax(maxneigs, n); 3417 } 3418 lwork = 0; 3419 if (mss) { 3420 PetscScalar sdummy = 0.; 3421 PetscBLASInt B_itype = 1; 3422 PetscBLASInt B_N, idummy = 0; 3423 PetscReal rdummy = 0., zero = 0.0; 3424 PetscReal eps = 0.0; /* dlamch? */ 3425 3426 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3427 PetscCall(PetscBLASIntCast(mss, &B_N)); 3428 B_lwork = -1; 3429 /* some implementations may complain about NULL pointers, even if we are querying */ 3430 S = &sdummy; 3431 St = &sdummy; 3432 eigs = &rdummy; 3433 eigv = &sdummy; 3434 B_iwork = &idummy; 3435 B_ifail = &idummy; 3436 #if defined(PETSC_USE_COMPLEX) 3437 rwork = &rdummy; 3438 #endif 3439 thresh = 1.0; 3440 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3441 #if defined(PETSC_USE_COMPLEX) 3442 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3443 #else 3444 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3445 #endif 3446 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr); 3447 PetscCall(PetscFPTrapPop()); 3448 } 3449 3450 nv = 0; 3451 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3452 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3453 } 3454 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3455 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3456 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3457 #if defined(PETSC_USE_COMPLEX) 3458 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3459 #endif 3460 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3461 &pcbddc->adaptive_constraints_data)); 3462 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3463 3464 maxneigs = 0; 3465 cum = cumarray = 0; 3466 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3467 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3468 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3469 const PetscInt *idxs; 3470 3471 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3472 for (cum = 0; cum < nv; cum++) { 3473 pcbddc->adaptive_constraints_n[cum] = 1; 3474 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3475 pcbddc->adaptive_constraints_data[cum] = 1.0; 3476 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3477 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3478 } 3479 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3480 } 3481 3482 if (mss) { /* multilevel */ 3483 if (sub_schurs->gdsw) { 3484 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3485 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3486 } else { 3487 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3488 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3489 } 3490 } 3491 3492 lthresh = pcbddc->adaptive_threshold[0]; 3493 uthresh = pcbddc->adaptive_threshold[1]; 3494 upart = pcbddc->use_deluxe_scaling; 3495 for (i = 0; i < sub_schurs->n_subs; i++) { 3496 const PetscInt *idxs; 3497 PetscReal upper, lower; 3498 PetscInt j, subset_size, eigs_start = 0; 3499 PetscBLASInt B_N; 3500 PetscBool same_data = PETSC_FALSE; 3501 PetscBool scal = PETSC_FALSE; 3502 3503 if (upart) { 3504 upper = PETSC_MAX_REAL; 3505 lower = uthresh; 3506 } else { 3507 if (sub_schurs->gdsw) { 3508 upper = uthresh; 3509 lower = PETSC_MIN_REAL; 3510 } else { 3511 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3512 upper = 1. / uthresh; 3513 lower = 0.; 3514 } 3515 } 3516 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3517 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3518 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3519 /* this is experimental: we assume the dofs have been properly grouped to have 3520 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3521 if (!sub_schurs->is_posdef) { 3522 Mat T; 3523 3524 for (j = 0; j < subset_size; j++) { 3525 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3526 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3527 PetscCall(MatScale(T, -1.0)); 3528 PetscCall(MatDestroy(&T)); 3529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3530 PetscCall(MatScale(T, -1.0)); 3531 PetscCall(MatDestroy(&T)); 3532 if (sub_schurs->change_primal_sub) { 3533 PetscInt nz, k; 3534 const PetscInt *idxs; 3535 3536 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3537 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3538 for (k = 0; k < nz; k++) { 3539 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3540 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3541 } 3542 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3543 } 3544 scal = PETSC_TRUE; 3545 break; 3546 } 3547 } 3548 } 3549 3550 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3551 if (sub_schurs->is_symmetric) { 3552 PetscInt j, k; 3553 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3554 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3555 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3556 } 3557 for (j = 0; j < subset_size; j++) { 3558 for (k = j; k < subset_size; k++) { 3559 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3560 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3561 } 3562 } 3563 } else { 3564 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3565 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3566 } 3567 } else { 3568 S = Sarray + cumarray; 3569 St = Starray + cumarray; 3570 } 3571 /* see if we can save some work */ 3572 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3573 3574 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3575 B_neigs = 0; 3576 } else { 3577 PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0; 3578 PetscReal eps = -1.0; /* dlamch? */ 3579 PetscInt nmin_s; 3580 PetscBool compute_range; 3581 3582 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3583 B_neigs = 0; 3584 compute_range = (PetscBool)!same_data; 3585 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3586 3587 if (pcbddc->dbg_flag) { 3588 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3589 3590 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3591 PetscCall( 3592 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3593 } 3594 3595 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3596 if (compute_range) { 3597 /* ask for eigenvalues larger than thresh */ 3598 if (sub_schurs->is_posdef) { 3599 #if defined(PETSC_USE_COMPLEX) 3600 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3601 #else 3602 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3603 #endif 3604 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3605 } else { /* no theory so far, but it works nicely */ 3606 PetscInt recipe = 0, recipe_m = 1; 3607 PetscReal bb[2]; 3608 3609 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3610 switch (recipe) { 3611 case 0: 3612 if (scal) { 3613 bb[0] = PETSC_MIN_REAL; 3614 bb[1] = lthresh; 3615 } else { 3616 bb[0] = uthresh; 3617 bb[1] = PETSC_MAX_REAL; 3618 } 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3621 #else 3622 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3623 #endif 3624 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3625 break; 3626 case 1: 3627 bb[0] = PETSC_MIN_REAL; 3628 bb[1] = lthresh * lthresh; 3629 #if defined(PETSC_USE_COMPLEX) 3630 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3631 #else 3632 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3633 #endif 3634 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3635 if (!scal) { 3636 PetscBLASInt B_neigs2 = 0; 3637 3638 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3639 bb[1] = PETSC_MAX_REAL; 3640 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3641 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3642 #if defined(PETSC_USE_COMPLEX) 3643 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3644 #else 3645 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3646 #endif 3647 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3648 B_neigs += B_neigs2; 3649 } 3650 break; 3651 case 2: 3652 if (scal) { 3653 bb[0] = PETSC_MIN_REAL; 3654 bb[1] = 0; 3655 #if defined(PETSC_USE_COMPLEX) 3656 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3657 #else 3658 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3659 #endif 3660 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3661 } else { 3662 PetscBLASInt B_neigs2 = 0; 3663 PetscBool do_copy = PETSC_FALSE; 3664 3665 lthresh = PetscMax(lthresh, 0.0); 3666 if (lthresh > 0.0) { 3667 bb[0] = PETSC_MIN_REAL; 3668 bb[1] = lthresh * lthresh; 3669 3670 do_copy = PETSC_TRUE; 3671 #if defined(PETSC_USE_COMPLEX) 3672 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3673 #else 3674 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3675 #endif 3676 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3677 } 3678 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3679 bb[1] = PETSC_MAX_REAL; 3680 if (do_copy) { 3681 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3682 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3683 } 3684 #if defined(PETSC_USE_COMPLEX) 3685 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3686 #else 3687 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3688 #endif 3689 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3690 B_neigs += B_neigs2; 3691 } 3692 break; 3693 case 3: 3694 if (scal) { 3695 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3696 } else { 3697 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3698 } 3699 if (!scal) { 3700 bb[0] = uthresh; 3701 bb[1] = PETSC_MAX_REAL; 3702 #if defined(PETSC_USE_COMPLEX) 3703 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3704 #else 3705 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3706 #endif 3707 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3708 } 3709 if (recipe_m > 0 && B_N - B_neigs > 0) { 3710 PetscBLASInt B_neigs2 = 0; 3711 3712 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3713 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3714 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3715 #if defined(PETSC_USE_COMPLEX) 3716 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3717 #else 3718 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3719 #endif 3720 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3721 B_neigs += B_neigs2; 3722 } 3723 break; 3724 case 4: 3725 bb[0] = PETSC_MIN_REAL; 3726 bb[1] = lthresh; 3727 #if defined(PETSC_USE_COMPLEX) 3728 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3729 #else 3730 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3731 #endif 3732 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3733 { 3734 PetscBLASInt B_neigs2 = 0; 3735 3736 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3737 bb[1] = PETSC_MAX_REAL; 3738 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3739 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3740 #if defined(PETSC_USE_COMPLEX) 3741 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3742 #else 3743 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3744 #endif 3745 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3746 B_neigs += B_neigs2; 3747 } 3748 break; 3749 case 5: /* same as before: first compute all eigenvalues, then filter */ 3750 #if defined(PETSC_USE_COMPLEX) 3751 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3752 #else 3753 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3754 #endif 3755 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3756 { 3757 PetscInt e, k, ne; 3758 for (e = 0, ne = 0; e < B_neigs; e++) { 3759 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3760 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3761 eigs[ne] = eigs[e]; 3762 ne++; 3763 } 3764 } 3765 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3766 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3767 } 3768 break; 3769 default: 3770 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3771 } 3772 } 3773 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3774 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3775 #if defined(PETSC_USE_COMPLEX) 3776 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3777 #else 3778 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3779 #endif 3780 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3781 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3782 PetscInt k; 3783 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3784 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3785 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3786 nmin = nmax; 3787 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3788 for (k = 0; k < nmax; k++) { 3789 eigs[k] = 1. / PETSC_SMALL; 3790 eigv[k * (subset_size + 1)] = 1.0; 3791 } 3792 } 3793 PetscCall(PetscFPTrapPop()); 3794 if (B_ierr) { 3795 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3796 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3797 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3798 } 3799 3800 if (B_neigs > nmax) { 3801 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3802 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3803 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3804 } 3805 3806 nmin_s = PetscMin(nmin, B_N); 3807 if (B_neigs < nmin_s) { 3808 PetscBLASInt B_neigs2 = 0; 3809 3810 if (upart) { 3811 if (scal) { 3812 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3813 B_IL = B_neigs + 1; 3814 } else { 3815 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3816 B_IU = B_N - B_neigs; 3817 } 3818 } else { 3819 B_IL = B_neigs + 1; 3820 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3821 } 3822 if (pcbddc->dbg_flag) { 3823 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3824 } 3825 if (sub_schurs->is_symmetric) { 3826 PetscInt j, k; 3827 for (j = 0; j < subset_size; j++) { 3828 for (k = j; k < subset_size; k++) { 3829 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3830 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3831 } 3832 } 3833 } else { 3834 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3835 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3836 } 3837 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3838 #if defined(PETSC_USE_COMPLEX) 3839 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3840 #else 3841 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3842 #endif 3843 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3844 PetscCall(PetscFPTrapPop()); 3845 B_neigs += B_neigs2; 3846 } 3847 if (B_ierr) { 3848 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3849 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3850 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3851 } 3852 if (pcbddc->dbg_flag) { 3853 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3854 for (j = 0; j < B_neigs; j++) { 3855 if (!sub_schurs->gdsw) { 3856 if (eigs[j] == 0.0) { 3857 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3858 } else { 3859 if (upart) { 3860 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3861 } else { 3862 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start]))); 3863 } 3864 } 3865 } else { 3866 double pg = (double)eigs[j + eigs_start]; 3867 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3868 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3869 } 3870 } 3871 } 3872 } 3873 /* change the basis back to the original one */ 3874 if (sub_schurs->change) { 3875 Mat change, phi, phit; 3876 3877 if (pcbddc->dbg_flag > 2) { 3878 PetscInt ii; 3879 for (ii = 0; ii < B_neigs; ii++) { 3880 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3881 for (j = 0; j < B_N; j++) { 3882 #if defined(PETSC_USE_COMPLEX) 3883 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3884 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3885 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3886 #else 3887 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3888 #endif 3889 } 3890 } 3891 } 3892 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3893 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3894 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3895 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3896 PetscCall(MatDestroy(&phit)); 3897 PetscCall(MatDestroy(&phi)); 3898 } 3899 maxneigs = PetscMax(B_neigs, maxneigs); 3900 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3901 if (B_neigs) { 3902 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3903 3904 if (pcbddc->dbg_flag > 1) { 3905 PetscInt ii; 3906 for (ii = 0; ii < B_neigs; ii++) { 3907 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3908 for (j = 0; j < B_N; j++) { 3909 #if defined(PETSC_USE_COMPLEX) 3910 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3911 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3912 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3913 #else 3914 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3915 #endif 3916 } 3917 } 3918 } 3919 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3920 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3921 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3922 cum++; 3923 } 3924 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3925 /* shift for next computation */ 3926 cumarray += subset_size * subset_size; 3927 } 3928 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3929 3930 if (mss) { 3931 if (sub_schurs->gdsw) { 3932 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3933 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3934 } else { 3935 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3936 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3937 /* destroy matrices (junk) */ 3938 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3939 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3940 } 3941 } 3942 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3943 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3944 #if defined(PETSC_USE_COMPLEX) 3945 PetscCall(PetscFree(rwork)); 3946 #endif 3947 if (pcbddc->dbg_flag) { 3948 PetscInt maxneigs_r; 3949 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3950 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3951 } 3952 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3953 PetscFunctionReturn(PETSC_SUCCESS); 3954 } 3955 3956 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3957 { 3958 Mat coarse_submat; 3959 3960 PetscFunctionBegin; 3961 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3962 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3963 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3964 3965 /* Setup local neumann solver ksp_R */ 3966 /* PCBDDCSetUpLocalScatters should be called first! */ 3967 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3968 3969 /* 3970 Setup local correction and local part of coarse basis. 3971 Gives back the dense local part of the coarse matrix in column major ordering 3972 */ 3973 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3974 3975 /* Compute total number of coarse nodes and setup coarse solver */ 3976 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3977 PetscCall(MatDestroy(&coarse_submat)); 3978 PetscFunctionReturn(PETSC_SUCCESS); 3979 } 3980 3981 PetscErrorCode PCBDDCResetCustomization(PC pc) 3982 { 3983 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3984 3985 PetscFunctionBegin; 3986 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3987 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3988 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3989 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3990 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3991 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3992 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3993 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3994 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3995 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3996 PetscFunctionReturn(PETSC_SUCCESS); 3997 } 3998 3999 PetscErrorCode PCBDDCResetTopography(PC pc) 4000 { 4001 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4002 PetscInt i; 4003 4004 PetscFunctionBegin; 4005 PetscCall(MatDestroy(&pcbddc->nedcG)); 4006 PetscCall(ISDestroy(&pcbddc->nedclocal)); 4007 PetscCall(MatDestroy(&pcbddc->discretegradient)); 4008 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 4009 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 4010 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 4011 PetscCall(VecDestroy(&pcbddc->work_change)); 4012 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 4013 PetscCall(MatDestroy(&pcbddc->divudotp)); 4014 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 4015 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 4016 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 4017 pcbddc->n_local_subs = 0; 4018 PetscCall(PetscFree(pcbddc->local_subs)); 4019 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 4020 pcbddc->graphanalyzed = PETSC_FALSE; 4021 pcbddc->recompute_topography = PETSC_TRUE; 4022 pcbddc->corner_selected = PETSC_FALSE; 4023 PetscFunctionReturn(PETSC_SUCCESS); 4024 } 4025 4026 PetscErrorCode PCBDDCResetSolvers(PC pc) 4027 { 4028 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4029 4030 PetscFunctionBegin; 4031 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 4032 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4033 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4034 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4035 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4036 PetscCall(VecDestroy(&pcbddc->vec1_P)); 4037 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4038 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4039 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4040 PetscCall(VecDestroy(&pcbddc->vec1_R)); 4041 PetscCall(VecDestroy(&pcbddc->vec2_R)); 4042 PetscCall(ISDestroy(&pcbddc->is_R_local)); 4043 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 4044 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 4045 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 4046 PetscCall(KSPReset(pcbddc->ksp_D)); 4047 PetscCall(KSPReset(pcbddc->ksp_R)); 4048 PetscCall(KSPReset(pcbddc->coarse_ksp)); 4049 PetscCall(MatDestroy(&pcbddc->local_mat)); 4050 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 4051 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 4052 PetscCall(PetscFree(pcbddc->global_primal_indices)); 4053 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 4054 PetscCall(MatDestroy(&pcbddc->benign_change)); 4055 PetscCall(VecDestroy(&pcbddc->benign_vec)); 4056 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 4057 PetscCall(MatDestroy(&pcbddc->benign_B0)); 4058 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 4059 if (pcbddc->benign_zerodiag_subs) { 4060 PetscInt i; 4061 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 4062 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 4063 } 4064 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 4065 PetscFunctionReturn(PETSC_SUCCESS); 4066 } 4067 4068 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 4069 { 4070 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4071 PC_IS *pcis = (PC_IS *)pc->data; 4072 VecType impVecType; 4073 PetscInt n_constraints, n_R, old_size; 4074 4075 PetscFunctionBegin; 4076 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 4077 n_R = pcis->n - pcbddc->n_vertices; 4078 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 4079 /* local work vectors (try to avoid unneeded work)*/ 4080 /* R nodes */ 4081 old_size = -1; 4082 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 4083 if (n_R != old_size) { 4084 PetscCall(VecDestroy(&pcbddc->vec1_R)); 4085 PetscCall(VecDestroy(&pcbddc->vec2_R)); 4086 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 4087 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 4088 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 4089 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 4090 } 4091 /* local primal dofs */ 4092 old_size = -1; 4093 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 4094 if (pcbddc->local_primal_size != old_size) { 4095 PetscCall(VecDestroy(&pcbddc->vec1_P)); 4096 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 4097 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 4098 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 4099 } 4100 /* local explicit constraints */ 4101 old_size = -1; 4102 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 4103 if (n_constraints && n_constraints != old_size) { 4104 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4105 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 4106 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 4107 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 4108 } 4109 PetscFunctionReturn(PETSC_SUCCESS); 4110 } 4111 4112 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 4113 { 4114 PetscBool flg; 4115 const PetscScalar *a; 4116 4117 PetscFunctionBegin; 4118 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 4119 if (flg) { 4120 PetscCall(MatDenseGetArrayRead(S, &a)); 4121 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4122 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4123 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4124 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4125 } else { 4126 const PetscInt *ii, *jj; 4127 PetscInt n; 4128 PetscInt buf[8192], *bufc = NULL; 4129 PetscBool freeb = PETSC_FALSE; 4130 Mat Sm = S; 4131 4132 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4133 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4134 else PetscCall(PetscObjectReference((PetscObject)S)); 4135 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4136 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4137 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4138 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4139 bufc = buf; 4140 } else { 4141 PetscCall(PetscMalloc1(nc, &bufc)); 4142 freeb = PETSC_TRUE; 4143 } 4144 4145 for (PetscInt i = 0; i < n; i++) { 4146 const PetscInt nci = ii[i + 1] - ii[i]; 4147 4148 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4149 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4150 } 4151 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4152 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4153 PetscCall(MatDestroy(&Sm)); 4154 if (freeb) PetscCall(PetscFree(bufc)); 4155 } 4156 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4157 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4158 PetscFunctionReturn(PETSC_SUCCESS); 4159 } 4160 4161 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4162 { 4163 Mat_SeqAIJ *aij; 4164 PetscInt *ii, *jj; 4165 PetscScalar *aa; 4166 PetscInt nnz = 0, m, nc; 4167 const PetscScalar *a; 4168 const PetscScalar zero = 0.0; 4169 4170 PetscFunctionBegin; 4171 PetscCall(MatGetLocalSize(D, &m, &nc)); 4172 PetscCall(MatDenseGetArrayRead(D, &a)); 4173 PetscCall(PetscMalloc1(m + 1, &ii)); 4174 PetscCall(PetscMalloc1(m * nc, &jj)); 4175 PetscCall(PetscMalloc1(m * nc, &aa)); 4176 ii[0] = 0; 4177 for (PetscInt k = 0; k < m; k++) { 4178 for (PetscInt s = 0; s < nc; s++) { 4179 const PetscInt c = s + k * nc; 4180 const PetscScalar v = a[k + s * m]; 4181 4182 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4183 jj[nnz] = j[c]; 4184 aa[nnz] = a[k + s * m]; 4185 nnz++; 4186 } 4187 ii[k + 1] = nnz; 4188 } 4189 4190 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4191 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4192 4193 aij = (Mat_SeqAIJ *)(*mat)->data; 4194 aij->free_a = PETSC_TRUE; 4195 aij->free_ij = PETSC_TRUE; 4196 PetscFunctionReturn(PETSC_SUCCESS); 4197 } 4198 4199 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4200 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4201 { 4202 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4203 const PetscBool allowzeropivot = PETSC_FALSE; 4204 PetscBool zeropivotdetected = PETSC_FALSE; 4205 const PetscReal shift = 0.0; 4206 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4207 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4208 PetscLogDouble flops = 0.0; 4209 4210 PetscFunctionBegin; 4211 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4212 for (PetscInt i = 0; i < nblocks; i++) { 4213 ncnt += bsizes[i]; 4214 ncnt2 += PetscSqr(bsizes[i]); 4215 } 4216 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4217 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4218 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4219 4220 PetscCall(PetscMalloc1(n + 1, &ii)); 4221 PetscCall(PetscMalloc1(ncnt2, &jj)); 4222 PetscCall(PetscCalloc1(ncnt2, &aa)); 4223 4224 ncnt = 0; 4225 ii[0] = 0; 4226 indi = ii; 4227 indj = jj; 4228 diag = aa; 4229 for (PetscInt i = 0; i < nblocks; i++) { 4230 const PetscInt bs = bsizes[i]; 4231 4232 for (PetscInt k = 0; k < bs; k++) { 4233 indi[k + 1] = indi[k] + bs; 4234 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4235 } 4236 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4237 switch (bs) { 4238 case 1: 4239 *diag = 1.0 / (*diag); 4240 break; 4241 case 2: 4242 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4243 break; 4244 case 3: 4245 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4246 break; 4247 case 4: 4248 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4249 break; 4250 case 5: 4251 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4252 break; 4253 case 6: 4254 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4255 break; 4256 case 7: 4257 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4258 break; 4259 default: 4260 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4261 } 4262 ncnt += bs; 4263 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4264 diag += bs * bs; 4265 indj += bs * bs; 4266 indi += bs; 4267 } 4268 PetscCall(PetscLogFlops(flops)); 4269 PetscCall(PetscFree2(v_work, v_pivots)); 4270 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4271 { 4272 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4273 aij->free_a = PETSC_TRUE; 4274 aij->free_ij = PETSC_TRUE; 4275 } 4276 PetscFunctionReturn(PETSC_SUCCESS); 4277 } 4278 4279 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4280 { 4281 const PetscScalar *rarr; 4282 PetscScalar *larr; 4283 PetscSF vsf; 4284 PetscInt n, rld, lld; 4285 4286 PetscFunctionBegin; 4287 PetscCall(MatGetSize(A, NULL, &n)); 4288 PetscCall(MatDenseGetLDA(A, &rld)); 4289 PetscCall(MatDenseGetLDA(B, &lld)); 4290 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4291 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4292 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4293 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4294 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4295 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4296 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4297 PetscCall(PetscSFDestroy(&vsf)); 4298 PetscFunctionReturn(PETSC_SUCCESS); 4299 } 4300 4301 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4302 { 4303 PC_IS *pcis = (PC_IS *)pc->data; 4304 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4305 PCBDDCGraph graph = pcbddc->mat_graph; 4306 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4307 /* submatrices of local problem */ 4308 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4309 /* submatrices of local coarse problem */ 4310 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4311 /* working matrices */ 4312 Mat C_CR; 4313 4314 /* additional working stuff */ 4315 PC pc_R; 4316 IS is_R, is_V, is_C; 4317 const PetscInt *idx_V, *idx_C; 4318 Mat F, Brhs = NULL; 4319 Vec dummy_vec; 4320 PetscBool isPreonly, isLU, isCHOL, need_benign_correction, sparserhs; 4321 PetscInt *idx_V_B; 4322 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4323 PetscInt n_eff_vertices, n_eff_constraints; 4324 PetscInt i, n_R, n_D, n_B; 4325 PetscScalar one = 1.0, m_one = -1.0; 4326 4327 /* Multi-element support */ 4328 PetscBool multi_element = graph->multi_element; 4329 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4330 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4331 IS is_C_perm = NULL; 4332 PetscInt n_C_bss = 0, *C_bss = NULL; 4333 Mat coarse_phi_multi; 4334 4335 PetscFunctionBegin; 4336 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4337 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4338 4339 /* Set Non-overlapping dimensions */ 4340 n_vertices = pcbddc->n_vertices; 4341 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4342 n_B = pcis->n_B; 4343 n_D = pcis->n - n_B; 4344 n_R = pcis->n - n_vertices; 4345 4346 /* vertices in boundary numbering */ 4347 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4348 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4349 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4350 4351 /* these two cases still need to be optimized */ 4352 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4353 4354 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4355 if (multi_element) { 4356 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4357 4358 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4359 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4360 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4361 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4362 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4363 4364 /* group vertices and constraints by subdomain id */ 4365 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4366 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4367 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4368 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4369 4370 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4371 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4372 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4373 for (PetscInt i = 0; i < n_vertices; i++) { 4374 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4375 4376 V_to_eff_V[i] = count_eff[s]; 4377 count_eff[s] += 1; 4378 } 4379 for (PetscInt i = 0; i < n_constraints; i++) { 4380 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4381 4382 C_to_eff_C[i] = count_eff[s]; 4383 count_eff[s] += 1; 4384 } 4385 4386 /* preallocation */ 4387 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4388 for (PetscInt i = 0; i < n_vertices; i++) { 4389 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4390 4391 nnz[i] = count_eff[s] + count_eff[s + 1]; 4392 } 4393 for (PetscInt i = 0; i < n_constraints; i++) { 4394 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4395 4396 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4397 } 4398 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4399 PetscCall(PetscFree(nnz)); 4400 4401 n_eff_vertices = 0; 4402 n_eff_constraints = 0; 4403 for (PetscInt i = 0; i < n_el; i++) { 4404 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4405 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4406 count_eff[2 * i] = 0; 4407 count_eff[2 * i + 1] = 0; 4408 } 4409 4410 const PetscInt *idx; 4411 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4412 4413 for (PetscInt i = 0; i < n_vertices; i++) { 4414 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4415 const PetscInt s = 2 * e; 4416 4417 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4418 count_eff[s] += 1; 4419 } 4420 for (PetscInt i = 0; i < n_constraints; i++) { 4421 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4422 const PetscInt s = 2 * e + 1; 4423 4424 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4425 count_eff[s] += 1; 4426 } 4427 4428 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4429 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4430 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4431 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4432 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4433 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4434 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4435 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4436 4437 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4438 for (PetscInt i = 0; i < n_R; i++) { 4439 const PetscInt e = graph->nodes[idx[i]].local_sub; 4440 const PetscInt s = 2 * e; 4441 PetscInt j; 4442 4443 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4444 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4445 } 4446 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4447 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4448 for (PetscInt i = 0; i < n_B; i++) { 4449 const PetscInt e = graph->nodes[idx[i]].local_sub; 4450 const PetscInt s = 2 * e; 4451 PetscInt j; 4452 4453 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4454 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4455 } 4456 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4457 4458 /* permutation and blocksizes for block invert of S_CC */ 4459 PetscInt *idxp; 4460 4461 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4462 PetscCall(PetscMalloc1(n_el, &C_bss)); 4463 n_C_bss = 0; 4464 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4465 const PetscInt nc = count_eff[2 * e + 1]; 4466 4467 if (nc) C_bss[n_C_bss++] = nc; 4468 for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; 4469 cnt += nc; 4470 } 4471 4472 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4473 4474 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4475 PetscCall(PetscFree(count_eff)); 4476 } else { 4477 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4478 n_eff_constraints = n_constraints; 4479 n_eff_vertices = n_vertices; 4480 } 4481 4482 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4483 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4484 PetscCall(PCSetUp(pc_R)); 4485 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->ksp_R, KSPPREONLY, &isPreonly)); 4486 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4487 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4488 lda_rhs = n_R; 4489 need_benign_correction = PETSC_FALSE; 4490 F = NULL; 4491 if (isPreonly && (isLU || isCHOL)) { 4492 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4493 } else if (sub_schurs && sub_schurs->reuse_solver) { 4494 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4495 MatFactorType type; 4496 4497 F = reuse_solver->F; 4498 PetscCall(MatGetFactorType(F, &type)); 4499 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4500 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4501 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4502 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4503 } 4504 4505 /* determine if we can use a sparse right-hand side */ 4506 sparserhs = PETSC_FALSE; 4507 if (F && !multi_element) { 4508 MatSolverType solver; 4509 4510 PetscCall(MatFactorGetSolverType(F, &solver)); 4511 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4512 } 4513 4514 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4515 dummy_vec = NULL; 4516 if (need_benign_correction && lda_rhs != n_R && F) { 4517 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4518 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4519 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4520 } 4521 4522 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4523 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4524 4525 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4526 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4527 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4528 PetscCall(ISGetIndices(is_V, &idx_V)); 4529 PetscCall(ISGetIndices(is_C, &idx_C)); 4530 4531 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4532 if (n_constraints) { 4533 Mat C_B; 4534 4535 /* Extract constraints on R nodes: C_{CR} */ 4536 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4537 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4538 4539 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4540 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4541 if (!sparserhs) { 4542 PetscScalar *marr; 4543 4544 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4545 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4546 for (i = 0; i < n_constraints; i++) { 4547 const PetscScalar *row_cmat_values; 4548 const PetscInt *row_cmat_indices; 4549 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4550 4551 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4552 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4553 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4554 } 4555 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4556 } else { 4557 Mat tC_CR; 4558 4559 PetscCall(MatScale(C_CR, -1.0)); 4560 if (lda_rhs != n_R) { 4561 PetscScalar *aa; 4562 PetscInt r, *ii, *jj; 4563 PetscBool done; 4564 4565 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4566 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4567 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4568 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4569 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4570 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4571 } else { 4572 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4573 tC_CR = C_CR; 4574 } 4575 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4576 PetscCall(MatDestroy(&tC_CR)); 4577 } 4578 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4579 if (F) { 4580 if (need_benign_correction) { 4581 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4582 4583 /* rhs is already zero on interior dofs, no need to change the rhs */ 4584 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4585 } 4586 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4587 if (need_benign_correction) { 4588 PetscScalar *marr; 4589 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4590 4591 /* XXX multi_element? */ 4592 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4593 if (lda_rhs != n_R) { 4594 for (i = 0; i < n_eff_constraints; i++) { 4595 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4596 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4597 PetscCall(VecResetArray(dummy_vec)); 4598 } 4599 } else { 4600 for (i = 0; i < n_eff_constraints; i++) { 4601 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4602 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4603 PetscCall(VecResetArray(pcbddc->vec1_R)); 4604 } 4605 } 4606 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4607 } 4608 } else { 4609 const PetscScalar *barr; 4610 PetscScalar *marr; 4611 4612 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4613 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4614 for (i = 0; i < n_eff_constraints; i++) { 4615 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4616 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4617 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4618 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4619 PetscCall(VecResetArray(pcbddc->vec1_R)); 4620 PetscCall(VecResetArray(pcbddc->vec2_R)); 4621 } 4622 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4623 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4624 } 4625 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4626 PetscCall(MatDestroy(&Brhs)); 4627 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4628 if (!pcbddc->switch_static) { 4629 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4630 for (i = 0; i < n_eff_constraints; i++) { 4631 Vec r, b; 4632 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4633 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4634 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4635 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4636 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4637 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4638 } 4639 if (multi_element) { 4640 Mat T; 4641 4642 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4643 PetscCall(MatDestroy(&local_auxmat2_R)); 4644 local_auxmat2_R = T; 4645 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4646 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4647 pcbddc->local_auxmat2 = T; 4648 } 4649 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4650 } else { 4651 if (multi_element) { 4652 Mat T; 4653 4654 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4655 PetscCall(MatDestroy(&local_auxmat2_R)); 4656 local_auxmat2_R = T; 4657 } 4658 if (lda_rhs != n_R) { 4659 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4660 } else { 4661 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4662 pcbddc->local_auxmat2 = local_auxmat2_R; 4663 } 4664 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4665 } 4666 PetscCall(MatScale(S_CC, m_one)); 4667 if (multi_element) { 4668 Mat T, T2; 4669 IS isp, ispi; 4670 4671 isp = is_C_perm; 4672 4673 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4674 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4675 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4676 PetscCall(MatDestroy(&T)); 4677 PetscCall(MatDestroy(&S_CC)); 4678 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4679 PetscCall(MatDestroy(&T2)); 4680 PetscCall(ISDestroy(&ispi)); 4681 } else { 4682 if (isCHOL) { 4683 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4684 } else { 4685 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4686 } 4687 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4688 } 4689 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4690 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4691 PetscCall(MatDestroy(&C_B)); 4692 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4693 } 4694 4695 /* Get submatrices from subdomain matrix */ 4696 if (n_vertices) { 4697 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4698 PetscBool oldpin; 4699 #endif 4700 IS is_aux; 4701 4702 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4703 IS tis; 4704 4705 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4706 PetscCall(ISSort(tis)); 4707 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4708 PetscCall(ISDestroy(&tis)); 4709 } else { 4710 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4711 } 4712 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4713 oldpin = pcbddc->local_mat->boundtocpu; 4714 #endif 4715 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4716 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4717 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4718 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4719 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4720 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4721 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4722 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4723 #endif 4724 PetscCall(ISDestroy(&is_aux)); 4725 } 4726 PetscCall(ISDestroy(&is_C_perm)); 4727 PetscCall(PetscFree(C_bss)); 4728 4729 p0_lidx_I = NULL; 4730 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4731 const PetscInt *idxs; 4732 4733 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4734 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4735 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4736 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4737 } 4738 4739 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4740 4741 /* Matrices of coarse basis functions (local) */ 4742 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4743 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4744 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4745 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4746 if (!multi_element) { 4747 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4748 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4749 coarse_phi_multi = NULL; 4750 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4751 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4752 IS is_cols[2] = {is_V, is_C}; 4753 4754 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4755 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4756 PetscCall(ISDestroy(&is_rows[1])); 4757 } 4758 4759 /* vertices */ 4760 if (n_vertices) { 4761 PetscBool restoreavr = PETSC_FALSE; 4762 Mat A_RRmA_RV = NULL; 4763 4764 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4765 PetscCall(MatDestroy(&A_VV)); 4766 4767 if (n_R) { 4768 Mat A_RV_bcorr = NULL, S_VV; 4769 4770 PetscCall(MatScale(A_RV, m_one)); 4771 if (need_benign_correction) { 4772 ISLocalToGlobalMapping RtoN; 4773 IS is_p0; 4774 PetscInt *idxs_p0, n; 4775 4776 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4777 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4778 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4779 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4780 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4781 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4782 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4783 PetscCall(ISDestroy(&is_p0)); 4784 } 4785 4786 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4787 if (!sparserhs || need_benign_correction) { 4788 if (lda_rhs == n_R && !multi_element) { 4789 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4790 } else { 4791 Mat T; 4792 PetscScalar *av, *array; 4793 const PetscInt *xadj, *adjncy; 4794 PetscInt n; 4795 PetscBool flg_row; 4796 4797 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4798 PetscCall(MatDenseGetArrayWrite(T, &array)); 4799 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4800 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4801 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4802 for (i = 0; i < n; i++) { 4803 PetscInt j; 4804 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4805 } 4806 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4807 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4808 PetscCall(MatDestroy(&A_RV)); 4809 A_RV = T; 4810 } 4811 if (need_benign_correction) { 4812 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4813 PetscScalar *marr; 4814 4815 /* XXX multi_element */ 4816 PetscCall(MatDenseGetArray(A_RV, &marr)); 4817 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4818 4819 | 0 0 0 | (V) 4820 L = | 0 0 -1 | (P-p0) 4821 | 0 0 -1 | (p0) 4822 4823 */ 4824 for (i = 0; i < reuse_solver->benign_n; i++) { 4825 const PetscScalar *vals; 4826 const PetscInt *idxs, *idxs_zero; 4827 PetscInt n, j, nz; 4828 4829 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4830 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4831 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4832 for (j = 0; j < n; j++) { 4833 PetscScalar val = vals[j]; 4834 PetscInt k, col = idxs[j]; 4835 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4836 } 4837 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4838 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4839 } 4840 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4841 } 4842 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4843 Brhs = A_RV; 4844 } else { 4845 Mat tA_RVT, A_RVT; 4846 4847 if (!pcbddc->symmetric_primal) { 4848 /* A_RV already scaled by -1 */ 4849 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4850 } else { 4851 restoreavr = PETSC_TRUE; 4852 PetscCall(MatScale(A_VR, -1.0)); 4853 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4854 A_RVT = A_VR; 4855 } 4856 if (lda_rhs != n_R) { 4857 PetscScalar *aa; 4858 PetscInt r, *ii, *jj; 4859 PetscBool done; 4860 4861 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4862 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4863 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4864 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4865 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4866 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4867 } else { 4868 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4869 tA_RVT = A_RVT; 4870 } 4871 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4872 PetscCall(MatDestroy(&tA_RVT)); 4873 PetscCall(MatDestroy(&A_RVT)); 4874 } 4875 if (F) { 4876 /* need to correct the rhs */ 4877 if (need_benign_correction) { 4878 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4879 PetscScalar *marr; 4880 4881 PetscCall(MatDenseGetArray(Brhs, &marr)); 4882 if (lda_rhs != n_R) { 4883 for (i = 0; i < n_eff_vertices; i++) { 4884 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4885 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4886 PetscCall(VecResetArray(dummy_vec)); 4887 } 4888 } else { 4889 for (i = 0; i < n_eff_vertices; i++) { 4890 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4891 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4892 PetscCall(VecResetArray(pcbddc->vec1_R)); 4893 } 4894 } 4895 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4896 } 4897 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4898 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4899 /* need to correct the solution */ 4900 if (need_benign_correction) { 4901 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4902 PetscScalar *marr; 4903 4904 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4905 if (lda_rhs != n_R) { 4906 for (i = 0; i < n_eff_vertices; i++) { 4907 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4908 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4909 PetscCall(VecResetArray(dummy_vec)); 4910 } 4911 } else { 4912 for (i = 0; i < n_eff_vertices; i++) { 4913 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4914 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4915 PetscCall(VecResetArray(pcbddc->vec1_R)); 4916 } 4917 } 4918 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4919 } 4920 } else { 4921 const PetscScalar *barr; 4922 PetscScalar *marr; 4923 4924 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4925 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4926 for (i = 0; i < n_eff_vertices; i++) { 4927 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4928 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4929 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4930 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4931 PetscCall(VecResetArray(pcbddc->vec1_R)); 4932 PetscCall(VecResetArray(pcbddc->vec2_R)); 4933 } 4934 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4935 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4936 } 4937 PetscCall(MatDestroy(&A_RV)); 4938 PetscCall(MatDestroy(&Brhs)); 4939 /* S_VV and S_CV */ 4940 if (n_constraints) { 4941 Mat B; 4942 4943 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4944 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4945 4946 /* S_CV = pcbddc->local_auxmat1 * B */ 4947 if (multi_element) { 4948 Mat T; 4949 4950 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4951 PetscCall(MatDestroy(&B)); 4952 B = T; 4953 } 4954 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4955 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4956 PetscCall(MatProductSetFromOptions(S_CV)); 4957 PetscCall(MatProductSymbolic(S_CV)); 4958 PetscCall(MatProductNumeric(S_CV)); 4959 PetscCall(MatProductClear(S_CV)); 4960 PetscCall(MatDestroy(&B)); 4961 4962 /* B = local_auxmat2_R * S_CV */ 4963 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4964 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4965 PetscCall(MatProductSetFromOptions(B)); 4966 PetscCall(MatProductSymbolic(B)); 4967 PetscCall(MatProductNumeric(B)); 4968 4969 PetscCall(MatScale(S_CV, m_one)); 4970 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4971 4972 if (multi_element) { 4973 Mat T; 4974 4975 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4976 PetscCall(MatDestroy(&A_RRmA_RV)); 4977 A_RRmA_RV = T; 4978 } 4979 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4980 PetscCall(MatDestroy(&B)); 4981 } else if (multi_element) { 4982 Mat T; 4983 4984 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4985 PetscCall(MatDestroy(&A_RRmA_RV)); 4986 A_RRmA_RV = T; 4987 } 4988 4989 if (lda_rhs != n_R) { 4990 Mat T; 4991 4992 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4993 PetscCall(MatDestroy(&A_RRmA_RV)); 4994 A_RRmA_RV = T; 4995 } 4996 4997 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4998 if (need_benign_correction) { /* XXX SPARSE */ 4999 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5000 PetscScalar *sums; 5001 const PetscScalar *marr; 5002 5003 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 5004 PetscCall(PetscMalloc1(n_vertices, &sums)); 5005 for (i = 0; i < reuse_solver->benign_n; i++) { 5006 const PetscScalar *vals; 5007 const PetscInt *idxs, *idxs_zero; 5008 PetscInt n, j, nz; 5009 5010 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 5011 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 5012 for (j = 0; j < n_vertices; j++) { 5013 sums[j] = 0.; 5014 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 5015 } 5016 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 5017 for (j = 0; j < n; j++) { 5018 PetscScalar val = vals[j]; 5019 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 5020 } 5021 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 5022 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 5023 } 5024 PetscCall(PetscFree(sums)); 5025 PetscCall(MatDestroy(&A_RV_bcorr)); 5026 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 5027 } 5028 5029 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 5030 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 5031 PetscCall(MatDestroy(&S_VV)); 5032 } 5033 5034 /* coarse basis functions */ 5035 if (coarse_phi_multi) { 5036 Mat Vid; 5037 5038 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 5039 PetscCall(MatShift_Basic(Vid, 1.0)); 5040 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 5041 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 5042 PetscCall(MatDestroy(&Vid)); 5043 } else { 5044 if (A_RRmA_RV) { 5045 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 5046 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5047 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 5048 if (pcbddc->benign_n) { 5049 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5050 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 5051 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 5052 } 5053 } 5054 } 5055 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 5056 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 5057 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 5058 } 5059 PetscCall(MatDestroy(&A_RRmA_RV)); 5060 } 5061 PetscCall(MatDestroy(&A_RV)); 5062 PetscCall(VecDestroy(&dummy_vec)); 5063 5064 if (n_constraints) { 5065 Mat B, B2; 5066 5067 PetscCall(MatScale(S_CC, m_one)); 5068 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 5069 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 5070 PetscCall(MatProductSetFromOptions(B)); 5071 PetscCall(MatProductSymbolic(B)); 5072 PetscCall(MatProductNumeric(B)); 5073 5074 if (n_vertices) { 5075 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 5076 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 5077 } else { 5078 if (lda_rhs != n_R) { 5079 Mat tB; 5080 5081 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 5082 PetscCall(MatDestroy(&B)); 5083 B = tB; 5084 } 5085 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 5086 } 5087 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 5088 } 5089 5090 /* coarse basis functions */ 5091 if (coarse_phi_multi) { 5092 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 5093 } else { 5094 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 5095 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 5096 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 5097 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5098 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 5099 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 5100 if (pcbddc->benign_n) { 5101 for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5102 } 5103 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 5104 } 5105 } 5106 PetscCall(MatDestroy(&B)); 5107 } 5108 5109 /* assemble sparse coarse basis functions */ 5110 if (coarse_phi_multi) { 5111 Mat T; 5112 5113 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 5114 PetscCall(MatDestroy(&coarse_phi_multi)); 5115 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 5116 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); 5117 PetscCall(MatDestroy(&T)); 5118 } 5119 PetscCall(MatDestroy(&local_auxmat2_R)); 5120 PetscCall(PetscFree(p0_lidx_I)); 5121 5122 /* coarse matrix entries relative to B_0 */ 5123 if (pcbddc->benign_n) { 5124 Mat B0_B, B0_BPHI; 5125 IS is_dummy; 5126 const PetscScalar *data; 5127 PetscInt j; 5128 5129 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5130 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5131 PetscCall(ISDestroy(&is_dummy)); 5132 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5133 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5134 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5135 for (j = 0; j < pcbddc->benign_n; j++) { 5136 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5137 for (i = 0; i < pcbddc->local_primal_size; i++) { 5138 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5139 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5140 } 5141 } 5142 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5143 PetscCall(MatDestroy(&B0_B)); 5144 PetscCall(MatDestroy(&B0_BPHI)); 5145 } 5146 5147 /* compute other basis functions for non-symmetric problems */ 5148 if (!pcbddc->symmetric_primal) { 5149 Mat B_V = NULL, B_C = NULL; 5150 PetscScalar *marray, *work; 5151 5152 /* TODO multi_element MatDenseScatter */ 5153 if (n_constraints) { 5154 Mat S_CCT, C_CRT; 5155 5156 PetscCall(MatScale(S_CC, m_one)); 5157 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5158 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5159 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5160 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5161 PetscCall(MatDestroy(&S_CCT)); 5162 if (n_vertices) { 5163 Mat S_VCT; 5164 5165 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5166 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5167 PetscCall(MatDestroy(&S_VCT)); 5168 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5169 } 5170 PetscCall(MatDestroy(&C_CRT)); 5171 } else { 5172 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5173 } 5174 if (n_vertices && n_R) { 5175 PetscScalar *av, *marray; 5176 const PetscInt *xadj, *adjncy; 5177 PetscInt n; 5178 PetscBool flg_row; 5179 5180 /* B_V = B_V - A_VR^T */ 5181 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5182 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5183 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5184 PetscCall(MatDenseGetArray(B_V, &marray)); 5185 for (i = 0; i < n; i++) { 5186 PetscInt j; 5187 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5188 } 5189 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5190 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5191 PetscCall(MatDestroy(&A_VR)); 5192 } 5193 5194 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5195 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5196 if (n_vertices) { 5197 PetscCall(MatDenseGetArray(B_V, &marray)); 5198 for (i = 0; i < n_vertices; i++) { 5199 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5200 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5201 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5202 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5203 PetscCall(VecResetArray(pcbddc->vec1_R)); 5204 PetscCall(VecResetArray(pcbddc->vec2_R)); 5205 } 5206 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5207 } 5208 if (B_C) { 5209 PetscCall(MatDenseGetArray(B_C, &marray)); 5210 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5211 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5212 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5213 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5214 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5215 PetscCall(VecResetArray(pcbddc->vec1_R)); 5216 PetscCall(VecResetArray(pcbddc->vec2_R)); 5217 } 5218 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5219 } 5220 /* coarse basis functions */ 5221 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5222 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5223 for (i = 0; i < pcbddc->local_primal_size; i++) { 5224 Vec v; 5225 5226 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5227 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5228 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5229 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5230 if (i < n_vertices) { 5231 PetscScalar one = 1.0; 5232 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5233 PetscCall(VecAssemblyBegin(v)); 5234 PetscCall(VecAssemblyEnd(v)); 5235 } 5236 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5237 5238 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5239 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5240 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5241 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5242 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5243 } 5244 PetscCall(VecResetArray(pcbddc->vec1_R)); 5245 } 5246 PetscCall(MatDestroy(&B_V)); 5247 PetscCall(MatDestroy(&B_C)); 5248 PetscCall(PetscFree(work)); 5249 } else { 5250 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5251 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5252 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5253 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5254 } 5255 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5256 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5257 5258 /* free memory */ 5259 PetscCall(PetscFree(V_to_eff_V)); 5260 PetscCall(PetscFree(C_to_eff_C)); 5261 PetscCall(PetscFree(R_eff_V_J)); 5262 PetscCall(PetscFree(R_eff_C_J)); 5263 PetscCall(PetscFree(B_eff_V_J)); 5264 PetscCall(PetscFree(B_eff_C_J)); 5265 PetscCall(ISDestroy(&is_R)); 5266 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5267 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5268 PetscCall(ISDestroy(&is_V)); 5269 PetscCall(ISDestroy(&is_C)); 5270 PetscCall(PetscFree(idx_V_B)); 5271 PetscCall(MatDestroy(&S_CV)); 5272 PetscCall(MatDestroy(&S_VC)); 5273 PetscCall(MatDestroy(&S_CC)); 5274 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5275 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5276 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5277 5278 /* Checking coarse_sub_mat and coarse basis functions */ 5279 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5280 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5281 if (pcbddc->dbg_flag) { 5282 Mat AUXMAT, TM1, TM2, TM3, TM4; 5283 Mat coarse_phi_D, coarse_phi_B; 5284 Mat coarse_psi_D, coarse_psi_B; 5285 Mat A_II, A_BB, A_IB, A_BI; 5286 Mat C_B, CPHI; 5287 IS is_dummy; 5288 Vec mones; 5289 MatType checkmattype = MATSEQAIJ; 5290 PetscReal real_value; 5291 5292 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5293 Mat A; 5294 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5295 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5296 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5297 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5298 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5299 PetscCall(MatDestroy(&A)); 5300 } else { 5301 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5302 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5303 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5304 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5305 } 5306 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5307 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5308 if (!pcbddc->symmetric_primal) { 5309 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5310 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5311 } 5312 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5313 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5314 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5315 if (!pcbddc->symmetric_primal) { 5316 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5317 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5318 PetscCall(MatDestroy(&AUXMAT)); 5319 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5320 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5321 PetscCall(MatDestroy(&AUXMAT)); 5322 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5323 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5324 PetscCall(MatDestroy(&AUXMAT)); 5325 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5326 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5327 PetscCall(MatDestroy(&AUXMAT)); 5328 } else { 5329 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5330 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5331 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5332 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5333 PetscCall(MatDestroy(&AUXMAT)); 5334 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5335 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5336 PetscCall(MatDestroy(&AUXMAT)); 5337 } 5338 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5339 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5340 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5341 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5342 if (pcbddc->benign_n) { 5343 Mat B0_B, B0_BPHI; 5344 const PetscScalar *data2; 5345 PetscScalar *data; 5346 PetscInt j; 5347 5348 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5349 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5350 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5351 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5352 PetscCall(MatDenseGetArray(TM1, &data)); 5353 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5354 for (j = 0; j < pcbddc->benign_n; j++) { 5355 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5356 for (i = 0; i < pcbddc->local_primal_size; i++) { 5357 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5358 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5359 } 5360 } 5361 PetscCall(MatDenseRestoreArray(TM1, &data)); 5362 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5363 PetscCall(MatDestroy(&B0_B)); 5364 PetscCall(ISDestroy(&is_dummy)); 5365 PetscCall(MatDestroy(&B0_BPHI)); 5366 } 5367 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5368 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5369 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5370 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5371 5372 /* check constraints */ 5373 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5374 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5375 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5376 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5377 } else { 5378 PetscScalar *data; 5379 Mat tmat; 5380 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5381 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5382 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5383 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5384 PetscCall(MatDestroy(&tmat)); 5385 } 5386 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5387 PetscCall(VecSet(mones, -1.0)); 5388 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5389 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5390 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5391 if (!pcbddc->symmetric_primal) { 5392 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5393 PetscCall(VecSet(mones, -1.0)); 5394 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5395 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5396 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5397 } 5398 PetscCall(MatDestroy(&C_B)); 5399 PetscCall(MatDestroy(&CPHI)); 5400 PetscCall(ISDestroy(&is_dummy)); 5401 PetscCall(VecDestroy(&mones)); 5402 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5403 PetscCall(MatDestroy(&A_II)); 5404 PetscCall(MatDestroy(&A_BB)); 5405 PetscCall(MatDestroy(&A_IB)); 5406 PetscCall(MatDestroy(&A_BI)); 5407 PetscCall(MatDestroy(&TM1)); 5408 PetscCall(MatDestroy(&TM2)); 5409 PetscCall(MatDestroy(&TM3)); 5410 PetscCall(MatDestroy(&TM4)); 5411 PetscCall(MatDestroy(&coarse_phi_D)); 5412 PetscCall(MatDestroy(&coarse_phi_B)); 5413 if (!pcbddc->symmetric_primal) { 5414 PetscCall(MatDestroy(&coarse_psi_D)); 5415 PetscCall(MatDestroy(&coarse_psi_B)); 5416 } 5417 } 5418 5419 #if 0 5420 { 5421 PetscViewer viewer; 5422 char filename[256]; 5423 5424 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5425 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5426 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5427 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5428 PetscCall(MatView(*coarse_submat,viewer)); 5429 if (pcbddc->coarse_phi_B) { 5430 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5431 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5432 } 5433 if (pcbddc->coarse_phi_D) { 5434 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5435 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5436 } 5437 if (pcbddc->coarse_psi_B) { 5438 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5439 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5440 } 5441 if (pcbddc->coarse_psi_D) { 5442 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5443 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5444 } 5445 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5446 PetscCall(MatView(pcbddc->local_mat,viewer)); 5447 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5448 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5449 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5450 PetscCall(ISView(pcis->is_I_local,viewer)); 5451 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5452 PetscCall(ISView(pcis->is_B_local,viewer)); 5453 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5454 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5455 PetscCall(PetscViewerDestroy(&viewer)); 5456 } 5457 #endif 5458 5459 /* device support */ 5460 { 5461 PetscBool iscuda, iship, iskokkos; 5462 MatType mtype = NULL; 5463 5464 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5465 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5466 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5467 if (iskokkos) { 5468 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5469 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5470 } 5471 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5472 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5473 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5474 if (mtype) { 5475 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5476 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5477 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5478 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5479 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5480 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5481 } 5482 } 5483 PetscFunctionReturn(PETSC_SUCCESS); 5484 } 5485 5486 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5487 { 5488 Mat *work_mat; 5489 IS isrow_s, iscol_s; 5490 PetscBool rsorted, csorted; 5491 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5492 5493 PetscFunctionBegin; 5494 PetscCall(ISSorted(isrow, &rsorted)); 5495 PetscCall(ISSorted(iscol, &csorted)); 5496 PetscCall(ISGetLocalSize(isrow, &rsize)); 5497 PetscCall(ISGetLocalSize(iscol, &csize)); 5498 5499 if (!rsorted) { 5500 const PetscInt *idxs; 5501 PetscInt *idxs_sorted, i; 5502 5503 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5504 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5505 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5506 PetscCall(ISGetIndices(isrow, &idxs)); 5507 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5508 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5509 PetscCall(ISRestoreIndices(isrow, &idxs)); 5510 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5511 } else { 5512 PetscCall(PetscObjectReference((PetscObject)isrow)); 5513 isrow_s = isrow; 5514 } 5515 5516 if (!csorted) { 5517 if (isrow == iscol) { 5518 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5519 iscol_s = isrow_s; 5520 } else { 5521 const PetscInt *idxs; 5522 PetscInt *idxs_sorted, i; 5523 5524 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5525 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5526 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5527 PetscCall(ISGetIndices(iscol, &idxs)); 5528 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5529 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5530 PetscCall(ISRestoreIndices(iscol, &idxs)); 5531 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5532 } 5533 } else { 5534 PetscCall(PetscObjectReference((PetscObject)iscol)); 5535 iscol_s = iscol; 5536 } 5537 5538 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5539 5540 if (!rsorted || !csorted) { 5541 Mat new_mat; 5542 IS is_perm_r, is_perm_c; 5543 5544 if (!rsorted) { 5545 PetscInt *idxs_r, i; 5546 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5547 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5548 PetscCall(PetscFree(idxs_perm_r)); 5549 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5550 } else { 5551 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5552 } 5553 PetscCall(ISSetPermutation(is_perm_r)); 5554 5555 if (!csorted) { 5556 if (isrow_s == iscol_s) { 5557 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5558 is_perm_c = is_perm_r; 5559 } else { 5560 PetscInt *idxs_c, i; 5561 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5562 PetscCall(PetscMalloc1(csize, &idxs_c)); 5563 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5564 PetscCall(PetscFree(idxs_perm_c)); 5565 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5566 } 5567 } else { 5568 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5569 } 5570 PetscCall(ISSetPermutation(is_perm_c)); 5571 5572 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5573 PetscCall(MatDestroy(&work_mat[0])); 5574 work_mat[0] = new_mat; 5575 PetscCall(ISDestroy(&is_perm_r)); 5576 PetscCall(ISDestroy(&is_perm_c)); 5577 } 5578 5579 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5580 *B = work_mat[0]; 5581 PetscCall(MatDestroyMatrices(1, &work_mat)); 5582 PetscCall(ISDestroy(&isrow_s)); 5583 PetscCall(ISDestroy(&iscol_s)); 5584 PetscFunctionReturn(PETSC_SUCCESS); 5585 } 5586 5587 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C) 5588 { 5589 PetscFunctionBegin; 5590 PetscCall(MatProductCreate(A, P, NULL, C)); 5591 PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP)); 5592 PetscCall(MatProductSetAlgorithm(*C, "default")); 5593 PetscCall(MatProductSetFill(*C, fill)); 5594 PetscCall(MatSetOptionsPrefix(*C, prefix)); 5595 PetscCall(MatProductSetFromOptions(*C)); 5596 PetscCall(MatProductSymbolic(*C)); 5597 PetscCall(MatProductNumeric(*C)); 5598 (*C)->symmetric = A->symmetric; 5599 (*C)->spd = A->spd; 5600 PetscFunctionReturn(PETSC_SUCCESS); 5601 } 5602 5603 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5604 { 5605 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5606 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5607 Mat new_mat, lA; 5608 IS is_local, is_global; 5609 PetscInt local_size; 5610 PetscBool isseqaij, issym, isset; 5611 char ptapprefix[256]; 5612 5613 PetscFunctionBegin; 5614 PetscCall(MatDestroy(&pcbddc->local_mat)); 5615 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5616 if (pcbddc->mat_graph->multi_element) { 5617 Mat *mats, *bdiags; 5618 IS *gsubs; 5619 PetscInt nsubs = pcbddc->n_local_subs; 5620 5621 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5622 #if 1 5623 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5624 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5625 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5626 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5627 PetscCall(PetscFree(gsubs)); 5628 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */ 5629 Mat *tmats; 5630 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5631 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5632 PetscCall(ISDestroy(&is_local)); 5633 PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE)); 5634 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats)); 5635 PetscCall(ISDestroy(&is_global)); 5636 PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags)); 5637 PetscCall(MatDestroySubMatrices(1, &tmats)); 5638 #endif 5639 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5640 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5641 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5642 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5643 PetscCall(PetscFree(mats)); 5644 } else { 5645 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5646 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5647 PetscCall(ISDestroy(&is_local)); 5648 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5649 PetscCall(ISDestroy(&is_global)); 5650 } 5651 if (pcbddc->dbg_flag) { 5652 Vec x, x_change; 5653 PetscReal error; 5654 5655 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5656 PetscCall(VecSetRandom(x, NULL)); 5657 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5658 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5659 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5660 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5661 if (!pcbddc->change_interior) { 5662 const PetscScalar *x, *y, *v; 5663 PetscReal lerror = 0.; 5664 PetscInt i; 5665 5666 PetscCall(VecGetArrayRead(matis->x, &x)); 5667 PetscCall(VecGetArrayRead(matis->y, &y)); 5668 PetscCall(VecGetArrayRead(matis->counter, &v)); 5669 for (i = 0; i < local_size; i++) 5670 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5671 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5672 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5673 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5674 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5675 if (error > PETSC_SMALL) { 5676 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5677 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5678 } else { 5679 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5680 } 5681 } 5682 } 5683 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5684 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5685 PetscCall(VecAXPY(x, -1.0, x_change)); 5686 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5687 if (error > PETSC_SMALL) { 5688 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5689 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5690 } else { 5691 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5692 } 5693 } 5694 PetscCall(VecDestroy(&x)); 5695 PetscCall(VecDestroy(&x_change)); 5696 } 5697 5698 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5699 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5700 5701 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5702 if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix)); 5703 else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_")); 5704 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5705 if (isseqaij) { 5706 PetscCall(MatDestroy(&pcbddc->local_mat)); 5707 PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5708 if (lA) { 5709 Mat work; 5710 PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5711 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5712 PetscCall(MatDestroy(&work)); 5713 } 5714 } else { 5715 Mat work_mat; 5716 5717 PetscCall(MatDestroy(&pcbddc->local_mat)); 5718 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5719 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5720 PetscCall(MatDestroy(&work_mat)); 5721 if (lA) { 5722 Mat work; 5723 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5724 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5725 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5726 PetscCall(MatDestroy(&work)); 5727 } 5728 } 5729 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5730 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5731 PetscCall(MatDestroy(&new_mat)); 5732 PetscFunctionReturn(PETSC_SUCCESS); 5733 } 5734 5735 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5736 { 5737 PC_IS *pcis = (PC_IS *)pc->data; 5738 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5739 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5740 PetscInt *idx_R_local = NULL; 5741 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5742 PetscInt vbs, bs; 5743 PetscBT bitmask = NULL; 5744 5745 PetscFunctionBegin; 5746 /* 5747 No need to setup local scatters if 5748 - primal space is unchanged 5749 AND 5750 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5751 AND 5752 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5753 */ 5754 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5755 /* destroy old objects */ 5756 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5757 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5758 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5759 /* Set Non-overlapping dimensions */ 5760 n_B = pcis->n_B; 5761 n_D = pcis->n - n_B; 5762 n_vertices = pcbddc->n_vertices; 5763 5764 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5765 5766 /* create auxiliary bitmask and allocate workspace */ 5767 if (!sub_schurs || !sub_schurs->reuse_solver) { 5768 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5769 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5770 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5771 5772 for (i = 0, n_R = 0; i < pcis->n; i++) { 5773 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5774 } 5775 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5776 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5777 5778 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5779 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5780 } 5781 5782 /* Block code */ 5783 vbs = 1; 5784 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5785 if (bs > 1 && !(n_vertices % bs)) { 5786 PetscBool is_blocked = PETSC_TRUE; 5787 PetscInt *vary; 5788 if (!sub_schurs || !sub_schurs->reuse_solver) { 5789 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5790 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5791 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5792 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5793 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5794 for (i = 0; i < pcis->n / bs; i++) { 5795 if (vary[i] != 0 && vary[i] != bs) { 5796 is_blocked = PETSC_FALSE; 5797 break; 5798 } 5799 } 5800 PetscCall(PetscFree(vary)); 5801 } else { 5802 /* Verify directly the R set */ 5803 for (i = 0; i < n_R / bs; i++) { 5804 PetscInt j, node = idx_R_local[bs * i]; 5805 for (j = 1; j < bs; j++) { 5806 if (node != idx_R_local[bs * i + j] - j) { 5807 is_blocked = PETSC_FALSE; 5808 break; 5809 } 5810 } 5811 } 5812 } 5813 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5814 vbs = bs; 5815 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5816 } 5817 } 5818 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5819 if (sub_schurs && sub_schurs->reuse_solver) { 5820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5821 5822 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5823 PetscCall(ISDestroy(&reuse_solver->is_R)); 5824 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5825 reuse_solver->is_R = pcbddc->is_R_local; 5826 } else { 5827 PetscCall(PetscFree(idx_R_local)); 5828 } 5829 5830 /* print some info if requested */ 5831 if (pcbddc->dbg_flag) { 5832 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5833 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5834 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5835 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5836 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5837 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5838 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5839 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5840 } 5841 5842 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5843 if (!sub_schurs || !sub_schurs->reuse_solver) { 5844 IS is_aux1, is_aux2; 5845 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5846 5847 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5848 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5849 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5850 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5851 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5852 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5853 for (i = 0, j = 0; i < n_R; i++) { 5854 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5855 } 5856 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5857 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5858 for (i = 0, j = 0; i < n_B; i++) { 5859 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5860 } 5861 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5862 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5863 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5864 PetscCall(ISDestroy(&is_aux1)); 5865 PetscCall(ISDestroy(&is_aux2)); 5866 5867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5868 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5869 for (i = 0, j = 0; i < n_R; i++) { 5870 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5871 } 5872 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5873 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5874 PetscCall(ISDestroy(&is_aux1)); 5875 } 5876 PetscCall(PetscBTDestroy(&bitmask)); 5877 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5878 } else { 5879 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5880 IS tis; 5881 PetscInt schur_size; 5882 5883 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5884 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5885 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5886 PetscCall(ISDestroy(&tis)); 5887 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5888 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5889 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5890 PetscCall(ISDestroy(&tis)); 5891 } 5892 } 5893 PetscFunctionReturn(PETSC_SUCCESS); 5894 } 5895 5896 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5897 { 5898 MatNullSpace NullSpace; 5899 Mat dmat; 5900 const Vec *nullvecs; 5901 Vec v, v2, *nullvecs2; 5902 VecScatter sct = NULL; 5903 PetscScalar *ddata; 5904 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5905 PetscBool nnsp_has_cnst; 5906 5907 PetscFunctionBegin; 5908 if (!is && !B) { /* MATIS */ 5909 Mat_IS *matis = (Mat_IS *)A->data; 5910 5911 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5912 sct = matis->cctx; 5913 PetscCall(PetscObjectReference((PetscObject)sct)); 5914 } else { 5915 PetscCall(MatGetNullSpace(B, &NullSpace)); 5916 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5917 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5918 } 5919 PetscCall(MatGetNullSpace(A, &NullSpace)); 5920 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5921 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5922 5923 PetscCall(MatCreateVecs(A, &v, NULL)); 5924 PetscCall(MatCreateVecs(B, &v2, NULL)); 5925 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5926 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs)); 5927 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5928 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5929 PetscCall(VecGetBlockSize(v2, &bs)); 5930 PetscCall(VecGetSize(v2, &N)); 5931 PetscCall(VecGetLocalSize(v2, &n)); 5932 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5933 for (k = 0; k < nnsp_size; k++) { 5934 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5935 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5936 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5937 } 5938 if (nnsp_has_cnst) { 5939 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5940 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5941 } 5942 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5943 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5944 5945 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5946 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault)); 5947 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5948 PetscCall(MatDestroy(&dmat)); 5949 5950 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5951 PetscCall(PetscFree(nullvecs2)); 5952 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5953 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5954 PetscCall(VecDestroy(&v)); 5955 PetscCall(VecDestroy(&v2)); 5956 PetscCall(VecScatterDestroy(&sct)); 5957 PetscFunctionReturn(PETSC_SUCCESS); 5958 } 5959 5960 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5961 { 5962 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5963 PC_IS *pcis = (PC_IS *)pc->data; 5964 PC pc_temp; 5965 Mat A_RR; 5966 MatNullSpace nnsp; 5967 MatReuse reuse; 5968 PetscScalar m_one = -1.0; 5969 PetscReal value; 5970 PetscInt n_D, n_R; 5971 PetscBool issbaij, opts, isset, issym; 5972 PetscBool f = PETSC_FALSE; 5973 char dir_prefix[256], neu_prefix[256], str_level[16]; 5974 size_t len; 5975 5976 PetscFunctionBegin; 5977 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5978 /* approximate solver, propagate NearNullSpace if needed */ 5979 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5980 MatNullSpace gnnsp1, gnnsp2; 5981 PetscBool lhas, ghas; 5982 5983 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5984 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5985 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5986 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5987 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5988 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5989 } 5990 5991 /* compute prefixes */ 5992 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5993 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5994 if (!pcbddc->current_level) { 5995 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5996 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5997 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5998 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5999 } else { 6000 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 6001 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 6002 len -= 15; /* remove "pc_bddc_coarse_" */ 6003 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 6004 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 6005 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 6006 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 6007 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 6008 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 6009 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 6010 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 6011 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 6012 } 6013 6014 /* DIRICHLET PROBLEM */ 6015 if (dirichlet) { 6016 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6017 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 6018 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 6019 if (pcbddc->dbg_flag) { 6020 Mat A_IIn; 6021 6022 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 6023 PetscCall(MatDestroy(&pcis->A_II)); 6024 pcis->A_II = A_IIn; 6025 } 6026 } 6027 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6028 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 6029 6030 /* Matrix for Dirichlet problem is pcis->A_II */ 6031 n_D = pcis->n - pcis->n_B; 6032 opts = PETSC_FALSE; 6033 if (!pcbddc->ksp_D) { /* create object if not yet build */ 6034 opts = PETSC_TRUE; 6035 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 6036 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 6037 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 6038 /* default */ 6039 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 6040 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 6041 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 6042 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6043 if (issbaij) { 6044 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6045 } else { 6046 PetscCall(PCSetType(pc_temp, PCLU)); 6047 } 6048 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 6049 } 6050 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 6051 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 6052 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 6053 /* Allow user's customization */ 6054 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 6055 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 6056 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6057 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 6058 } 6059 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 6060 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6061 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6062 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6063 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6064 const PetscInt *idxs; 6065 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6066 6067 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 6068 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 6069 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6070 for (i = 0; i < nl; i++) { 6071 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6072 } 6073 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 6074 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6075 PetscCall(PetscFree(scoords)); 6076 } 6077 if (sub_schurs && sub_schurs->reuse_solver) { 6078 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6079 6080 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 6081 } 6082 6083 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6084 if (!n_D) { 6085 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6086 PetscCall(PCSetType(pc_temp, PCNONE)); 6087 } 6088 PetscCall(KSPSetUp(pcbddc->ksp_D)); 6089 /* set ksp_D into pcis data */ 6090 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 6091 PetscCall(KSPDestroy(&pcis->ksp_D)); 6092 pcis->ksp_D = pcbddc->ksp_D; 6093 } 6094 6095 /* NEUMANN PROBLEM */ 6096 A_RR = NULL; 6097 if (neumann) { 6098 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6099 PetscInt ibs, mbs; 6100 PetscBool issbaij, reuse_neumann_solver, isset, issym; 6101 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6102 6103 reuse_neumann_solver = PETSC_FALSE; 6104 if (sub_schurs && sub_schurs->reuse_solver) { 6105 IS iP; 6106 6107 reuse_neumann_solver = PETSC_TRUE; 6108 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 6109 if (iP) reuse_neumann_solver = PETSC_FALSE; 6110 } 6111 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 6112 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 6113 if (pcbddc->ksp_R) { /* already created ksp */ 6114 PetscInt nn_R; 6115 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 6116 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6117 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 6118 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 6119 PetscCall(KSPReset(pcbddc->ksp_R)); 6120 PetscCall(MatDestroy(&A_RR)); 6121 reuse = MAT_INITIAL_MATRIX; 6122 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 6123 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 6124 PetscCall(MatDestroy(&A_RR)); 6125 reuse = MAT_INITIAL_MATRIX; 6126 } else { /* safe to reuse the matrix */ 6127 reuse = MAT_REUSE_MATRIX; 6128 } 6129 } 6130 /* last check */ 6131 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 6132 PetscCall(MatDestroy(&A_RR)); 6133 reuse = MAT_INITIAL_MATRIX; 6134 } 6135 } else { /* first time, so we need to create the matrix */ 6136 reuse = MAT_INITIAL_MATRIX; 6137 } 6138 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 6139 TODO: Get Rid of these conversions */ 6140 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 6141 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 6142 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 6143 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 6144 if (matis->A == pcbddc->local_mat) { 6145 PetscCall(MatDestroy(&pcbddc->local_mat)); 6146 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6147 } else { 6148 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6149 } 6150 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6151 if (matis->A == pcbddc->local_mat) { 6152 PetscCall(MatDestroy(&pcbddc->local_mat)); 6153 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6154 } else { 6155 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6156 } 6157 } 6158 /* extract A_RR */ 6159 if (reuse_neumann_solver) { 6160 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6161 6162 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6163 PetscCall(MatDestroy(&A_RR)); 6164 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6165 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6166 } else { 6167 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6168 } 6169 } else { 6170 PetscCall(MatDestroy(&A_RR)); 6171 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6172 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6173 } 6174 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6175 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6176 } 6177 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6178 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6179 opts = PETSC_FALSE; 6180 if (!pcbddc->ksp_R) { /* create object if not present */ 6181 opts = PETSC_TRUE; 6182 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6183 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6184 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6185 /* default */ 6186 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6187 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6188 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6189 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6190 if (issbaij) { 6191 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6192 } else { 6193 PetscCall(PCSetType(pc_temp, PCLU)); 6194 } 6195 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6196 } 6197 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6198 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6199 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6200 if (opts) { /* Allow user's customization once */ 6201 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6202 } 6203 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6204 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6205 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6206 } 6207 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6208 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6209 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6210 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6211 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6212 const PetscInt *idxs; 6213 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6214 6215 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6216 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6217 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6218 for (i = 0; i < nl; i++) { 6219 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6220 } 6221 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6222 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6223 PetscCall(PetscFree(scoords)); 6224 } 6225 6226 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6227 if (!n_R) { 6228 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6229 PetscCall(PCSetType(pc_temp, PCNONE)); 6230 } 6231 /* Reuse solver if it is present */ 6232 if (reuse_neumann_solver) { 6233 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6234 6235 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6236 } 6237 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6238 } 6239 6240 if (pcbddc->dbg_flag) { 6241 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6242 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6243 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6244 } 6245 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6246 6247 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6248 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6249 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6250 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6251 /* check Dirichlet and Neumann solvers */ 6252 if (pcbddc->dbg_flag) { 6253 if (dirichlet) { /* Dirichlet */ 6254 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6255 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6256 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6257 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6258 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6259 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6260 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6261 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6262 } 6263 if (neumann) { /* Neumann */ 6264 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6265 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6266 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6267 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6268 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6269 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6270 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6271 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6272 } 6273 } 6274 /* free Neumann problem's matrix */ 6275 PetscCall(MatDestroy(&A_RR)); 6276 PetscFunctionReturn(PETSC_SUCCESS); 6277 } 6278 6279 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6280 { 6281 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6282 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6283 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6284 6285 PetscFunctionBegin; 6286 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6287 if (!pcbddc->switch_static) { 6288 if (applytranspose && pcbddc->local_auxmat1) { 6289 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6290 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6291 } 6292 if (!reuse_solver) { 6293 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6294 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6295 } else { 6296 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6297 6298 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6299 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6300 } 6301 } else { 6302 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6303 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6304 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6305 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6306 if (applytranspose && pcbddc->local_auxmat1) { 6307 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6308 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6309 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6310 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6311 } 6312 } 6313 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6314 if (!reuse_solver || pcbddc->switch_static) { 6315 if (applytranspose) { 6316 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6317 } else { 6318 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6319 } 6320 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6321 } else { 6322 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6323 6324 if (applytranspose) { 6325 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6326 } else { 6327 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6328 } 6329 } 6330 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6331 PetscCall(VecSet(inout_B, 0.)); 6332 if (!pcbddc->switch_static) { 6333 if (!reuse_solver) { 6334 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6335 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6336 } else { 6337 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6338 6339 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6340 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6341 } 6342 if (!applytranspose && pcbddc->local_auxmat1) { 6343 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6344 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6345 } 6346 } else { 6347 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6348 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6349 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6350 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6351 if (!applytranspose && pcbddc->local_auxmat1) { 6352 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6353 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6354 } 6355 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6356 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6357 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6358 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6359 } 6360 PetscFunctionReturn(PETSC_SUCCESS); 6361 } 6362 6363 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6364 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6365 { 6366 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6367 PC_IS *pcis = (PC_IS *)pc->data; 6368 const PetscScalar zero = 0.0; 6369 6370 PetscFunctionBegin; 6371 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6372 if (!pcbddc->benign_apply_coarse_only) { 6373 if (applytranspose) { 6374 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6375 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6376 } else { 6377 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6378 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6379 } 6380 } else { 6381 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6382 } 6383 6384 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6385 if (pcbddc->benign_n) { 6386 PetscScalar *array; 6387 PetscInt j; 6388 6389 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6390 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6391 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6392 } 6393 6394 /* start communications from local primal nodes to rhs of coarse solver */ 6395 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6396 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6397 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6398 6399 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6400 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6401 if (pcbddc->coarse_ksp) { 6402 Mat coarse_mat; 6403 Vec rhs, sol; 6404 MatNullSpace nullsp; 6405 PetscBool isbddc = PETSC_FALSE; 6406 6407 if (pcbddc->benign_have_null) { 6408 PC coarse_pc; 6409 6410 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6411 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6412 /* we need to propagate to coarser levels the need for a possible benign correction */ 6413 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6414 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6415 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6416 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6417 } 6418 } 6419 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6420 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6421 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6422 if (applytranspose) { 6423 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6424 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6425 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6426 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6427 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6428 } else { 6429 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6430 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6431 PC coarse_pc; 6432 6433 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6434 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6435 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6436 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6437 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6438 } else { 6439 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6440 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6441 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6442 } 6443 } 6444 /* we don't need the benign correction at coarser levels anymore */ 6445 if (pcbddc->benign_have_null && isbddc) { 6446 PC coarse_pc; 6447 PC_BDDC *coarsepcbddc; 6448 6449 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6450 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6451 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6452 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6453 } 6454 } 6455 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6456 6457 /* Local solution on R nodes */ 6458 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6459 /* communications from coarse sol to local primal nodes */ 6460 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6461 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6462 6463 /* Sum contributions from the two levels */ 6464 if (!pcbddc->benign_apply_coarse_only) { 6465 if (applytranspose) { 6466 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6467 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6468 } else { 6469 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6470 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6471 } 6472 /* store p0 */ 6473 if (pcbddc->benign_n) { 6474 PetscScalar *array; 6475 PetscInt j; 6476 6477 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6478 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6479 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6480 } 6481 } else { /* expand the coarse solution */ 6482 if (applytranspose) { 6483 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6484 } else { 6485 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6486 } 6487 } 6488 PetscFunctionReturn(PETSC_SUCCESS); 6489 } 6490 6491 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6492 { 6493 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6494 Vec from, to; 6495 const PetscScalar *array; 6496 6497 PetscFunctionBegin; 6498 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6499 from = pcbddc->coarse_vec; 6500 to = pcbddc->vec1_P; 6501 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6502 Vec tvec; 6503 6504 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6505 PetscCall(VecResetArray(tvec)); 6506 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6507 PetscCall(VecGetArrayRead(tvec, &array)); 6508 PetscCall(VecPlaceArray(from, array)); 6509 PetscCall(VecRestoreArrayRead(tvec, &array)); 6510 } 6511 } else { /* from local to global -> put data in coarse right-hand side */ 6512 from = pcbddc->vec1_P; 6513 to = pcbddc->coarse_vec; 6514 } 6515 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6516 PetscFunctionReturn(PETSC_SUCCESS); 6517 } 6518 6519 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6520 { 6521 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6522 Vec from, to; 6523 const PetscScalar *array; 6524 6525 PetscFunctionBegin; 6526 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6527 from = pcbddc->coarse_vec; 6528 to = pcbddc->vec1_P; 6529 } else { /* from local to global -> put data in coarse right-hand side */ 6530 from = pcbddc->vec1_P; 6531 to = pcbddc->coarse_vec; 6532 } 6533 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6534 if (smode == SCATTER_FORWARD) { 6535 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6536 Vec tvec; 6537 6538 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6539 PetscCall(VecGetArrayRead(to, &array)); 6540 PetscCall(VecPlaceArray(tvec, array)); 6541 PetscCall(VecRestoreArrayRead(to, &array)); 6542 } 6543 } else { 6544 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6545 PetscCall(VecResetArray(from)); 6546 } 6547 } 6548 PetscFunctionReturn(PETSC_SUCCESS); 6549 } 6550 6551 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6552 { 6553 PC_IS *pcis = (PC_IS *)pc->data; 6554 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6555 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6556 /* one and zero */ 6557 PetscScalar one = 1.0, zero = 0.0; 6558 /* space to store constraints and their local indices */ 6559 PetscScalar *constraints_data; 6560 PetscInt *constraints_idxs, *constraints_idxs_B; 6561 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6562 PetscInt *constraints_n; 6563 /* iterators */ 6564 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6565 /* BLAS integers */ 6566 PetscBLASInt lwork, lierr; 6567 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6568 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6569 /* reuse */ 6570 PetscInt olocal_primal_size, olocal_primal_size_cc; 6571 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6572 /* change of basis */ 6573 PetscBool qr_needed; 6574 PetscBT change_basis, qr_needed_idx; 6575 /* auxiliary stuff */ 6576 PetscInt *nnz, *is_indices; 6577 PetscInt ncc; 6578 /* some quantities */ 6579 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6580 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6581 PetscReal tol; /* tolerance for retaining eigenmodes */ 6582 6583 PetscFunctionBegin; 6584 tol = PetscSqrtReal(PETSC_SMALL); 6585 /* Destroy Mat objects computed previously */ 6586 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6587 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6588 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6589 /* save info on constraints from previous setup (if any) */ 6590 olocal_primal_size = pcbddc->local_primal_size; 6591 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6592 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6593 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6594 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6595 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6596 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6597 6598 if (!pcbddc->adaptive_selection) { 6599 IS ISForVertices, *ISForFaces, *ISForEdges; 6600 MatNullSpace nearnullsp; 6601 const Vec *nearnullvecs; 6602 Vec *localnearnullsp; 6603 PetscScalar *array; 6604 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6605 PetscBool nnsp_has_cnst; 6606 /* LAPACK working arrays for SVD or POD */ 6607 PetscBool skip_lapack, boolforchange; 6608 PetscScalar *work; 6609 PetscReal *singular_vals; 6610 #if defined(PETSC_USE_COMPLEX) 6611 PetscReal *rwork; 6612 #endif 6613 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6614 PetscBLASInt dummy_int = 1; 6615 PetscScalar dummy_scalar = 1.; 6616 PetscBool use_pod = PETSC_FALSE; 6617 6618 /* MKL SVD with same input gives different results on different processes! */ 6619 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6620 use_pod = PETSC_TRUE; 6621 #endif 6622 /* Get index sets for faces, edges and vertices from graph */ 6623 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6624 o_nf = n_ISForFaces; 6625 o_ne = n_ISForEdges; 6626 n_vertices = 0; 6627 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6628 /* print some info */ 6629 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6630 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6631 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6632 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6633 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6634 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6635 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6636 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6637 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6638 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6639 } 6640 6641 if (!pcbddc->use_vertices) n_vertices = 0; 6642 if (!pcbddc->use_edges) n_ISForEdges = 0; 6643 if (!pcbddc->use_faces) n_ISForFaces = 0; 6644 6645 /* check if near null space is attached to global mat */ 6646 if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6647 else nearnullsp = NULL; 6648 6649 if (nearnullsp) { 6650 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6651 /* remove any stored info */ 6652 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6653 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6654 /* store information for BDDC solver reuse */ 6655 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6656 pcbddc->onearnullspace = nearnullsp; 6657 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6658 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6659 } else { /* if near null space is not provided BDDC uses constants by default */ 6660 nnsp_size = 0; 6661 nnsp_has_cnst = PETSC_TRUE; 6662 } 6663 /* get max number of constraints on a single cc */ 6664 max_constraints = nnsp_size; 6665 if (nnsp_has_cnst) max_constraints++; 6666 6667 /* 6668 Evaluate maximum storage size needed by the procedure 6669 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6670 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6671 There can be multiple constraints per connected component 6672 */ 6673 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6674 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6675 6676 total_counts = n_ISForFaces + n_ISForEdges; 6677 total_counts *= max_constraints; 6678 total_counts += n_vertices; 6679 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6680 6681 total_counts = 0; 6682 max_size_of_constraint = 0; 6683 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6684 IS used_is; 6685 if (i < n_ISForEdges) { 6686 used_is = ISForEdges[i]; 6687 } else { 6688 used_is = ISForFaces[i - n_ISForEdges]; 6689 } 6690 PetscCall(ISGetSize(used_is, &j)); 6691 total_counts += j; 6692 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6693 } 6694 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6695 6696 /* get local part of global near null space vectors */ 6697 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6698 for (k = 0; k < nnsp_size; k++) { 6699 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6700 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6701 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6702 } 6703 6704 /* whether or not to skip lapack calls */ 6705 skip_lapack = PETSC_TRUE; 6706 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6707 6708 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6709 if (!skip_lapack) { 6710 PetscScalar temp_work; 6711 6712 if (use_pod) { 6713 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6714 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6715 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6716 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6717 #if defined(PETSC_USE_COMPLEX) 6718 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6719 #endif 6720 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6721 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6722 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6723 lwork = -1; 6724 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6725 #if !defined(PETSC_USE_COMPLEX) 6726 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6727 #else 6728 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6729 #endif 6730 PetscCall(PetscFPTrapPop()); 6731 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6732 } else { 6733 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6734 /* SVD */ 6735 PetscInt max_n, min_n; 6736 max_n = max_size_of_constraint; 6737 min_n = max_constraints; 6738 if (max_size_of_constraint < max_constraints) { 6739 min_n = max_size_of_constraint; 6740 max_n = max_constraints; 6741 } 6742 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6743 #if defined(PETSC_USE_COMPLEX) 6744 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6745 #endif 6746 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6747 lwork = -1; 6748 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6749 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6750 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6751 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6752 #if !defined(PETSC_USE_COMPLEX) 6753 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6754 #else 6755 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6756 #endif 6757 PetscCall(PetscFPTrapPop()); 6758 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6759 #else 6760 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6761 #endif /* on missing GESVD */ 6762 } 6763 /* Allocate optimal workspace */ 6764 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6765 PetscCall(PetscMalloc1(lwork, &work)); 6766 } 6767 /* Now we can loop on constraining sets */ 6768 total_counts = 0; 6769 constraints_idxs_ptr[0] = 0; 6770 constraints_data_ptr[0] = 0; 6771 /* vertices */ 6772 if (n_vertices) { 6773 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6774 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6775 for (i = 0; i < n_vertices; i++) { 6776 constraints_n[total_counts] = 1; 6777 constraints_data[total_counts] = 1.0; 6778 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6779 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6780 total_counts++; 6781 } 6782 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6783 } 6784 6785 /* edges and faces */ 6786 total_counts_cc = total_counts; 6787 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6788 IS used_is; 6789 PetscBool idxs_copied = PETSC_FALSE; 6790 6791 if (ncc < n_ISForEdges) { 6792 used_is = ISForEdges[ncc]; 6793 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6794 } else { 6795 used_is = ISForFaces[ncc - n_ISForEdges]; 6796 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6797 } 6798 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6799 6800 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6801 if (!size_of_constraint) continue; 6802 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6803 if (nnsp_has_cnst) { 6804 PetscScalar quad_value; 6805 6806 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6807 idxs_copied = PETSC_TRUE; 6808 6809 if (!pcbddc->use_nnsp_true) { 6810 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6811 } else { 6812 quad_value = 1.0; 6813 } 6814 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6815 temp_constraints++; 6816 total_counts++; 6817 } 6818 for (k = 0; k < nnsp_size; k++) { 6819 PetscReal real_value; 6820 PetscScalar *ptr_to_data; 6821 6822 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6823 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6824 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6825 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6826 /* check if array is null on the connected component */ 6827 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6828 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6829 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6830 temp_constraints++; 6831 total_counts++; 6832 if (!idxs_copied) { 6833 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6834 idxs_copied = PETSC_TRUE; 6835 } 6836 } 6837 } 6838 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6839 valid_constraints = temp_constraints; 6840 if (!pcbddc->use_nnsp_true && temp_constraints) { 6841 if (temp_constraints == 1) { /* just normalize the constraint */ 6842 PetscScalar norm, *ptr_to_data; 6843 6844 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6845 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6846 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6847 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6848 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6849 } else { /* perform SVD */ 6850 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6851 6852 if (use_pod) { 6853 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6854 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6855 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6856 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6857 from that computed using LAPACKgesvd 6858 -> This is due to a different computation of eigenvectors in LAPACKheev 6859 -> The quality of the POD-computed basis will be the same */ 6860 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6861 /* Store upper triangular part of correlation matrix */ 6862 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6863 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6864 for (j = 0; j < temp_constraints; j++) { 6865 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6866 } 6867 /* compute eigenvalues and eigenvectors of correlation matrix */ 6868 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6869 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6870 #if !defined(PETSC_USE_COMPLEX) 6871 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6872 #else 6873 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6874 #endif 6875 PetscCall(PetscFPTrapPop()); 6876 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6877 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6878 j = 0; 6879 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6880 total_counts = total_counts - j; 6881 valid_constraints = temp_constraints - j; 6882 /* scale and copy POD basis into used quadrature memory */ 6883 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6884 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6885 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6886 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6887 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6888 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6889 if (j < temp_constraints) { 6890 PetscInt ii; 6891 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6892 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6893 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6894 PetscCall(PetscFPTrapPop()); 6895 for (k = 0; k < temp_constraints - j; k++) { 6896 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6897 } 6898 } 6899 } else { 6900 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6901 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6902 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6903 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6904 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6905 #if !defined(PETSC_USE_COMPLEX) 6906 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6907 #else 6908 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6909 #endif 6910 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6911 PetscCall(PetscFPTrapPop()); 6912 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6913 k = temp_constraints; 6914 if (k > size_of_constraint) k = size_of_constraint; 6915 j = 0; 6916 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6917 valid_constraints = k - j; 6918 total_counts = total_counts - temp_constraints + valid_constraints; 6919 #else 6920 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6921 #endif /* on missing GESVD */ 6922 } 6923 } 6924 } 6925 /* update pointers information */ 6926 if (valid_constraints) { 6927 constraints_n[total_counts_cc] = valid_constraints; 6928 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6929 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6930 /* set change_of_basis flag */ 6931 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6932 total_counts_cc++; 6933 } 6934 } 6935 /* free workspace */ 6936 if (!skip_lapack) { 6937 PetscCall(PetscFree(work)); 6938 #if defined(PETSC_USE_COMPLEX) 6939 PetscCall(PetscFree(rwork)); 6940 #endif 6941 PetscCall(PetscFree(singular_vals)); 6942 PetscCall(PetscFree(correlation_mat)); 6943 PetscCall(PetscFree(temp_basis)); 6944 } 6945 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6946 PetscCall(PetscFree(localnearnullsp)); 6947 /* free index sets of faces, edges and vertices */ 6948 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6949 } else { 6950 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6951 6952 total_counts = 0; 6953 n_vertices = 0; 6954 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6955 max_constraints = 0; 6956 total_counts_cc = 0; 6957 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6958 total_counts += pcbddc->adaptive_constraints_n[i]; 6959 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6960 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6961 } 6962 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6963 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6964 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6965 constraints_data = pcbddc->adaptive_constraints_data; 6966 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6967 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6968 total_counts_cc = 0; 6969 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6970 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6971 } 6972 6973 max_size_of_constraint = 0; 6974 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6975 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6976 /* Change of basis */ 6977 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6978 if (pcbddc->use_change_of_basis) { 6979 for (i = 0; i < sub_schurs->n_subs; i++) { 6980 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6981 } 6982 } 6983 } 6984 pcbddc->local_primal_size = total_counts; 6985 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6986 6987 /* map constraints_idxs in boundary numbering */ 6988 if (pcbddc->use_change_of_basis) { 6989 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6990 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6991 } 6992 6993 /* Create constraint matrix */ 6994 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6995 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6996 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6997 6998 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6999 /* determine if a QR strategy is needed for change of basis */ 7000 qr_needed = pcbddc->use_qr_single; 7001 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 7002 total_primal_vertices = 0; 7003 pcbddc->local_primal_size_cc = 0; 7004 for (i = 0; i < total_counts_cc; i++) { 7005 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7006 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 7007 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 7008 pcbddc->local_primal_size_cc += 1; 7009 } else if (PetscBTLookup(change_basis, i)) { 7010 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 7011 pcbddc->local_primal_size_cc += constraints_n[i]; 7012 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 7013 PetscCall(PetscBTSet(qr_needed_idx, i)); 7014 qr_needed = PETSC_TRUE; 7015 } 7016 } else { 7017 pcbddc->local_primal_size_cc += 1; 7018 } 7019 } 7020 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 7021 pcbddc->n_vertices = total_primal_vertices; 7022 /* permute indices in order to have a sorted set of vertices */ 7023 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 7024 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 7025 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 7026 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 7027 7028 /* nonzero structure of constraint matrix */ 7029 /* and get reference dof for local constraints */ 7030 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 7031 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 7032 7033 j = total_primal_vertices; 7034 total_counts = total_primal_vertices; 7035 cum = total_primal_vertices; 7036 for (i = n_vertices; i < total_counts_cc; i++) { 7037 if (!PetscBTLookup(change_basis, i)) { 7038 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 7039 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 7040 cum++; 7041 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7042 for (k = 0; k < constraints_n[i]; k++) { 7043 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 7044 nnz[j + k] = size_of_constraint; 7045 } 7046 j += constraints_n[i]; 7047 } 7048 } 7049 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 7050 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7051 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 7052 PetscCall(PetscFree(nnz)); 7053 7054 /* set values in constraint matrix */ 7055 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 7056 total_counts = total_primal_vertices; 7057 for (i = n_vertices; i < total_counts_cc; i++) { 7058 if (!PetscBTLookup(change_basis, i)) { 7059 PetscInt *cols; 7060 7061 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7062 cols = constraints_idxs + constraints_idxs_ptr[i]; 7063 for (k = 0; k < constraints_n[i]; k++) { 7064 PetscInt row = total_counts + k; 7065 PetscScalar *vals; 7066 7067 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 7068 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 7069 } 7070 total_counts += constraints_n[i]; 7071 } 7072 } 7073 /* assembling */ 7074 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 7075 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 7076 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 7077 7078 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 7079 if (pcbddc->use_change_of_basis) { 7080 /* dual and primal dofs on a single cc */ 7081 PetscInt dual_dofs, primal_dofs; 7082 /* working stuff for GEQRF */ 7083 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 7084 PetscBLASInt lqr_work; 7085 /* working stuff for UNGQR */ 7086 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 7087 PetscBLASInt lgqr_work; 7088 /* working stuff for TRTRS */ 7089 PetscScalar *trs_rhs = NULL; 7090 PetscBLASInt Blas_NRHS; 7091 /* pointers for values insertion into change of basis matrix */ 7092 PetscInt *start_rows, *start_cols; 7093 PetscScalar *start_vals; 7094 /* working stuff for values insertion */ 7095 PetscBT is_primal; 7096 PetscInt *aux_primal_numbering_B; 7097 /* matrix sizes */ 7098 PetscInt global_size, local_size; 7099 /* temporary change of basis */ 7100 Mat localChangeOfBasisMatrix; 7101 /* extra space for debugging */ 7102 PetscScalar *dbg_work = NULL; 7103 7104 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 7105 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 7106 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 7107 /* nonzeros for local mat */ 7108 PetscCall(PetscMalloc1(pcis->n, &nnz)); 7109 if (!pcbddc->benign_change || pcbddc->fake_change) { 7110 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 7111 } else { 7112 const PetscInt *ii; 7113 PetscInt n; 7114 PetscBool flg_row; 7115 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7116 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 7117 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7118 } 7119 for (i = n_vertices; i < total_counts_cc; i++) { 7120 if (PetscBTLookup(change_basis, i)) { 7121 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7122 if (PetscBTLookup(qr_needed_idx, i)) { 7123 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 7124 } else { 7125 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 7126 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 7127 } 7128 } 7129 } 7130 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 7131 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7132 PetscCall(PetscFree(nnz)); 7133 /* Set interior change in the matrix */ 7134 if (!pcbddc->benign_change || pcbddc->fake_change) { 7135 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 7136 } else { 7137 const PetscInt *ii, *jj; 7138 PetscScalar *aa; 7139 PetscInt n; 7140 PetscBool flg_row; 7141 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7142 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 7143 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 7144 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 7145 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7146 } 7147 7148 if (pcbddc->dbg_flag) { 7149 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7150 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7151 } 7152 7153 /* Now we loop on the constraints which need a change of basis */ 7154 /* 7155 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7156 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7157 7158 Basic blocks of change of basis matrix T computed: 7159 7160 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7161 7162 | 1 0 ... 0 s_1/S | 7163 | 0 1 ... 0 s_2/S | 7164 | ... | 7165 | 0 ... 1 s_{n-1}/S | 7166 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7167 7168 with S = \sum_{i=1}^n s_i^2 7169 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7170 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7171 7172 - QR decomposition of constraints otherwise 7173 */ 7174 if (qr_needed && max_size_of_constraint) { 7175 /* space to store Q */ 7176 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7177 /* array to store scaling factors for reflectors */ 7178 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7179 /* first we issue queries for optimal work */ 7180 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7181 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7182 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7183 lqr_work = -1; 7184 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7185 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7186 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7187 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7188 lgqr_work = -1; 7189 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7190 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7191 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7192 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7193 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7194 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7195 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7196 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7197 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7198 /* array to store rhs and solution of triangular solver */ 7199 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7200 /* allocating workspace for check */ 7201 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7202 } 7203 /* array to store whether a node is primal or not */ 7204 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7205 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7206 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7207 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7208 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7209 PetscCall(PetscFree(aux_primal_numbering_B)); 7210 7211 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7212 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7213 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7214 if (PetscBTLookup(change_basis, total_counts)) { 7215 /* get constraint info */ 7216 primal_dofs = constraints_n[total_counts]; 7217 dual_dofs = size_of_constraint - primal_dofs; 7218 7219 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7220 7221 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7222 7223 /* copy quadrature constraints for change of basis check */ 7224 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7225 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7226 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7227 7228 /* compute QR decomposition of constraints */ 7229 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7230 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7231 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7232 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7233 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7234 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7235 PetscCall(PetscFPTrapPop()); 7236 7237 /* explicitly compute R^-T */ 7238 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7239 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7240 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7241 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7242 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7243 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7244 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7245 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7246 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr); 7247 PetscCall(PetscFPTrapPop()); 7248 7249 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7250 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7251 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7252 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7253 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7254 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7255 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7256 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7257 PetscCall(PetscFPTrapPop()); 7258 7259 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7260 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7261 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7262 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7263 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7264 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7265 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7266 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7267 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7268 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7269 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7270 PetscCall(PetscFPTrapPop()); 7271 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7272 7273 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7274 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7275 /* insert cols for primal dofs */ 7276 for (j = 0; j < primal_dofs; j++) { 7277 start_vals = &qr_basis[j * size_of_constraint]; 7278 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7279 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7280 } 7281 /* insert cols for dual dofs */ 7282 for (j = 0, k = 0; j < dual_dofs; k++) { 7283 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7284 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7285 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7286 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7287 j++; 7288 } 7289 } 7290 7291 /* check change of basis */ 7292 if (pcbddc->dbg_flag) { 7293 PetscInt ii, jj; 7294 PetscBool valid_qr = PETSC_TRUE; 7295 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7296 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7297 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7298 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7299 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7300 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7301 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7302 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7303 PetscCall(PetscFPTrapPop()); 7304 for (jj = 0; jj < size_of_constraint; jj++) { 7305 for (ii = 0; ii < primal_dofs; ii++) { 7306 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7307 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7308 } 7309 } 7310 if (!valid_qr) { 7311 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7312 for (jj = 0; jj < size_of_constraint; jj++) { 7313 for (ii = 0; ii < primal_dofs; ii++) { 7314 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7315 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7316 } 7317 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7318 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7319 } 7320 } 7321 } 7322 } else { 7323 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7324 } 7325 } 7326 } else { /* simple transformation block */ 7327 PetscInt row, col; 7328 PetscScalar val, norm; 7329 7330 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7331 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7332 for (j = 0; j < size_of_constraint; j++) { 7333 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7334 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7335 if (!PetscBTLookup(is_primal, row_B)) { 7336 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7337 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7338 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7339 } else { 7340 for (k = 0; k < size_of_constraint; k++) { 7341 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7342 if (row != col) { 7343 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7344 } else { 7345 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7346 } 7347 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7348 } 7349 } 7350 } 7351 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7352 } 7353 } else { 7354 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7355 } 7356 } 7357 7358 /* free workspace */ 7359 if (qr_needed) { 7360 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7361 PetscCall(PetscFree(trs_rhs)); 7362 PetscCall(PetscFree(qr_tau)); 7363 PetscCall(PetscFree(qr_work)); 7364 PetscCall(PetscFree(gqr_work)); 7365 PetscCall(PetscFree(qr_basis)); 7366 } 7367 PetscCall(PetscBTDestroy(&is_primal)); 7368 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7369 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7370 7371 /* assembling of global change of variable */ 7372 if (!pcbddc->fake_change) { 7373 Mat tmat; 7374 7375 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7376 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7377 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7378 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7379 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7380 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7381 PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7382 PetscCall(MatDestroy(&tmat)); 7383 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7384 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7385 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7386 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7387 PetscCall(VecReciprocal(pcis->vec1_global)); 7388 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7389 7390 /* check */ 7391 if (pcbddc->dbg_flag) { 7392 PetscReal error; 7393 Vec x, x_change; 7394 7395 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7396 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7397 PetscCall(VecSetRandom(x, NULL)); 7398 PetscCall(VecCopy(x, pcis->vec1_global)); 7399 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7400 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7401 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7402 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7403 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7404 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7405 PetscCall(VecAXPY(x, -1.0, x_change)); 7406 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7407 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7408 PetscCall(VecDestroy(&x)); 7409 PetscCall(VecDestroy(&x_change)); 7410 } 7411 /* adapt sub_schurs computed (if any) */ 7412 if (pcbddc->use_deluxe_scaling) { 7413 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7414 7415 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7416 if (sub_schurs && sub_schurs->S_Ej_all) { 7417 Mat S_new, tmat; 7418 IS is_all_N, is_V_Sall = NULL; 7419 7420 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7421 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7422 if (pcbddc->deluxe_zerorows) { 7423 ISLocalToGlobalMapping NtoSall; 7424 IS is_V; 7425 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7426 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7427 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7428 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7429 PetscCall(ISDestroy(&is_V)); 7430 } 7431 PetscCall(ISDestroy(&is_all_N)); 7432 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7433 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7434 PetscCall(PetscObjectReference((PetscObject)S_new)); 7435 if (pcbddc->deluxe_zerorows) { 7436 const PetscScalar *array; 7437 const PetscInt *idxs_V, *idxs_all; 7438 PetscInt i, n_V; 7439 7440 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7441 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7442 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7443 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7444 PetscCall(VecGetArrayRead(pcis->D, &array)); 7445 for (i = 0; i < n_V; i++) { 7446 PetscScalar val; 7447 PetscInt idx; 7448 7449 idx = idxs_V[i]; 7450 val = array[idxs_all[idxs_V[i]]]; 7451 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7452 } 7453 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7454 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7455 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7456 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7457 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7458 } 7459 sub_schurs->S_Ej_all = S_new; 7460 PetscCall(MatDestroy(&S_new)); 7461 if (sub_schurs->sum_S_Ej_all) { 7462 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7463 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7464 PetscCall(PetscObjectReference((PetscObject)S_new)); 7465 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7466 sub_schurs->sum_S_Ej_all = S_new; 7467 PetscCall(MatDestroy(&S_new)); 7468 } 7469 PetscCall(ISDestroy(&is_V_Sall)); 7470 PetscCall(MatDestroy(&tmat)); 7471 } 7472 /* destroy any change of basis context in sub_schurs */ 7473 if (sub_schurs && sub_schurs->change) { 7474 PetscInt i; 7475 7476 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7477 PetscCall(PetscFree(sub_schurs->change)); 7478 } 7479 } 7480 if (pcbddc->switch_static) { /* need to save the local change */ 7481 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7482 } else { 7483 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7484 } 7485 /* determine if any process has changed the pressures locally */ 7486 pcbddc->change_interior = pcbddc->benign_have_null; 7487 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7488 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7489 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7490 pcbddc->use_qr_single = qr_needed; 7491 } 7492 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7493 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7494 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7495 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7496 } else { 7497 Mat benign_global = NULL; 7498 if (pcbddc->benign_have_null) { 7499 Mat M; 7500 7501 pcbddc->change_interior = PETSC_TRUE; 7502 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7503 PetscCall(VecReciprocal(pcis->vec1_N)); 7504 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7505 if (pcbddc->benign_change) { 7506 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7507 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7508 } else { 7509 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7510 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7511 } 7512 PetscCall(MatISSetLocalMat(benign_global, M)); 7513 PetscCall(MatDestroy(&M)); 7514 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7515 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7516 } 7517 if (pcbddc->user_ChangeOfBasisMatrix) { 7518 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7519 PetscCall(MatDestroy(&benign_global)); 7520 } else if (pcbddc->benign_have_null) { 7521 pcbddc->ChangeOfBasisMatrix = benign_global; 7522 } 7523 } 7524 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7525 IS is_global; 7526 const PetscInt *gidxs; 7527 7528 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7529 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7530 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7531 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7532 PetscCall(ISDestroy(&is_global)); 7533 } 7534 } 7535 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7536 7537 if (!pcbddc->fake_change) { 7538 /* add pressure dofs to set of primal nodes for numbering purposes */ 7539 for (i = 0; i < pcbddc->benign_n; i++) { 7540 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7541 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7542 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7543 pcbddc->local_primal_size_cc++; 7544 pcbddc->local_primal_size++; 7545 } 7546 7547 /* check if a new primal space has been introduced (also take into account benign trick) */ 7548 pcbddc->new_primal_space_local = PETSC_TRUE; 7549 if (olocal_primal_size == pcbddc->local_primal_size) { 7550 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7551 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7552 if (!pcbddc->new_primal_space_local) { 7553 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7554 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7555 } 7556 } 7557 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7558 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7559 } 7560 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7561 7562 /* flush dbg viewer */ 7563 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7564 7565 /* free workspace */ 7566 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7567 PetscCall(PetscBTDestroy(&change_basis)); 7568 if (!pcbddc->adaptive_selection) { 7569 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7570 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7571 } else { 7572 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7573 PetscCall(PetscFree(constraints_n)); 7574 PetscCall(PetscFree(constraints_idxs_B)); 7575 } 7576 PetscFunctionReturn(PETSC_SUCCESS); 7577 } 7578 7579 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7580 { 7581 ISLocalToGlobalMapping map; 7582 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7583 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7584 PetscInt i, N; 7585 PetscBool rcsr = PETSC_FALSE; 7586 7587 PetscFunctionBegin; 7588 if (pcbddc->recompute_topography) { 7589 pcbddc->graphanalyzed = PETSC_FALSE; 7590 /* Reset previously computed graph */ 7591 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7592 /* Init local Graph struct */ 7593 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7594 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7595 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7596 7597 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7598 /* Check validity of the csr graph passed in by the user */ 7599 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7600 pcbddc->mat_graph->nvtxs); 7601 7602 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7603 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7604 PetscInt *xadj, *adjncy; 7605 PetscInt nvtxs; 7606 PetscBool flg_row; 7607 Mat A; 7608 7609 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7610 A = matis->A; 7611 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7612 Mat AtA; 7613 7614 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7615 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7616 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7617 PetscCall(MatProductSetFromOptions(AtA)); 7618 PetscCall(MatProductSymbolic(AtA)); 7619 PetscCall(MatProductClear(AtA)); 7620 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7621 AtA->assembled = PETSC_TRUE; 7622 PetscCall(MatDestroy(&A)); 7623 A = AtA; 7624 } 7625 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7626 if (flg_row) { 7627 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7628 pcbddc->computed_rowadj = PETSC_TRUE; 7629 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7630 rcsr = PETSC_TRUE; 7631 } 7632 PetscCall(MatDestroy(&A)); 7633 } 7634 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7635 7636 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7637 PetscReal *lcoords; 7638 PetscInt n; 7639 MPI_Datatype dimrealtype; 7640 PetscMPIInt cdimi; 7641 7642 /* TODO: support for blocked */ 7643 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7644 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7645 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7646 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi)); 7647 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype)); 7648 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7649 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7650 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7651 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7652 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7653 7654 pcbddc->mat_graph->coords = lcoords; 7655 pcbddc->mat_graph->cloc = PETSC_TRUE; 7656 pcbddc->mat_graph->cnloc = n; 7657 } 7658 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7659 pcbddc->mat_graph->nvtxs); 7660 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7661 7662 /* attach info on disconnected subdomains if present */ 7663 if (pcbddc->n_local_subs) { 7664 PetscInt *local_subs, n, totn; 7665 7666 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7667 PetscCall(PetscMalloc1(n, &local_subs)); 7668 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7669 for (i = 0; i < pcbddc->n_local_subs; i++) { 7670 const PetscInt *idxs; 7671 PetscInt nl, j; 7672 7673 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7674 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7675 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7676 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7677 } 7678 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7679 pcbddc->mat_graph->n_local_subs = totn + 1; 7680 pcbddc->mat_graph->local_subs = local_subs; 7681 } 7682 7683 /* Setup of Graph */ 7684 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7685 } 7686 7687 if (!pcbddc->graphanalyzed) { 7688 /* Graph's connected components analysis */ 7689 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7690 pcbddc->graphanalyzed = PETSC_TRUE; 7691 pcbddc->corner_selected = pcbddc->corner_selection; 7692 } 7693 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7694 PetscFunctionReturn(PETSC_SUCCESS); 7695 } 7696 7697 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7698 { 7699 PetscInt i, j, n; 7700 PetscScalar *alphas; 7701 PetscReal norm, *onorms; 7702 7703 PetscFunctionBegin; 7704 n = *nio; 7705 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7706 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7707 PetscCall(VecNormalize(vecs[0], &norm)); 7708 if (norm < PETSC_SMALL) { 7709 onorms[0] = 0.0; 7710 PetscCall(VecSet(vecs[0], 0.0)); 7711 } else { 7712 onorms[0] = norm; 7713 } 7714 7715 for (i = 1; i < n; i++) { 7716 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7717 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7718 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7719 PetscCall(VecNormalize(vecs[i], &norm)); 7720 if (norm < PETSC_SMALL) { 7721 onorms[i] = 0.0; 7722 PetscCall(VecSet(vecs[i], 0.0)); 7723 } else { 7724 onorms[i] = norm; 7725 } 7726 } 7727 /* push nonzero vectors at the beginning */ 7728 for (i = 0; i < n; i++) { 7729 if (onorms[i] == 0.0) { 7730 for (j = i + 1; j < n; j++) { 7731 if (onorms[j] != 0.0) { 7732 PetscCall(VecCopy(vecs[j], vecs[i])); 7733 onorms[i] = onorms[j]; 7734 onorms[j] = 0.0; 7735 break; 7736 } 7737 } 7738 } 7739 } 7740 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7741 PetscCall(PetscFree2(alphas, onorms)); 7742 PetscFunctionReturn(PETSC_SUCCESS); 7743 } 7744 7745 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7746 { 7747 ISLocalToGlobalMapping mapping; 7748 Mat A; 7749 PetscInt n_neighs, *neighs, *n_shared, **shared; 7750 PetscMPIInt size, rank, color; 7751 PetscInt *xadj, *adjncy; 7752 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7753 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7754 PetscInt void_procs, *procs_candidates = NULL; 7755 PetscInt xadj_count, *count; 7756 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7757 PetscSubcomm psubcomm; 7758 MPI_Comm subcomm; 7759 7760 PetscFunctionBegin; 7761 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7762 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7763 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7764 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7765 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7766 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7767 7768 if (have_void) *have_void = PETSC_FALSE; 7769 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7770 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7771 PetscCall(MatISGetLocalMat(mat, &A)); 7772 PetscCall(MatGetLocalSize(A, &n, NULL)); 7773 im_active = !!n; 7774 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7775 void_procs = size - active_procs; 7776 /* get ranks of non-active processes in mat communicator */ 7777 if (void_procs) { 7778 PetscInt ncand; 7779 7780 if (have_void) *have_void = PETSC_TRUE; 7781 PetscCall(PetscMalloc1(size, &procs_candidates)); 7782 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7783 for (i = 0, ncand = 0; i < size; i++) { 7784 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7785 } 7786 /* force n_subdomains to be not greater that the number of non-active processes */ 7787 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7788 } 7789 7790 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7791 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7792 PetscCall(MatGetSize(mat, &N, NULL)); 7793 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7794 PetscInt issize, isidx, dest; 7795 PetscBool default_sub; 7796 7797 if (*n_subdomains == 1) dest = 0; 7798 else dest = rank; 7799 if (im_active) { 7800 issize = 1; 7801 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7802 isidx = procs_candidates[dest]; 7803 } else { 7804 isidx = dest; 7805 } 7806 } else { 7807 issize = 0; 7808 isidx = rank; 7809 } 7810 if (*n_subdomains != 1) *n_subdomains = active_procs; 7811 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7812 default_sub = (PetscBool)(isidx == rank); 7813 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 7814 if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling")); 7815 PetscCall(PetscFree(procs_candidates)); 7816 PetscFunctionReturn(PETSC_SUCCESS); 7817 } 7818 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7819 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7820 threshold = PetscMax(threshold, 2); 7821 7822 /* Get info on mapping */ 7823 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7824 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7825 7826 /* build local CSR graph of subdomains' connectivity */ 7827 PetscCall(PetscMalloc1(2, &xadj)); 7828 xadj[0] = 0; 7829 xadj[1] = PetscMax(n_neighs - 1, 0); 7830 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7831 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7832 PetscCall(PetscCalloc1(n, &count)); 7833 for (i = 1; i < n_neighs; i++) 7834 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7835 7836 xadj_count = 0; 7837 for (i = 1; i < n_neighs; i++) { 7838 for (j = 0; j < n_shared[i]; j++) { 7839 if (count[shared[i][j]] < threshold) { 7840 adjncy[xadj_count] = neighs[i]; 7841 adjncy_wgt[xadj_count] = n_shared[i]; 7842 xadj_count++; 7843 break; 7844 } 7845 } 7846 } 7847 xadj[1] = xadj_count; 7848 PetscCall(PetscFree(count)); 7849 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7850 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7851 7852 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7853 7854 /* Restrict work on active processes only */ 7855 PetscCall(PetscMPIIntCast(im_active, &color)); 7856 if (void_procs) { 7857 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7858 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7859 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7860 subcomm = PetscSubcommChild(psubcomm); 7861 } else { 7862 psubcomm = NULL; 7863 subcomm = PetscObjectComm((PetscObject)mat); 7864 } 7865 7866 v_wgt = NULL; 7867 if (!color) { 7868 PetscCall(PetscFree(xadj)); 7869 PetscCall(PetscFree(adjncy)); 7870 PetscCall(PetscFree(adjncy_wgt)); 7871 } else { 7872 Mat subdomain_adj; 7873 IS new_ranks, new_ranks_contig; 7874 MatPartitioning partitioner; 7875 PetscInt rstart, rend; 7876 PetscMPIInt irstart = 0, irend = 0; 7877 PetscInt *is_indices, *oldranks; 7878 PetscMPIInt size; 7879 PetscBool aggregate; 7880 7881 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7882 if (void_procs) { 7883 PetscInt prank = rank; 7884 PetscCall(PetscMalloc1(size, &oldranks)); 7885 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7886 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7887 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7888 } else { 7889 oldranks = NULL; 7890 } 7891 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7892 if (aggregate) { /* TODO: all this part could be made more efficient */ 7893 PetscInt lrows, row, ncols, *cols; 7894 PetscMPIInt nrank; 7895 PetscScalar *vals; 7896 7897 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7898 lrows = 0; 7899 if (nrank < redprocs) { 7900 lrows = size / redprocs; 7901 if (nrank < size % redprocs) lrows++; 7902 } 7903 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7904 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7905 PetscCall(PetscMPIIntCast(rstart, &irstart)); 7906 PetscCall(PetscMPIIntCast(rend, &irend)); 7907 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7908 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7909 row = nrank; 7910 ncols = xadj[1] - xadj[0]; 7911 cols = adjncy; 7912 PetscCall(PetscMalloc1(ncols, &vals)); 7913 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7914 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7915 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7916 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7917 PetscCall(PetscFree(xadj)); 7918 PetscCall(PetscFree(adjncy)); 7919 PetscCall(PetscFree(adjncy_wgt)); 7920 PetscCall(PetscFree(vals)); 7921 if (use_vwgt) { 7922 Vec v; 7923 const PetscScalar *array; 7924 PetscInt nl; 7925 7926 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7927 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7928 PetscCall(VecAssemblyBegin(v)); 7929 PetscCall(VecAssemblyEnd(v)); 7930 PetscCall(VecGetLocalSize(v, &nl)); 7931 PetscCall(VecGetArrayRead(v, &array)); 7932 PetscCall(PetscMalloc1(nl, &v_wgt)); 7933 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7934 PetscCall(VecRestoreArrayRead(v, &array)); 7935 PetscCall(VecDestroy(&v)); 7936 } 7937 } else { 7938 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7939 if (use_vwgt) { 7940 PetscCall(PetscMalloc1(1, &v_wgt)); 7941 v_wgt[0] = n; 7942 } 7943 } 7944 /* PetscCall(MatView(subdomain_adj,0)); */ 7945 7946 /* Partition */ 7947 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7948 #if defined(PETSC_HAVE_PTSCOTCH) 7949 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7950 #elif defined(PETSC_HAVE_PARMETIS) 7951 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7952 #else 7953 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7954 #endif 7955 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7956 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7957 *n_subdomains = PetscMin(size, *n_subdomains); 7958 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7959 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7960 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7961 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7962 7963 /* renumber new_ranks to avoid "holes" in new set of processors */ 7964 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7965 PetscCall(ISDestroy(&new_ranks)); 7966 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7967 if (!aggregate) { 7968 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7969 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7970 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7971 } else if (oldranks) { 7972 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7973 } else { 7974 ranks_send_to_idx[0] = is_indices[0]; 7975 } 7976 } else { 7977 PetscInt idx = 0; 7978 PetscMPIInt tag; 7979 MPI_Request *reqs; 7980 7981 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7982 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7983 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7984 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7985 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7986 PetscCall(PetscFree(reqs)); 7987 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7988 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7989 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7990 } else if (oldranks) { 7991 ranks_send_to_idx[0] = oldranks[idx]; 7992 } else { 7993 ranks_send_to_idx[0] = idx; 7994 } 7995 } 7996 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7997 /* clean up */ 7998 PetscCall(PetscFree(oldranks)); 7999 PetscCall(ISDestroy(&new_ranks_contig)); 8000 PetscCall(MatDestroy(&subdomain_adj)); 8001 PetscCall(MatPartitioningDestroy(&partitioner)); 8002 } 8003 PetscCall(PetscSubcommDestroy(&psubcomm)); 8004 PetscCall(PetscFree(procs_candidates)); 8005 8006 /* assemble parallel IS for sends */ 8007 i = 1; 8008 if (!color) i = 0; 8009 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 8010 PetscFunctionReturn(PETSC_SUCCESS); 8011 } 8012 8013 typedef enum { 8014 MATDENSE_PRIVATE = 0, 8015 MATAIJ_PRIVATE, 8016 MATBAIJ_PRIVATE, 8017 MATSBAIJ_PRIVATE 8018 } MatTypePrivate; 8019 8020 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 8021 { 8022 Mat local_mat; 8023 IS is_sends_internal; 8024 PetscInt rows, cols, new_local_rows; 8025 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 8026 PetscBool ismatis, isdense, newisdense, destroy_mat; 8027 ISLocalToGlobalMapping l2gmap; 8028 PetscInt *l2gmap_indices; 8029 const PetscInt *is_indices; 8030 MatType new_local_type; 8031 /* buffers */ 8032 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 8033 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 8034 PetscInt *recv_buffer_idxs_local; 8035 PetscScalar *ptr_vals, *recv_buffer_vals; 8036 const PetscScalar *send_buffer_vals; 8037 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 8038 /* MPI */ 8039 MPI_Comm comm, comm_n; 8040 PetscSubcomm subcomm; 8041 PetscMPIInt n_sends, n_recvs, size; 8042 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 8043 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 8044 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 8045 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 8046 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 8047 8048 PetscFunctionBegin; 8049 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 8050 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 8051 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 8052 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 8053 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 8054 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 8055 PetscValidLogicalCollectiveBool(mat, reuse, 6); 8056 PetscValidLogicalCollectiveInt(mat, nis, 8); 8057 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 8058 if (nvecs) { 8059 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 8060 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 8061 } 8062 /* further checks */ 8063 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8064 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 8065 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 8066 8067 PetscCall(MatGetSize(local_mat, &rows, &cols)); 8068 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 8069 if (reuse && *mat_n) { 8070 PetscInt mrows, mcols, mnrows, mncols; 8071 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 8072 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 8073 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 8074 PetscCall(MatGetSize(mat, &mrows, &mcols)); 8075 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 8076 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 8077 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 8078 } 8079 PetscCall(MatGetBlockSize(local_mat, &bs)); 8080 PetscValidLogicalCollectiveInt(mat, bs, 1); 8081 8082 /* prepare IS for sending if not provided */ 8083 if (!is_sends) { 8084 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 8085 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 8086 } else { 8087 PetscCall(PetscObjectReference((PetscObject)is_sends)); 8088 is_sends_internal = is_sends; 8089 } 8090 8091 /* get comm */ 8092 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 8093 8094 /* compute number of sends */ 8095 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 8096 PetscCall(PetscMPIIntCast(i, &n_sends)); 8097 8098 /* compute number of receives */ 8099 PetscCallMPI(MPI_Comm_size(comm, &size)); 8100 PetscCall(PetscMalloc1(size, &iflags)); 8101 PetscCall(PetscArrayzero(iflags, size)); 8102 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 8103 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 8104 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 8105 PetscCall(PetscFree(iflags)); 8106 8107 /* restrict comm if requested */ 8108 subcomm = NULL; 8109 destroy_mat = PETSC_FALSE; 8110 if (restrict_comm) { 8111 PetscMPIInt color, subcommsize; 8112 8113 color = 0; 8114 if (restrict_full) { 8115 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 8116 } else { 8117 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 8118 } 8119 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 8120 subcommsize = size - subcommsize; 8121 /* check if reuse has been requested */ 8122 if (reuse) { 8123 if (*mat_n) { 8124 PetscMPIInt subcommsize2; 8125 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 8126 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 8127 comm_n = PetscObjectComm((PetscObject)*mat_n); 8128 } else { 8129 comm_n = PETSC_COMM_SELF; 8130 } 8131 } else { /* MAT_INITIAL_MATRIX */ 8132 PetscMPIInt rank; 8133 8134 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 8135 PetscCall(PetscSubcommCreate(comm, &subcomm)); 8136 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 8137 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 8138 comm_n = PetscSubcommChild(subcomm); 8139 } 8140 /* flag to destroy *mat_n if not significative */ 8141 if (color) destroy_mat = PETSC_TRUE; 8142 } else { 8143 comm_n = comm; 8144 } 8145 8146 /* prepare send/receive buffers */ 8147 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 8148 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8149 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8150 PetscCall(PetscArrayzero(ilengths_vals, size)); 8151 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8152 8153 /* Get data from local matrices */ 8154 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8155 /* TODO: See below some guidelines on how to prepare the local buffers */ 8156 /* 8157 send_buffer_vals should contain the raw values of the local matrix 8158 send_buffer_idxs should contain: 8159 - MatType_PRIVATE type 8160 - PetscInt size_of_l2gmap 8161 - PetscInt global_row_indices[size_of_l2gmap] 8162 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8163 */ 8164 { 8165 ISLocalToGlobalMapping mapping; 8166 8167 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8168 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8169 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8170 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8171 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8172 send_buffer_idxs[1] = i; 8173 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8174 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8175 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8176 PetscCall(PetscMPIIntCast(i, &len)); 8177 for (i = 0; i < n_sends; i++) { 8178 ilengths_vals[is_indices[i]] = len * len; 8179 ilengths_idxs[is_indices[i]] = len + 2; 8180 } 8181 } 8182 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8183 /* additional is (if any) */ 8184 if (nis) { 8185 PetscMPIInt psum; 8186 PetscInt j; 8187 for (j = 0, psum = 0; j < nis; j++) { 8188 PetscInt plen; 8189 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8190 PetscCall(PetscMPIIntCast(plen, &len)); 8191 psum += len + 1; /* indices + length */ 8192 } 8193 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8194 for (j = 0, psum = 0; j < nis; j++) { 8195 PetscInt plen; 8196 const PetscInt *is_array_idxs; 8197 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8198 send_buffer_idxs_is[psum] = plen; 8199 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8200 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8201 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8202 psum += plen + 1; /* indices + length */ 8203 } 8204 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8205 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8206 } 8207 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8208 8209 buf_size_idxs = 0; 8210 buf_size_vals = 0; 8211 buf_size_idxs_is = 0; 8212 buf_size_vecs = 0; 8213 for (i = 0; i < n_recvs; i++) { 8214 buf_size_idxs += olengths_idxs[i]; 8215 buf_size_vals += olengths_vals[i]; 8216 if (nis) buf_size_idxs_is += olengths_idxs_is[i]; 8217 if (nvecs) buf_size_vecs += olengths_idxs[i]; 8218 } 8219 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8220 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8221 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8222 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8223 8224 /* get new tags for clean communications */ 8225 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8226 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8227 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8228 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8229 8230 /* allocate for requests */ 8231 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8232 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8233 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8234 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8235 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8236 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8237 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8238 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8239 8240 /* communications */ 8241 ptr_idxs = recv_buffer_idxs; 8242 ptr_vals = recv_buffer_vals; 8243 ptr_idxs_is = recv_buffer_idxs_is; 8244 ptr_vecs = recv_buffer_vecs; 8245 for (i = 0; i < n_recvs; i++) { 8246 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i])); 8247 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i])); 8248 ptr_idxs += olengths_idxs[i]; 8249 ptr_vals += olengths_vals[i]; 8250 if (nis) { 8251 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i])); 8252 ptr_idxs_is += olengths_idxs_is[i]; 8253 } 8254 if (nvecs) { 8255 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i])); 8256 ptr_vecs += olengths_idxs[i] - 2; 8257 } 8258 } 8259 for (i = 0; i < n_sends; i++) { 8260 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8261 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8262 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8263 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8264 if (nvecs) { 8265 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8266 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8267 } 8268 } 8269 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8270 PetscCall(ISDestroy(&is_sends_internal)); 8271 8272 /* assemble new l2g map */ 8273 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8274 ptr_idxs = recv_buffer_idxs; 8275 new_local_rows = 0; 8276 for (i = 0; i < n_recvs; i++) { 8277 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8278 ptr_idxs += olengths_idxs[i]; 8279 } 8280 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8281 ptr_idxs = recv_buffer_idxs; 8282 new_local_rows = 0; 8283 for (i = 0; i < n_recvs; i++) { 8284 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8285 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8286 ptr_idxs += olengths_idxs[i]; 8287 } 8288 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8289 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8290 PetscCall(PetscFree(l2gmap_indices)); 8291 8292 /* infer new local matrix type from received local matrices type */ 8293 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8294 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8295 if (n_recvs) { 8296 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8297 ptr_idxs = recv_buffer_idxs; 8298 for (i = 0; i < n_recvs; i++) { 8299 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8300 new_local_type_private = MATAIJ_PRIVATE; 8301 break; 8302 } 8303 ptr_idxs += olengths_idxs[i]; 8304 } 8305 switch (new_local_type_private) { 8306 case MATDENSE_PRIVATE: 8307 new_local_type = MATSEQAIJ; 8308 bs = 1; 8309 break; 8310 case MATAIJ_PRIVATE: 8311 new_local_type = MATSEQAIJ; 8312 bs = 1; 8313 break; 8314 case MATBAIJ_PRIVATE: 8315 new_local_type = MATSEQBAIJ; 8316 break; 8317 case MATSBAIJ_PRIVATE: 8318 new_local_type = MATSEQSBAIJ; 8319 break; 8320 default: 8321 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8322 } 8323 } else { /* by default, new_local_type is seqaij */ 8324 new_local_type = MATSEQAIJ; 8325 bs = 1; 8326 } 8327 8328 /* create MATIS object if needed */ 8329 if (!reuse) { 8330 PetscCall(MatGetSize(mat, &rows, &cols)); 8331 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8332 } else { 8333 /* it also destroys the local matrices */ 8334 if (*mat_n) { 8335 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8336 } else { /* this is a fake object */ 8337 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8338 } 8339 } 8340 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8341 PetscCall(MatSetType(local_mat, new_local_type)); 8342 8343 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8344 8345 /* Global to local map of received indices */ 8346 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8347 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8348 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8349 8350 /* restore attributes -> type of incoming data and its size */ 8351 buf_size_idxs = 0; 8352 for (i = 0; i < n_recvs; i++) { 8353 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8354 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8355 buf_size_idxs += olengths_idxs[i]; 8356 } 8357 PetscCall(PetscFree(recv_buffer_idxs)); 8358 8359 /* set preallocation */ 8360 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8361 if (!newisdense) { 8362 PetscInt *new_local_nnz = NULL; 8363 8364 ptr_idxs = recv_buffer_idxs_local; 8365 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8366 for (i = 0; i < n_recvs; i++) { 8367 PetscInt j; 8368 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8369 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8370 } else { 8371 /* TODO */ 8372 } 8373 ptr_idxs += olengths_idxs[i]; 8374 } 8375 if (new_local_nnz) { 8376 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8377 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8378 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8379 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8380 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8381 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8382 } else { 8383 PetscCall(MatSetUp(local_mat)); 8384 } 8385 PetscCall(PetscFree(new_local_nnz)); 8386 } else { 8387 PetscCall(MatSetUp(local_mat)); 8388 } 8389 8390 /* set values */ 8391 ptr_vals = recv_buffer_vals; 8392 ptr_idxs = recv_buffer_idxs_local; 8393 for (i = 0; i < n_recvs; i++) { 8394 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8395 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8396 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8397 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8398 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8399 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8400 } else { 8401 /* TODO */ 8402 } 8403 ptr_idxs += olengths_idxs[i]; 8404 ptr_vals += olengths_vals[i]; 8405 } 8406 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8407 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8408 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8409 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8410 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8411 PetscCall(PetscFree(recv_buffer_vals)); 8412 8413 #if 0 8414 if (!restrict_comm) { /* check */ 8415 Vec lvec,rvec; 8416 PetscReal infty_error; 8417 8418 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8419 PetscCall(VecSetRandom(rvec,NULL)); 8420 PetscCall(MatMult(mat,rvec,lvec)); 8421 PetscCall(VecScale(lvec,-1.0)); 8422 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8423 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8424 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8425 PetscCall(VecDestroy(&rvec)); 8426 PetscCall(VecDestroy(&lvec)); 8427 } 8428 #endif 8429 8430 /* assemble new additional is (if any) */ 8431 if (nis) { 8432 PetscInt **temp_idxs, *count_is, j, psum; 8433 8434 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8435 PetscCall(PetscCalloc1(nis, &count_is)); 8436 ptr_idxs = recv_buffer_idxs_is; 8437 psum = 0; 8438 for (i = 0; i < n_recvs; i++) { 8439 for (j = 0; j < nis; j++) { 8440 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8441 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8442 psum += plen; 8443 ptr_idxs += plen + 1; /* shift pointer to received data */ 8444 } 8445 } 8446 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8447 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8448 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8449 PetscCall(PetscArrayzero(count_is, nis)); 8450 ptr_idxs = recv_buffer_idxs_is; 8451 for (i = 0; i < n_recvs; i++) { 8452 for (j = 0; j < nis; j++) { 8453 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8454 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8455 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8456 ptr_idxs += plen + 1; /* shift pointer to received data */ 8457 } 8458 } 8459 for (i = 0; i < nis; i++) { 8460 PetscCall(ISDestroy(&isarray[i])); 8461 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8462 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8463 } 8464 PetscCall(PetscFree(count_is)); 8465 PetscCall(PetscFree(temp_idxs[0])); 8466 PetscCall(PetscFree(temp_idxs)); 8467 } 8468 /* free workspace */ 8469 PetscCall(PetscFree(recv_buffer_idxs_is)); 8470 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8471 PetscCall(PetscFree(send_buffer_idxs)); 8472 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8473 if (isdense) { 8474 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8475 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8476 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8477 } else { 8478 /* PetscCall(PetscFree(send_buffer_vals)); */ 8479 } 8480 if (nis) { 8481 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8482 PetscCall(PetscFree(send_buffer_idxs_is)); 8483 } 8484 8485 if (nvecs) { 8486 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8487 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8488 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8489 PetscCall(VecDestroy(&nnsp_vec[0])); 8490 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8491 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8492 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8493 /* set values */ 8494 ptr_vals = recv_buffer_vecs; 8495 ptr_idxs = recv_buffer_idxs_local; 8496 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8497 for (i = 0; i < n_recvs; i++) { 8498 PetscInt j; 8499 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8500 ptr_idxs += olengths_idxs[i]; 8501 ptr_vals += olengths_idxs[i] - 2; 8502 } 8503 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8504 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8505 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8506 } 8507 8508 PetscCall(PetscFree(recv_buffer_vecs)); 8509 PetscCall(PetscFree(recv_buffer_idxs_local)); 8510 PetscCall(PetscFree(recv_req_idxs)); 8511 PetscCall(PetscFree(recv_req_vals)); 8512 PetscCall(PetscFree(recv_req_vecs)); 8513 PetscCall(PetscFree(recv_req_idxs_is)); 8514 PetscCall(PetscFree(send_req_idxs)); 8515 PetscCall(PetscFree(send_req_vals)); 8516 PetscCall(PetscFree(send_req_vecs)); 8517 PetscCall(PetscFree(send_req_idxs_is)); 8518 PetscCall(PetscFree(ilengths_vals)); 8519 PetscCall(PetscFree(ilengths_idxs)); 8520 PetscCall(PetscFree(olengths_vals)); 8521 PetscCall(PetscFree(olengths_idxs)); 8522 PetscCall(PetscFree(onodes)); 8523 if (nis) { 8524 PetscCall(PetscFree(ilengths_idxs_is)); 8525 PetscCall(PetscFree(olengths_idxs_is)); 8526 PetscCall(PetscFree(onodes_is)); 8527 } 8528 PetscCall(PetscSubcommDestroy(&subcomm)); 8529 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8530 PetscCall(MatDestroy(mat_n)); 8531 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8532 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8533 PetscCall(VecDestroy(&nnsp_vec[0])); 8534 } 8535 *mat_n = NULL; 8536 } 8537 PetscFunctionReturn(PETSC_SUCCESS); 8538 } 8539 8540 /* temporary hack into ksp private data structure */ 8541 #include <petsc/private/kspimpl.h> 8542 8543 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8544 { 8545 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8546 PC_IS *pcis = (PC_IS *)pc->data; 8547 PCBDDCGraph graph = pcbddc->mat_graph; 8548 Mat coarse_mat, coarse_mat_is; 8549 Mat coarsedivudotp = NULL; 8550 Mat coarseG, t_coarse_mat_is; 8551 MatNullSpace CoarseNullSpace = NULL; 8552 ISLocalToGlobalMapping coarse_islg; 8553 IS coarse_is, *isarray, corners; 8554 PetscInt i, im_active = -1, active_procs = -1; 8555 PetscInt nis, nisdofs, nisneu, nisvert; 8556 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8557 PC pc_temp; 8558 PCType coarse_pc_type; 8559 KSPType coarse_ksp_type; 8560 PetscBool multilevel_requested, multilevel_allowed; 8561 PetscBool coarse_reuse, multi_element = graph->multi_element; 8562 PetscInt ncoarse, nedcfield; 8563 PetscBool compute_vecs = PETSC_FALSE; 8564 PetscScalar *array; 8565 MatReuse coarse_mat_reuse; 8566 PetscBool restr, full_restr, have_void; 8567 PetscMPIInt size; 8568 8569 PetscFunctionBegin; 8570 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8571 /* Assign global numbering to coarse dofs */ 8572 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8573 PetscInt ocoarse_size; 8574 compute_vecs = PETSC_TRUE; 8575 8576 pcbddc->new_primal_space = PETSC_TRUE; 8577 ocoarse_size = pcbddc->coarse_size; 8578 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8579 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8580 /* see if we can avoid some work */ 8581 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8582 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8583 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8584 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8585 coarse_reuse = PETSC_FALSE; 8586 } else { /* we can safely reuse already computed coarse matrix */ 8587 coarse_reuse = PETSC_TRUE; 8588 } 8589 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8590 coarse_reuse = PETSC_FALSE; 8591 } 8592 /* reset any subassembling information */ 8593 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8594 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8595 coarse_reuse = PETSC_TRUE; 8596 } 8597 if (coarse_reuse && pcbddc->coarse_ksp) { 8598 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8599 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8600 coarse_mat_reuse = MAT_REUSE_MATRIX; 8601 } else { 8602 coarse_mat = NULL; 8603 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8604 } 8605 8606 /* creates temporary l2gmap and IS for coarse indexes */ 8607 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8608 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8609 8610 /* creates temporary MATIS object for coarse matrix */ 8611 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8612 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8613 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8614 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element)); 8615 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8616 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8617 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8618 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8619 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8620 8621 /* count "active" (i.e. with positive local size) and "void" processes */ 8622 im_active = !!pcis->n; 8623 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8624 8625 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8626 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8627 /* full_restr : just use the receivers from the subassembling pattern */ 8628 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8629 coarse_mat_is = NULL; 8630 multilevel_allowed = PETSC_FALSE; 8631 multilevel_requested = PETSC_FALSE; 8632 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8633 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8634 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8635 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8636 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8637 if (multilevel_requested) { 8638 ncoarse = active_procs / coarsening_ratio; 8639 restr = PETSC_FALSE; 8640 full_restr = PETSC_FALSE; 8641 } else { 8642 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8643 restr = PETSC_TRUE; 8644 full_restr = PETSC_TRUE; 8645 } 8646 if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8647 ncoarse = PetscMax(1, ncoarse); 8648 if (!pcbddc->coarse_subassembling) { 8649 if (coarsening_ratio > 1) { 8650 if (multilevel_requested) { 8651 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8652 } else { 8653 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8654 } 8655 } else { 8656 PetscMPIInt rank; 8657 8658 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8659 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE; 8660 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8661 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling")); 8662 } 8663 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8664 PetscInt psum; 8665 if (pcbddc->coarse_ksp) psum = 1; 8666 else psum = 0; 8667 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8668 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8669 } 8670 /* determine if we can go multilevel */ 8671 if (multilevel_requested) { 8672 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8673 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8674 } 8675 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8676 8677 /* dump subassembling pattern */ 8678 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8679 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8680 nedcfield = -1; 8681 corners = NULL; 8682 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8683 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8684 const PetscInt *idxs; 8685 ISLocalToGlobalMapping tmap; 8686 8687 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8688 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8689 /* allocate space for temporary storage */ 8690 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8691 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8692 /* allocate for IS array */ 8693 nisdofs = pcbddc->n_ISForDofsLocal; 8694 if (pcbddc->nedclocal) { 8695 if (pcbddc->nedfield > -1) { 8696 nedcfield = pcbddc->nedfield; 8697 } else { 8698 nedcfield = 0; 8699 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8700 nisdofs = 1; 8701 } 8702 } 8703 nisneu = !!pcbddc->NeumannBoundariesLocal; 8704 nisvert = 0; /* nisvert is not used */ 8705 nis = nisdofs + nisneu + nisvert; 8706 PetscCall(PetscMalloc1(nis, &isarray)); 8707 /* dofs splitting */ 8708 for (i = 0; i < nisdofs; i++) { 8709 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8710 if (nedcfield != i) { 8711 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8712 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8713 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8714 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8715 } else { 8716 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8717 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8718 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8719 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8720 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8721 } 8722 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8723 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8724 /* PetscCall(ISView(isarray[i],0)); */ 8725 } 8726 /* neumann boundaries */ 8727 if (pcbddc->NeumannBoundariesLocal) { 8728 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8729 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8730 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8731 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8732 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8733 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8734 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8735 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8736 } 8737 /* coordinates */ 8738 if (pcbddc->corner_selected) { 8739 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8740 PetscCall(ISGetLocalSize(corners, &tsize)); 8741 PetscCall(ISGetIndices(corners, &idxs)); 8742 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8743 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8744 PetscCall(ISRestoreIndices(corners, &idxs)); 8745 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8746 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8747 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8748 } 8749 PetscCall(PetscFree(tidxs)); 8750 PetscCall(PetscFree(tidxs2)); 8751 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8752 } else { 8753 nis = 0; 8754 nisdofs = 0; 8755 nisneu = 0; 8756 nisvert = 0; 8757 isarray = NULL; 8758 } 8759 /* destroy no longer needed map */ 8760 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8761 8762 /* subassemble */ 8763 if (multilevel_allowed) { 8764 Vec vp[1]; 8765 PetscInt nvecs = 0; 8766 PetscBool reuse; 8767 8768 vp[0] = NULL; 8769 /* XXX HDIV also */ 8770 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8771 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8772 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8773 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8774 nvecs = 1; 8775 8776 if (pcbddc->divudotp) { 8777 Mat B, loc_divudotp; 8778 Vec v, p; 8779 IS dummy; 8780 PetscInt np; 8781 8782 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8783 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8784 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8785 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8786 PetscCall(MatCreateVecs(B, &v, &p)); 8787 PetscCall(VecSet(p, 1.)); 8788 PetscCall(MatMultTranspose(B, p, v)); 8789 PetscCall(VecDestroy(&p)); 8790 PetscCall(MatDestroy(&B)); 8791 PetscCall(VecGetArray(vp[0], &array)); 8792 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8793 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8794 PetscCall(VecResetArray(pcbddc->vec1_P)); 8795 PetscCall(VecRestoreArray(vp[0], &array)); 8796 PetscCall(ISDestroy(&dummy)); 8797 PetscCall(VecDestroy(&v)); 8798 } 8799 } 8800 if (coarse_mat) reuse = PETSC_TRUE; 8801 else reuse = PETSC_FALSE; 8802 if (multi_element) { 8803 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8804 coarse_mat_is = t_coarse_mat_is; 8805 } else { 8806 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8807 if (reuse) { 8808 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8809 } else { 8810 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8811 } 8812 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8813 PetscScalar *arraym; 8814 const PetscScalar *arrayv; 8815 PetscInt nl; 8816 PetscCall(VecGetLocalSize(vp[0], &nl)); 8817 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8818 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8819 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8820 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8821 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8822 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8823 PetscCall(VecDestroy(&vp[0])); 8824 } else { 8825 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8826 } 8827 } 8828 } else { 8829 PetscBool default_sub; 8830 8831 PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub)); 8832 if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8833 else { 8834 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8835 coarse_mat_is = t_coarse_mat_is; 8836 } 8837 } 8838 if (coarse_mat_is || coarse_mat) { 8839 if (!multilevel_allowed) { 8840 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8841 } else { 8842 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8843 if (coarse_mat_is) { 8844 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8845 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8846 coarse_mat = coarse_mat_is; 8847 } 8848 } 8849 } 8850 PetscCall(MatDestroy(&t_coarse_mat_is)); 8851 PetscCall(MatDestroy(&coarse_mat_is)); 8852 8853 /* create local to global scatters for coarse problem */ 8854 if (compute_vecs) { 8855 PetscInt lrows; 8856 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8857 if (coarse_mat) { 8858 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8859 } else { 8860 lrows = 0; 8861 } 8862 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8863 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8864 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8865 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8866 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8867 } 8868 PetscCall(ISDestroy(&coarse_is)); 8869 8870 /* set defaults for coarse KSP and PC */ 8871 if (multilevel_allowed) { 8872 coarse_ksp_type = KSPRICHARDSON; 8873 coarse_pc_type = PCBDDC; 8874 } else { 8875 coarse_ksp_type = KSPPREONLY; 8876 coarse_pc_type = PCREDUNDANT; 8877 } 8878 8879 /* print some info if requested */ 8880 if (pcbddc->dbg_flag) { 8881 if (!multilevel_allowed) { 8882 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8883 if (multilevel_requested) { 8884 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8885 } else if (pcbddc->max_levels) { 8886 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8887 } 8888 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8889 } 8890 } 8891 8892 /* communicate coarse discrete gradient */ 8893 coarseG = NULL; 8894 if (pcbddc->nedcG && multilevel_allowed) { 8895 MPI_Comm ccomm; 8896 if (coarse_mat) { 8897 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8898 } else { 8899 ccomm = MPI_COMM_NULL; 8900 } 8901 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8902 } 8903 8904 /* create the coarse KSP object only once with defaults */ 8905 if (coarse_mat) { 8906 PetscBool isredundant, isbddc, force, valid; 8907 PetscViewer dbg_viewer = NULL; 8908 PetscBool isset, issym, isher, isspd; 8909 8910 if (pcbddc->dbg_flag) { 8911 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8912 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8913 } 8914 if (!pcbddc->coarse_ksp) { 8915 char prefix[256], str_level[16]; 8916 size_t len; 8917 8918 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8919 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8920 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8921 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8922 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8923 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8924 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8925 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8926 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8927 /* TODO is this logic correct? should check for coarse_mat type */ 8928 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8929 /* prefix */ 8930 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8931 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8932 if (!pcbddc->current_level) { 8933 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8934 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8935 } else { 8936 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8937 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8938 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8939 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8940 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8941 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 8942 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8943 } 8944 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8945 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8946 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8947 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8948 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8949 /* allow user customization */ 8950 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8951 /* get some info after set from options */ 8952 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8953 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8954 force = PETSC_FALSE; 8955 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8956 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8957 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8958 if (multilevel_allowed && !force && !valid) { 8959 isbddc = PETSC_TRUE; 8960 PetscCall(PCSetType(pc_temp, PCBDDC)); 8961 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8962 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8963 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8964 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8965 PetscObjectOptionsBegin((PetscObject)pc_temp); 8966 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8967 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8968 PetscOptionsEnd(); 8969 pc_temp->setfromoptionscalled++; 8970 } 8971 } 8972 } 8973 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8974 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8975 if (nisdofs) { 8976 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8977 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8978 } 8979 if (nisneu) { 8980 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8981 PetscCall(ISDestroy(&isarray[nisdofs])); 8982 } 8983 if (nisvert) { 8984 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8985 PetscCall(ISDestroy(&isarray[nis - 1])); 8986 } 8987 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8988 8989 /* get some info after set from options */ 8990 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8991 8992 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8993 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8994 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8995 force = PETSC_FALSE; 8996 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8997 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8998 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8999 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 9000 if (isredundant) { 9001 KSP inner_ksp; 9002 PC inner_pc; 9003 9004 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 9005 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 9006 } 9007 9008 /* parameters which miss an API */ 9009 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 9010 if (isbddc) { 9011 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 9012 9013 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 9014 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 9015 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 9016 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 9017 if (pcbddc_coarse->benign_saddle_point) { 9018 Mat coarsedivudotp_is; 9019 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 9020 IS row, col; 9021 const PetscInt *gidxs; 9022 PetscInt n, st, M, N; 9023 9024 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 9025 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 9026 st = st - n; 9027 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 9028 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 9029 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 9030 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 9031 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 9032 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 9033 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 9034 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 9035 PetscCall(ISGetSize(row, &M)); 9036 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 9037 PetscCall(ISDestroy(&row)); 9038 PetscCall(ISDestroy(&col)); 9039 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 9040 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 9041 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 9042 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 9043 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 9044 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 9045 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 9046 PetscCall(MatDestroy(&coarsedivudotp)); 9047 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 9048 PetscCall(MatDestroy(&coarsedivudotp_is)); 9049 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 9050 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 9051 } 9052 } 9053 9054 /* propagate symmetry info of coarse matrix */ 9055 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 9056 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 9057 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 9058 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 9059 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 9060 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 9061 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 9062 9063 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 9064 /* set operators */ 9065 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 9066 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 9067 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 9068 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 9069 } 9070 PetscCall(MatDestroy(&coarseG)); 9071 PetscCall(PetscFree(isarray)); 9072 #if 0 9073 { 9074 PetscViewer viewer; 9075 char filename[256]; 9076 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 9077 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 9078 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 9079 PetscCall(MatView(coarse_mat,viewer)); 9080 PetscCall(PetscViewerPopFormat(viewer)); 9081 PetscCall(PetscViewerDestroy(&viewer)); 9082 } 9083 #endif 9084 9085 if (corners) { 9086 Vec gv; 9087 IS is; 9088 const PetscInt *idxs; 9089 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 9090 PetscScalar *coords; 9091 9092 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 9093 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 9094 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 9095 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 9096 PetscCall(VecSetBlockSize(gv, cdim)); 9097 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 9098 PetscCall(VecSetType(gv, VECSTANDARD)); 9099 PetscCall(VecSetFromOptions(gv)); 9100 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 9101 9102 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9103 PetscCall(ISGetLocalSize(is, &n)); 9104 PetscCall(ISGetIndices(is, &idxs)); 9105 PetscCall(PetscMalloc1(n * cdim, &coords)); 9106 for (i = 0; i < n; i++) { 9107 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 9108 } 9109 PetscCall(ISRestoreIndices(is, &idxs)); 9110 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9111 9112 PetscCall(ISGetLocalSize(corners, &n)); 9113 PetscCall(ISGetIndices(corners, &idxs)); 9114 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 9115 PetscCall(ISRestoreIndices(corners, &idxs)); 9116 PetscCall(PetscFree(coords)); 9117 PetscCall(VecAssemblyBegin(gv)); 9118 PetscCall(VecAssemblyEnd(gv)); 9119 PetscCall(VecGetArray(gv, &coords)); 9120 if (pcbddc->coarse_ksp) { 9121 PC coarse_pc; 9122 PetscBool isbddc; 9123 9124 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 9125 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 9126 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 9127 PetscReal *realcoords; 9128 9129 PetscCall(VecGetLocalSize(gv, &n)); 9130 #if defined(PETSC_USE_COMPLEX) 9131 PetscCall(PetscMalloc1(n, &realcoords)); 9132 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 9133 #else 9134 realcoords = coords; 9135 #endif 9136 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 9137 #if defined(PETSC_USE_COMPLEX) 9138 PetscCall(PetscFree(realcoords)); 9139 #endif 9140 } 9141 } 9142 PetscCall(VecRestoreArray(gv, &coords)); 9143 PetscCall(VecDestroy(&gv)); 9144 } 9145 PetscCall(ISDestroy(&corners)); 9146 9147 if (pcbddc->coarse_ksp) { 9148 Vec crhs, csol; 9149 9150 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9151 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9152 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9153 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9154 } 9155 PetscCall(MatDestroy(&coarsedivudotp)); 9156 9157 /* compute null space for coarse solver if the benign trick has been requested */ 9158 if (pcbddc->benign_null) { 9159 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9160 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9161 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9162 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9163 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9164 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9165 if (coarse_mat) { 9166 Vec nullv; 9167 PetscScalar *array, *array2; 9168 PetscInt nl; 9169 9170 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9171 PetscCall(VecGetLocalSize(nullv, &nl)); 9172 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9173 PetscCall(VecGetArray(nullv, &array2)); 9174 PetscCall(PetscArraycpy(array2, array, nl)); 9175 PetscCall(VecRestoreArray(nullv, &array2)); 9176 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9177 PetscCall(VecNormalize(nullv, NULL)); 9178 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9179 PetscCall(VecDestroy(&nullv)); 9180 } 9181 } 9182 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9183 9184 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9185 if (pcbddc->coarse_ksp) { 9186 PetscBool ispreonly; 9187 9188 if (CoarseNullSpace) { 9189 PetscBool isnull; 9190 9191 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9192 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9193 /* TODO: add local nullspaces (if any) */ 9194 } 9195 /* setup coarse ksp */ 9196 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9197 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9198 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9199 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9200 KSP check_ksp; 9201 KSPType check_ksp_type; 9202 PC check_pc; 9203 Vec check_vec, coarse_vec; 9204 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9205 PetscInt its; 9206 PetscBool compute_eigs; 9207 PetscReal *eigs_r, *eigs_c; 9208 PetscInt neigs; 9209 const char *prefix; 9210 9211 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9212 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9213 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9214 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9215 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9216 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9217 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9218 /* prevent from setup unneeded object */ 9219 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9220 PetscCall(PCSetType(check_pc, PCNONE)); 9221 if (ispreonly) { 9222 check_ksp_type = KSPPREONLY; 9223 compute_eigs = PETSC_FALSE; 9224 } else { 9225 check_ksp_type = KSPGMRES; 9226 compute_eigs = PETSC_TRUE; 9227 } 9228 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9229 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9230 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9231 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9232 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9233 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9234 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9235 PetscCall(KSPSetFromOptions(check_ksp)); 9236 PetscCall(KSPSetUp(check_ksp)); 9237 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9238 PetscCall(KSPSetPC(check_ksp, check_pc)); 9239 /* create random vec */ 9240 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9241 PetscCall(VecSetRandom(check_vec, NULL)); 9242 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9243 /* solve coarse problem */ 9244 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9245 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9246 /* set eigenvalue estimation if preonly has not been requested */ 9247 if (compute_eigs) { 9248 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9249 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9250 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9251 if (neigs) { 9252 lambda_max = eigs_r[neigs - 1]; 9253 lambda_min = eigs_r[0]; 9254 if (pcbddc->use_coarse_estimates) { 9255 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9256 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9257 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9258 } 9259 } 9260 } 9261 } 9262 9263 /* check coarse problem residual error */ 9264 if (pcbddc->dbg_flag) { 9265 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9266 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9267 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9268 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9269 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9270 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9271 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9272 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9273 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer)); 9274 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9275 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9276 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9277 if (compute_eigs) { 9278 PetscReal lambda_max_s, lambda_min_s; 9279 KSPConvergedReason reason; 9280 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9281 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9282 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9283 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9284 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9285 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9286 } 9287 PetscCall(PetscViewerFlush(dbg_viewer)); 9288 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9289 } 9290 PetscCall(VecDestroy(&check_vec)); 9291 PetscCall(VecDestroy(&coarse_vec)); 9292 PetscCall(KSPDestroy(&check_ksp)); 9293 if (compute_eigs) { 9294 PetscCall(PetscFree(eigs_r)); 9295 PetscCall(PetscFree(eigs_c)); 9296 } 9297 } 9298 } 9299 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9300 /* print additional info */ 9301 if (pcbddc->dbg_flag) { 9302 /* waits until all processes reaches this point */ 9303 PetscCall(PetscBarrier((PetscObject)pc)); 9304 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9305 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9306 } 9307 9308 /* free memory */ 9309 PetscCall(MatDestroy(&coarse_mat)); 9310 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9311 PetscFunctionReturn(PETSC_SUCCESS); 9312 } 9313 9314 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9315 { 9316 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9317 PC_IS *pcis = (PC_IS *)pc->data; 9318 IS subset, subset_mult, subset_n; 9319 PetscInt local_size, coarse_size = 0; 9320 PetscInt *local_primal_indices = NULL; 9321 const PetscInt *t_local_primal_indices; 9322 9323 PetscFunctionBegin; 9324 /* Compute global number of coarse dofs */ 9325 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9326 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9327 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9328 PetscCall(ISDestroy(&subset_n)); 9329 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9330 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9331 PetscCall(ISDestroy(&subset)); 9332 PetscCall(ISDestroy(&subset_mult)); 9333 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9334 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9335 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9336 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9337 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9338 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9339 PetscCall(ISDestroy(&subset_n)); 9340 9341 if (pcbddc->dbg_flag) { 9342 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9343 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9344 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9345 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9346 } 9347 9348 /* get back data */ 9349 *coarse_size_n = coarse_size; 9350 *local_primal_indices_n = local_primal_indices; 9351 PetscFunctionReturn(PETSC_SUCCESS); 9352 } 9353 9354 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9355 { 9356 IS localis_t; 9357 PetscInt i, lsize, *idxs, n; 9358 PetscScalar *vals; 9359 9360 PetscFunctionBegin; 9361 /* get indices in local ordering exploiting local to global map */ 9362 PetscCall(ISGetLocalSize(globalis, &lsize)); 9363 PetscCall(PetscMalloc1(lsize, &vals)); 9364 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9365 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9366 PetscCall(VecSet(gwork, 0.0)); 9367 PetscCall(VecSet(lwork, 0.0)); 9368 if (idxs) { /* multilevel guard */ 9369 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9370 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9371 } 9372 PetscCall(VecAssemblyBegin(gwork)); 9373 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9374 PetscCall(PetscFree(vals)); 9375 PetscCall(VecAssemblyEnd(gwork)); 9376 /* now compute set in local ordering */ 9377 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9378 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9379 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9380 PetscCall(VecGetSize(lwork, &n)); 9381 for (i = 0, lsize = 0; i < n; i++) { 9382 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9383 } 9384 PetscCall(PetscMalloc1(lsize, &idxs)); 9385 for (i = 0, lsize = 0; i < n; i++) { 9386 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9387 } 9388 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9389 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9390 *localis = localis_t; 9391 PetscFunctionReturn(PETSC_SUCCESS); 9392 } 9393 9394 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9395 { 9396 PC_IS *pcis = (PC_IS *)pc->data; 9397 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9398 PC_IS *pcisf; 9399 PC_BDDC *pcbddcf; 9400 PC pcf; 9401 9402 PetscFunctionBegin; 9403 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9404 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9405 PetscCall(PCSetType(pcf, PCBDDC)); 9406 9407 pcisf = (PC_IS *)pcf->data; 9408 pcbddcf = (PC_BDDC *)pcf->data; 9409 9410 pcisf->is_B_local = pcis->is_B_local; 9411 pcisf->vec1_N = pcis->vec1_N; 9412 pcisf->BtoNmap = pcis->BtoNmap; 9413 pcisf->n = pcis->n; 9414 pcisf->n_B = pcis->n_B; 9415 9416 PetscCall(PetscFree(pcbddcf->mat_graph)); 9417 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9418 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9419 pcbddcf->sub_schurs = schurs; 9420 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9421 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9422 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9423 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9424 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9425 pcbddcf->use_faces = PETSC_TRUE; 9426 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9427 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9428 pcbddcf->use_qr_single = (PetscBool)!constraints; 9429 pcbddcf->fake_change = PETSC_TRUE; 9430 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9431 9432 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9433 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9434 9435 *change = pcbddcf->ConstraintMatrix; 9436 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9437 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9438 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9439 9440 if (schurs) pcbddcf->sub_schurs = NULL; 9441 pcbddcf->ConstraintMatrix = NULL; 9442 pcbddcf->mat_graph = NULL; 9443 pcisf->is_B_local = NULL; 9444 pcisf->vec1_N = NULL; 9445 pcisf->BtoNmap = NULL; 9446 PetscCall(PCDestroy(&pcf)); 9447 PetscFunctionReturn(PETSC_SUCCESS); 9448 } 9449 9450 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9451 { 9452 PC_IS *pcis = (PC_IS *)pc->data; 9453 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9454 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9455 Mat S_j; 9456 PetscInt *used_xadj, *used_adjncy; 9457 PetscBool free_used_adj; 9458 9459 PetscFunctionBegin; 9460 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9461 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9462 free_used_adj = PETSC_FALSE; 9463 if (pcbddc->sub_schurs_layers == -1) { 9464 used_xadj = NULL; 9465 used_adjncy = NULL; 9466 } else { 9467 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9468 used_xadj = pcbddc->mat_graph->xadj; 9469 used_adjncy = pcbddc->mat_graph->adjncy; 9470 } else if (pcbddc->computed_rowadj) { 9471 used_xadj = pcbddc->mat_graph->xadj; 9472 used_adjncy = pcbddc->mat_graph->adjncy; 9473 } else { 9474 PetscBool flg_row = PETSC_FALSE; 9475 const PetscInt *xadj, *adjncy; 9476 PetscInt nvtxs; 9477 9478 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9479 if (flg_row) { 9480 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9481 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9482 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9483 free_used_adj = PETSC_TRUE; 9484 } else { 9485 pcbddc->sub_schurs_layers = -1; 9486 used_xadj = NULL; 9487 used_adjncy = NULL; 9488 } 9489 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9490 } 9491 } 9492 9493 /* setup sub_schurs data */ 9494 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9495 if (!sub_schurs->schur_explicit) { 9496 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9497 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9498 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9499 } else { 9500 Mat change = NULL; 9501 Vec scaling = NULL; 9502 IS change_primal = NULL, iP; 9503 PetscInt benign_n; 9504 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9505 PetscBool need_change = PETSC_FALSE; 9506 PetscBool discrete_harmonic = PETSC_FALSE; 9507 9508 if (!pcbddc->use_vertices && reuse_solvers) { 9509 PetscInt n_vertices; 9510 9511 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9512 reuse_solvers = (PetscBool)!n_vertices; 9513 } 9514 if (!pcbddc->benign_change_explicit) { 9515 benign_n = pcbddc->benign_n; 9516 } else { 9517 benign_n = 0; 9518 } 9519 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9520 We need a global reduction to avoid possible deadlocks. 9521 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9522 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9523 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9524 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9525 need_change = (PetscBool)(!need_change); 9526 } 9527 /* If the user defines additional constraints, we import them here */ 9528 if (need_change) { 9529 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9530 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9531 } 9532 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9533 9534 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9535 if (iP) { 9536 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9537 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9538 PetscOptionsEnd(); 9539 } 9540 if (discrete_harmonic) { 9541 Mat A; 9542 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9543 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9544 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9545 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9546 pcbddc->benign_zerodiag_subs, change, change_primal)); 9547 PetscCall(MatDestroy(&A)); 9548 } else { 9549 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9550 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9551 } 9552 PetscCall(MatDestroy(&change)); 9553 PetscCall(ISDestroy(&change_primal)); 9554 } 9555 PetscCall(MatDestroy(&S_j)); 9556 9557 /* free adjacency */ 9558 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9559 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9560 PetscFunctionReturn(PETSC_SUCCESS); 9561 } 9562 9563 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9564 { 9565 PC_IS *pcis = (PC_IS *)pc->data; 9566 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9567 PCBDDCGraph graph; 9568 9569 PetscFunctionBegin; 9570 /* attach interface graph for determining subsets */ 9571 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9572 IS verticesIS, verticescomm; 9573 PetscInt vsize, *idxs; 9574 9575 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9576 PetscCall(ISGetSize(verticesIS, &vsize)); 9577 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9578 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9579 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9580 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9581 PetscCall(PCBDDCGraphCreate(&graph)); 9582 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9583 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9584 PetscCall(ISDestroy(&verticescomm)); 9585 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9586 } else { 9587 graph = pcbddc->mat_graph; 9588 } 9589 /* print some info */ 9590 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9591 IS vertices; 9592 PetscInt nv, nedges, nfaces; 9593 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9594 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9595 PetscCall(ISGetSize(vertices, &nv)); 9596 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9597 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9598 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9599 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9600 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9601 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9602 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9603 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9604 } 9605 9606 /* sub_schurs init */ 9607 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9608 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9609 9610 /* free graph struct */ 9611 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9612 PetscFunctionReturn(PETSC_SUCCESS); 9613 } 9614 9615 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9616 { 9617 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9618 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9619 const PetscInt *idxs; 9620 IS gis; 9621 9622 PetscFunctionBegin; 9623 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9624 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9625 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9626 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9627 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9628 PetscCall(ISGetLocalSize(is, &ni)); 9629 PetscCall(ISGetIndices(is, &idxs)); 9630 for (PetscInt i = 0; i < ni; i++) { 9631 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9632 matis->sf_leafdata[idxs[i]] = 1; 9633 } 9634 PetscCall(ISRestoreIndices(is, &idxs)); 9635 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9636 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9637 ln = 0; 9638 for (PetscInt i = 0; i < n; i++) { 9639 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9640 } 9641 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9642 PetscCall(ISView(gis, viewer)); 9643 PetscCall(ISDestroy(&gis)); 9644 PetscFunctionReturn(PETSC_SUCCESS); 9645 } 9646 9647 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9648 { 9649 PetscInt header[11]; 9650 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9651 PetscViewer viewer; 9652 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9653 9654 PetscFunctionBegin; 9655 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9656 if (load) { 9657 IS is; 9658 Mat A; 9659 9660 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9661 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9662 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9663 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9664 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9665 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9666 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9667 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9668 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9669 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9670 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9671 if (header[0]) { 9672 PetscCall(ISCreate(comm, &is)); 9673 PetscCall(ISLoad(is, viewer)); 9674 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9675 PetscCall(ISDestroy(&is)); 9676 } 9677 if (header[1]) { 9678 PetscCall(ISCreate(comm, &is)); 9679 PetscCall(ISLoad(is, viewer)); 9680 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9681 PetscCall(ISDestroy(&is)); 9682 } 9683 if (header[2]) { 9684 IS *isarray; 9685 9686 PetscCall(PetscMalloc1(header[2], &isarray)); 9687 for (PetscInt i = 0; i < header[2]; i++) { 9688 PetscCall(ISCreate(comm, &isarray[i])); 9689 PetscCall(ISLoad(isarray[i], viewer)); 9690 } 9691 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9692 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9693 PetscCall(PetscFree(isarray)); 9694 } 9695 if (header[3]) { 9696 PetscCall(ISCreate(comm, &is)); 9697 PetscCall(ISLoad(is, viewer)); 9698 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9699 PetscCall(ISDestroy(&is)); 9700 } 9701 if (header[4]) { 9702 PetscCall(MatCreate(comm, &A)); 9703 PetscCall(MatSetType(A, MATAIJ)); 9704 PetscCall(MatLoad(A, viewer)); 9705 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9706 PetscCall(MatDestroy(&A)); 9707 } 9708 if (header[9]) { 9709 PetscCall(MatCreate(comm, &A)); 9710 PetscCall(MatSetType(A, MATIS)); 9711 PetscCall(MatLoad(A, viewer)); 9712 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9713 PetscCall(MatDestroy(&A)); 9714 } 9715 } else { 9716 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal; 9717 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal; 9718 header[2] = pcbddc->n_ISForDofsLocal; 9719 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local; 9720 header[4] = (PetscInt)!!pcbddc->discretegradient; 9721 header[5] = pcbddc->nedorder; 9722 header[6] = pcbddc->nedfield; 9723 header[7] = (PetscInt)pcbddc->nedglobal; 9724 header[8] = (PetscInt)pcbddc->conforming; 9725 header[9] = (PetscInt)!!pcbddc->divudotp; 9726 header[10] = (PetscInt)pcbddc->divudotp_trans; 9727 if (header[4]) header[3] = 0; 9728 9729 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9730 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9731 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9732 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9733 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9734 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9735 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9736 } 9737 PetscCall(PetscViewerDestroy(&viewer)); 9738 PetscFunctionReturn(PETSC_SUCCESS); 9739 } 9740 9741 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9742 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9743 { 9744 Mat At; 9745 IS rows; 9746 PetscInt rst, ren; 9747 PetscLayout rmap; 9748 9749 PetscFunctionBegin; 9750 rst = ren = 0; 9751 if (ccomm != MPI_COMM_NULL) { 9752 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9753 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9754 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9755 PetscCall(PetscLayoutSetUp(rmap)); 9756 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9757 } 9758 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9759 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9760 PetscCall(ISDestroy(&rows)); 9761 9762 if (ccomm != MPI_COMM_NULL) { 9763 Mat_MPIAIJ *a, *b; 9764 IS from, to; 9765 Vec gvec; 9766 PetscInt lsize; 9767 9768 PetscCall(MatCreate(ccomm, B)); 9769 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9770 PetscCall(MatSetType(*B, MATAIJ)); 9771 PetscCall(PetscLayoutDestroy(&(*B)->rmap)); 9772 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9773 a = (Mat_MPIAIJ *)At->data; 9774 b = (Mat_MPIAIJ *)(*B)->data; 9775 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9776 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9777 PetscCall(PetscObjectReference((PetscObject)a->A)); 9778 PetscCall(PetscObjectReference((PetscObject)a->B)); 9779 b->A = a->A; 9780 b->B = a->B; 9781 9782 b->donotstash = a->donotstash; 9783 b->roworiented = a->roworiented; 9784 b->rowindices = NULL; 9785 b->rowvalues = NULL; 9786 b->getrowactive = PETSC_FALSE; 9787 9788 (*B)->rmap = rmap; 9789 (*B)->factortype = A->factortype; 9790 (*B)->assembled = PETSC_TRUE; 9791 (*B)->insertmode = NOT_SET_VALUES; 9792 (*B)->preallocated = PETSC_TRUE; 9793 9794 if (a->colmap) { 9795 #if defined(PETSC_USE_CTABLE) 9796 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9797 #else 9798 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9799 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9800 #endif 9801 } else b->colmap = NULL; 9802 if (a->garray) { 9803 PetscInt len; 9804 len = a->B->cmap->n; 9805 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9806 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9807 } else b->garray = NULL; 9808 9809 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9810 b->lvec = a->lvec; 9811 9812 /* cannot use VecScatterCopy */ 9813 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9814 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9815 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9816 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9817 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9818 PetscCall(ISDestroy(&from)); 9819 PetscCall(ISDestroy(&to)); 9820 PetscCall(VecDestroy(&gvec)); 9821 } 9822 PetscCall(MatDestroy(&At)); 9823 PetscFunctionReturn(PETSC_SUCCESS); 9824 } 9825 9826 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9827 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9828 { 9829 PetscBool isaij; 9830 MPI_Comm comm; 9831 9832 PetscFunctionBegin; 9833 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9834 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9835 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9836 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9837 if (isaij) { /* SeqAIJ supports repeated rows */ 9838 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9839 } else { 9840 Mat A_loc; 9841 Mat_SeqAIJ *da; 9842 PetscSF sf; 9843 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9844 PetscScalar *daa; 9845 const PetscInt *idxs; 9846 const PetscSFNode *iremotes; 9847 PetscSFNode *remotes; 9848 9849 /* SF for incoming rows */ 9850 PetscCall(PetscSFCreate(comm, &sf)); 9851 PetscCall(ISGetLocalSize(rows, &ni)); 9852 PetscCall(ISGetIndices(rows, &idxs)); 9853 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9854 PetscCall(ISRestoreIndices(rows, &idxs)); 9855 9856 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9857 da = (Mat_SeqAIJ *)A_loc->data; 9858 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9859 for (PetscInt i = 0; i < m; i++) { 9860 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9861 rdata[2 * i + 1] = da->i[i]; 9862 } 9863 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9864 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9865 PetscCall(PetscMalloc1(ni + 1, &di)); 9866 di[0] = 0; 9867 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9868 PetscCall(PetscMalloc1(di[ni], &dj)); 9869 PetscCall(PetscMalloc1(di[ni], &daa)); 9870 PetscCall(PetscMalloc1(di[ni], &remotes)); 9871 9872 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9873 9874 /* SF graph for nonzeros */ 9875 c = 0; 9876 for (PetscInt i = 0; i < ni; i++) { 9877 const PetscInt rank = iremotes[i].rank; 9878 const PetscInt rsize = ldata[2 * i]; 9879 for (PetscInt j = 0; j < rsize; j++) { 9880 remotes[c].rank = rank; 9881 remotes[c].index = ldata[2 * i + 1] + j; 9882 c++; 9883 } 9884 } 9885 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9886 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9887 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9888 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9889 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9890 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9891 9892 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9893 PetscCall(MatDestroy(&A_loc)); 9894 PetscCall(PetscSFDestroy(&sf)); 9895 PetscCall(PetscFree(di)); 9896 PetscCall(PetscFree(dj)); 9897 PetscCall(PetscFree(daa)); 9898 PetscCall(PetscFree(remotes)); 9899 PetscCall(PetscFree2(ldata, rdata)); 9900 } 9901 PetscFunctionReturn(PETSC_SUCCESS); 9902 } 9903