1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (no preallocation) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1334 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1335 PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 1336 //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1337 //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1338 //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1339 1340 /* Defaults to identity */ 1341 { 1342 Vec w; 1343 const PetscScalar *wa; 1344 1345 PetscCall(MatCreateVecs(T, &w, NULL)); 1346 PetscCall(VecSetLocalToGlobalMapping(w, al2g)); 1347 PetscCall(VecSet(w, 1.0)); 1348 for (i = 0; i < nee; i++) { 1349 const PetscInt *idxs; 1350 PetscInt nl; 1351 1352 PetscCall(ISGetLocalSize(eedges[i], &nl)); 1353 PetscCall(ISGetIndices(eedges[i], &idxs)); 1354 PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES)); 1355 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1356 } 1357 PetscCall(VecAssemblyBegin(w)); 1358 PetscCall(VecAssemblyEnd(w)); 1359 PetscCall(VecGetArrayRead(w, &wa)); 1360 for (i = T->rmap->rstart; i < T->rmap->rend; i++) 1361 if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1362 PetscCall(VecRestoreArrayRead(w, &wa)); 1363 PetscCall(VecDestroy(&w)); 1364 } 1365 1366 /* Create discrete gradient for the coarser level if needed */ 1367 PetscCall(MatDestroy(&pcbddc->nedcG)); 1368 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1369 if (pcbddc->current_level < pcbddc->max_levels) { 1370 ISLocalToGlobalMapping cel2g, cvl2g; 1371 IS wis, gwis; 1372 PetscInt cnv, cne; 1373 1374 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1375 if (fl2g) { 1376 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1377 } else { 1378 PetscCall(PetscObjectReference((PetscObject)wis)); 1379 pcbddc->nedclocal = wis; 1380 } 1381 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1382 PetscCall(ISDestroy(&wis)); 1383 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1384 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1385 PetscCall(ISDestroy(&wis)); 1386 PetscCall(ISDestroy(&gwis)); 1387 1388 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1389 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1390 PetscCall(ISDestroy(&wis)); 1391 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1392 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1393 PetscCall(ISDestroy(&wis)); 1394 PetscCall(ISDestroy(&gwis)); 1395 1396 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1397 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1398 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1399 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1400 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1401 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1402 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1403 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1404 } 1405 1406 MatNullSpace nnsp; 1407 PetscBool nnsp_has_const = PETSC_FALSE; 1408 const Vec *nnsp_vecs = NULL; 1409 PetscInt nnsp_nvecs = 0; 1410 VecScatter nnsp_vscat = NULL; 1411 PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp)); 1412 if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs)); 1413 if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */ 1414 IS allextcols, gallextcols, galleedges, is_E_to_zero; 1415 Vec E, V; 1416 PetscInt *eedgesidxs; 1417 const PetscScalar *evals; 1418 1419 PetscCall(MatCreateVecs(pc->pmat, &E, NULL)); 1420 PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL)); 1421 PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols)); 1422 cum = 0; 1423 for (i = 0; i < nee; i++) { 1424 PetscInt j; 1425 1426 PetscCall(ISGetLocalSize(eedges[i], &j)); 1427 PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized edge %" PetscInt_FMT, i); 1428 cum += j - 1; 1429 } 1430 PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs)); 1431 cum = 0; 1432 for (i = 0; i < nee; i++) { 1433 const PetscInt *idxs; 1434 PetscInt j; 1435 1436 PetscCall(ISGetLocalSize(eedges[i], &j)); 1437 PetscCall(ISGetIndices(eedges[i], &idxs)); 1438 PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */ 1439 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1440 cum += j - 1; 1441 } 1442 PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs)); 1443 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges)); 1444 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols)); 1445 PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat)); 1446 PetscCall(ISDestroy(&allextcols)); 1447 PetscCall(ISDestroy(&gallextcols)); 1448 PetscCall(ISDestroy(&galleedges)); 1449 1450 /* identify dofs we must zero if importing user-defined near nullspace from pmat */ 1451 PetscCall(VecSet(E, 1.0)); 1452 PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES)); 1453 PetscCall(VecAssemblyBegin(E)); 1454 PetscCall(VecAssemblyEnd(E)); 1455 PetscCall(VecGetArrayRead(E, &evals)); 1456 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) 1457 if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart; 1458 PetscCall(VecRestoreArrayRead(E, &evals)); 1459 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero)); 1460 PetscCall(PetscFree(eedgesidxs)); 1461 1462 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V)); 1463 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E)); 1464 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero)); 1465 PetscCall(ISDestroy(&is_E_to_zero)); 1466 PetscCall(VecDestroy(&V)); 1467 PetscCall(VecDestroy(&E)); 1468 } 1469 #if defined(PRINT_GDET) 1470 inc = 0; 1471 lev = pcbddc->current_level; 1472 #endif 1473 1474 /* Insert values in the change of basis matrix */ 1475 for (i = 0; i < nee; i++) { 1476 Mat Gins = NULL, GKins = NULL; 1477 IS cornersis = NULL; 1478 PetscScalar cvals[2]; 1479 1480 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1481 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1482 if (Gins && GKins) { 1483 const PetscScalar *data; 1484 const PetscInt *rows, *cols; 1485 PetscInt nrh, nch, nrc, ncc; 1486 1487 PetscCall(ISGetIndices(eedges[i], &cols)); 1488 /* H1 */ 1489 PetscCall(ISGetIndices(extrows[i], &rows)); 1490 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1491 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1492 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1493 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1494 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1495 /* complement */ 1496 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1497 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1498 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1499 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1500 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1501 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1502 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1503 1504 /* coarse discrete gradient */ 1505 if (pcbddc->nedcG) { 1506 PetscInt cols[2]; 1507 1508 cols[0] = 2 * i; 1509 cols[1] = 2 * i + 1; 1510 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1511 } 1512 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1513 } 1514 PetscCall(ISDestroy(&extrows[i])); 1515 PetscCall(ISDestroy(&extcols[i])); 1516 PetscCall(ISDestroy(&cornersis)); 1517 PetscCall(MatDestroy(&Gins)); 1518 PetscCall(MatDestroy(&GKins)); 1519 } 1520 1521 /* import edge constraints */ 1522 if (nnsp_vscat) { 1523 Vec V, E, *quadvecs; 1524 PetscInt nvecs, nvecs_orth; 1525 MatNullSpace onnsp = NULL; 1526 PetscBool onnsp_has_const = PETSC_FALSE; 1527 const Vec *onnsp_vecs = NULL; 1528 PetscInt onnsp_nvecs = 0, new_nnsp_nvecs, old_nnsp_nvecs; 1529 IS is_E_to_zero; 1530 1531 /* import nearnullspace from preconditioning matrix if user-defined */ 1532 PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp)); 1533 if (onnsp) { 1534 PetscBool isinternal; 1535 1536 PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal)); 1537 if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs)); 1538 } 1539 new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0); 1540 old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0); 1541 nvecs = old_nnsp_nvecs + new_nnsp_nvecs; 1542 PetscCall(PetscMalloc1(nvecs, &quadvecs)); 1543 1544 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V)); 1545 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E)); 1546 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero)); 1547 for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i])); 1548 cum = 0; 1549 if (nnsp_has_const) { 1550 PetscCall(VecSet(V, 1.0)); 1551 PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD)); 1552 PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD)); 1553 cum = 1; 1554 } 1555 for (i = 0; i < nnsp_nvecs; i++) { 1556 PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD)); 1557 PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD)); 1558 } 1559 1560 /* Now add old nnsp if present */ 1561 cum = 0; 1562 if (onnsp_has_const) { 1563 PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0)); 1564 PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0)); 1565 cum = 1; 1566 } 1567 for (i = 0; i < onnsp_nvecs; i++) { 1568 PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs])); 1569 PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0)); 1570 } 1571 nvecs_orth = nvecs; 1572 PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs)); 1573 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp)); 1574 for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i])); 1575 PetscCall(PetscFree(quadvecs)); 1576 PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp")); 1577 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1578 PetscCall(MatNullSpaceDestroy(&nnsp)); 1579 } 1580 PetscCall(VecScatterDestroy(&nnsp_vscat)); 1581 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1582 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1583 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1584 1585 /* Start assembling */ 1586 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1587 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1588 1589 /* Free */ 1590 if (fl2g) { 1591 PetscCall(ISDestroy(&primals)); 1592 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1593 PetscCall(PetscFree(eedges)); 1594 } 1595 1596 /* hack mat_graph with primal dofs on the coarse edges */ 1597 { 1598 PCBDDCGraph graph = pcbddc->mat_graph; 1599 PetscInt *oqueue = graph->queue; 1600 PetscInt *ocptr = graph->cptr; 1601 PetscInt ncc, *idxs; 1602 1603 /* find first primal edge */ 1604 if (pcbddc->nedclocal) { 1605 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1606 } else { 1607 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1608 idxs = cedges; 1609 } 1610 cum = 0; 1611 while (cum < nee && cedges[cum] < 0) cum++; 1612 1613 /* adapt connected components */ 1614 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1615 graph->cptr[0] = 0; 1616 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1617 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1618 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1619 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1620 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1621 ncc++; 1622 lc--; 1623 cum++; 1624 while (cum < nee && cedges[cum] < 0) cum++; 1625 } 1626 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1627 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1628 ncc++; 1629 } 1630 graph->ncc = ncc; 1631 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1632 PetscCall(PetscFree2(ocptr, oqueue)); 1633 } 1634 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1635 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1636 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1637 1638 PetscCall(ISDestroy(&nedfieldlocal)); 1639 PetscCall(PetscFree(extrow)); 1640 PetscCall(PetscFree2(work, rwork)); 1641 PetscCall(PetscFree(corners)); 1642 PetscCall(PetscFree(cedges)); 1643 PetscCall(PetscFree(extrows)); 1644 PetscCall(PetscFree(extcols)); 1645 PetscCall(MatDestroy(&lG)); 1646 1647 /* Complete assembling */ 1648 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1650 if (pcbddc->nedcG) { 1651 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1653 } 1654 1655 PetscCall(ISDestroy(&elements_corners)); 1656 1657 /* set change of basis */ 1658 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1659 PetscCall(MatDestroy(&T)); 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 /* the near-null space of BDDC carries information on quadrature weights, 1664 and these can be collinear -> so cheat with MatNullSpaceCreate 1665 and create a suitable set of basis vectors first */ 1666 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1667 { 1668 PetscInt i; 1669 1670 PetscFunctionBegin; 1671 for (i = 0; i < nvecs; i++) { 1672 PetscInt first, last; 1673 1674 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1675 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1676 if (i >= first && i < last) { 1677 PetscScalar *data; 1678 PetscCall(VecGetArray(quad_vecs[i], &data)); 1679 if (!has_const) { 1680 data[i - first] = 1.; 1681 } else { 1682 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1683 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1684 } 1685 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1686 } 1687 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1688 } 1689 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1690 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1691 PetscInt first, last; 1692 PetscCall(VecLockReadPop(quad_vecs[i])); 1693 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1694 if (i >= first && i < last) { 1695 PetscScalar *data; 1696 PetscCall(VecGetArray(quad_vecs[i], &data)); 1697 if (!has_const) { 1698 data[i - first] = 0.; 1699 } else { 1700 data[2 * i - first] = 0.; 1701 data[2 * i - first + 1] = 0.; 1702 } 1703 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1704 } 1705 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1706 PetscCall(VecLockReadPush(quad_vecs[i])); 1707 } 1708 PetscFunctionReturn(PETSC_SUCCESS); 1709 } 1710 1711 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1712 { 1713 Mat loc_divudotp; 1714 Vec p, v, quad_vec; 1715 ISLocalToGlobalMapping map; 1716 PetscScalar *array; 1717 1718 PetscFunctionBegin; 1719 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1720 if (!transpose) { 1721 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1722 } else { 1723 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1724 } 1725 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1726 PetscCall(VecLockReadPop(quad_vec)); 1727 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1728 1729 /* compute local quad vec */ 1730 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1731 if (!transpose) { 1732 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1733 } else { 1734 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1735 } 1736 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1737 PetscCall(VecSet(p, 1.)); 1738 if (!transpose) { 1739 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1740 } else { 1741 PetscCall(MatMult(loc_divudotp, p, v)); 1742 } 1743 PetscCall(VecDestroy(&p)); 1744 if (vl2l) { 1745 Mat lA; 1746 VecScatter sc; 1747 Vec vins; 1748 1749 PetscCall(MatISGetLocalMat(A, &lA)); 1750 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1751 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1752 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1753 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1754 PetscCall(VecScatterDestroy(&sc)); 1755 PetscCall(VecDestroy(&v)); 1756 v = vins; 1757 } 1758 1759 /* mask summation of interface values */ 1760 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1761 const PetscInt *degree; 1762 PetscSF msf; 1763 1764 PetscCall(VecGetLocalSize(v, &n)); 1765 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1766 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1767 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1768 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1769 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1770 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1771 for (PetscInt i = 0, c = 0; i < nr; i++) { 1772 mmask[c] = 1; 1773 c += degree[i]; 1774 } 1775 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1776 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1777 PetscCall(VecGetArray(v, &array)); 1778 for (PetscInt i = 0; i < n; i++) { 1779 array[i] *= mask[i]; 1780 idxs[i] = i; 1781 } 1782 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1783 PetscCall(VecRestoreArray(v, &array)); 1784 PetscCall(PetscFree3(mmask, mask, idxs)); 1785 PetscCall(VecDestroy(&v)); 1786 PetscCall(VecAssemblyBegin(quad_vec)); 1787 PetscCall(VecAssemblyEnd(quad_vec)); 1788 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1789 PetscCall(VecLockReadPush(quad_vec)); 1790 PetscCall(VecDestroy(&quad_vec)); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1795 { 1796 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1797 1798 PetscFunctionBegin; 1799 if (primalv) { 1800 if (pcbddc->user_primal_vertices_local) { 1801 IS list[2], newp; 1802 1803 list[0] = primalv; 1804 list[1] = pcbddc->user_primal_vertices_local; 1805 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1806 PetscCall(ISSortRemoveDups(newp)); 1807 PetscCall(ISDestroy(&list[1])); 1808 pcbddc->user_primal_vertices_local = newp; 1809 } else { 1810 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1811 } 1812 } 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1817 { 1818 PetscInt f, *comp = (PetscInt *)ctx; 1819 1820 PetscFunctionBegin; 1821 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1826 { 1827 Vec local, global; 1828 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1829 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1830 PetscBool monolithic = PETSC_FALSE; 1831 1832 PetscFunctionBegin; 1833 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1834 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1835 PetscOptionsEnd(); 1836 /* need to convert from global to local topology information and remove references to information in global ordering */ 1837 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1838 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1839 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1840 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1841 if (monolithic) { /* just get block size to properly compute vertices */ 1842 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1843 goto boundary; 1844 } 1845 1846 if (pcbddc->user_provided_isfordofs) { 1847 if (pcbddc->n_ISForDofs) { 1848 PetscInt i; 1849 1850 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1851 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1852 PetscInt bs; 1853 1854 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1855 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1856 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1857 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1858 } 1859 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1860 pcbddc->n_ISForDofs = 0; 1861 PetscCall(PetscFree(pcbddc->ISForDofs)); 1862 } 1863 } else { 1864 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1865 DM dm; 1866 1867 PetscCall(MatGetDM(pc->pmat, &dm)); 1868 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1869 if (dm) { 1870 IS *fields; 1871 PetscInt nf, i; 1872 1873 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1874 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1875 for (i = 0; i < nf; i++) { 1876 PetscInt bs; 1877 1878 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1879 PetscCall(ISGetBlockSize(fields[i], &bs)); 1880 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1881 PetscCall(ISDestroy(&fields[i])); 1882 } 1883 PetscCall(PetscFree(fields)); 1884 pcbddc->n_ISForDofsLocal = nf; 1885 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1886 PetscContainer c; 1887 1888 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1889 if (c) { 1890 MatISLocalFields lf; 1891 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1892 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1893 } else { /* fallback, create the default fields if bs > 1 */ 1894 PetscInt i, n = matis->A->rmap->n; 1895 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1896 if (i > 1) { 1897 pcbddc->n_ISForDofsLocal = i; 1898 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1899 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1900 } 1901 } 1902 } 1903 } else { 1904 PetscInt i; 1905 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1906 } 1907 } 1908 1909 boundary: 1910 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1911 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1912 } else if (pcbddc->DirichletBoundariesLocal) { 1913 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1914 } 1915 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1916 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1917 } else if (pcbddc->NeumannBoundariesLocal) { 1918 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1919 } 1920 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1921 PetscCall(VecDestroy(&global)); 1922 PetscCall(VecDestroy(&local)); 1923 /* detect local disconnected subdomains if requested or needed */ 1924 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1925 IS primalv = NULL; 1926 PetscInt nel; 1927 PetscBool filter = pcbddc->detect_disconnected_filter; 1928 1929 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1930 PetscCall(PetscFree(pcbddc->local_subs)); 1931 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1932 if (matis->allow_repeated && nel) { 1933 const PetscInt *elsizes; 1934 1935 pcbddc->n_local_subs = nel; 1936 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1937 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1938 for (PetscInt i = 0, c = 0; i < nel; i++) { 1939 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1940 c += elsizes[i]; 1941 } 1942 } else { 1943 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1944 } 1945 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1946 PetscCall(ISDestroy(&primalv)); 1947 } 1948 /* early stage corner detection */ 1949 { 1950 DM dm; 1951 1952 PetscCall(MatGetDM(pc->pmat, &dm)); 1953 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1954 if (dm) { 1955 PetscBool isda; 1956 1957 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1958 if (isda) { 1959 ISLocalToGlobalMapping l2l; 1960 IS corners; 1961 Mat lA; 1962 PetscBool gl, lo; 1963 1964 { 1965 Vec cvec; 1966 const PetscScalar *coords; 1967 PetscInt dof, n, cdim; 1968 PetscBool memc = PETSC_TRUE; 1969 1970 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1971 PetscCall(DMGetCoordinates(dm, &cvec)); 1972 PetscCall(VecGetLocalSize(cvec, &n)); 1973 PetscCall(VecGetBlockSize(cvec, &cdim)); 1974 n /= cdim; 1975 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1976 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1977 PetscCall(VecGetArrayRead(cvec, &coords)); 1978 #if defined(PETSC_USE_COMPLEX) 1979 memc = PETSC_FALSE; 1980 #endif 1981 if (dof != 1) memc = PETSC_FALSE; 1982 if (memc) { 1983 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1984 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1985 PetscReal *bcoords = pcbddc->mat_graph->coords; 1986 PetscInt i, b, d; 1987 1988 for (i = 0; i < n; i++) { 1989 for (b = 0; b < dof; b++) { 1990 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1991 } 1992 } 1993 } 1994 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1995 pcbddc->mat_graph->cdim = cdim; 1996 pcbddc->mat_graph->cnloc = dof * n; 1997 pcbddc->mat_graph->cloc = PETSC_FALSE; 1998 } 1999 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 2000 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 2001 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 2002 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 2003 lo = (PetscBool)(l2l && corners); 2004 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2005 if (gl) { /* From PETSc's DMDA */ 2006 const PetscInt *idx; 2007 PetscInt dof, bs, *idxout, n; 2008 2009 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 2010 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 2011 PetscCall(ISGetLocalSize(corners, &n)); 2012 PetscCall(ISGetIndices(corners, &idx)); 2013 if (bs == dof) { 2014 PetscCall(PetscMalloc1(n, &idxout)); 2015 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 2016 } else { /* the original DMDA local-to-local map have been modified */ 2017 PetscInt i, d; 2018 2019 PetscCall(PetscMalloc1(dof * n, &idxout)); 2020 for (i = 0; i < n; i++) 2021 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 2022 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 2023 2024 bs = 1; 2025 n *= dof; 2026 } 2027 PetscCall(ISRestoreIndices(corners, &idx)); 2028 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 2029 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 2030 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 2031 PetscCall(ISDestroy(&corners)); 2032 pcbddc->corner_selected = PETSC_TRUE; 2033 pcbddc->corner_selection = PETSC_TRUE; 2034 } 2035 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 2036 } 2037 } 2038 } 2039 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 2040 DM dm; 2041 2042 PetscCall(MatGetDM(pc->pmat, &dm)); 2043 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2044 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 2045 Vec vcoords; 2046 PetscSection section; 2047 PetscReal *coords; 2048 PetscInt d, cdim, nl, nf, **ctxs; 2049 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 2050 /* debug coordinates */ 2051 PetscViewer viewer; 2052 PetscBool flg; 2053 PetscViewerFormat format; 2054 const char *prefix; 2055 2056 PetscCall(DMGetCoordinateDim(dm, &cdim)); 2057 PetscCall(DMGetLocalSection(dm, §ion)); 2058 PetscCall(PetscSectionGetNumFields(section, &nf)); 2059 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 2060 PetscCall(VecGetLocalSize(vcoords, &nl)); 2061 PetscCall(PetscMalloc1(nl * cdim, &coords)); 2062 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 2063 PetscCall(PetscMalloc1(nf, &ctxs[0])); 2064 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 2065 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 2066 2067 /* debug coordinates */ 2068 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 2069 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 2070 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 2071 for (d = 0; d < cdim; d++) { 2072 PetscInt i; 2073 const PetscScalar *v; 2074 char name[16]; 2075 2076 for (i = 0; i < nf; i++) ctxs[i][0] = d; 2077 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d)); 2078 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 2079 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 2080 if (flg) PetscCall(VecView(vcoords, viewer)); 2081 PetscCall(VecGetArrayRead(vcoords, &v)); 2082 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 2083 PetscCall(VecRestoreArrayRead(vcoords, &v)); 2084 } 2085 PetscCall(VecDestroy(&vcoords)); 2086 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 2087 PetscCall(PetscFree(coords)); 2088 PetscCall(PetscFree(ctxs[0])); 2089 PetscCall(PetscFree2(funcs, ctxs)); 2090 if (flg) { 2091 PetscCall(PetscViewerPopFormat(viewer)); 2092 PetscCall(PetscViewerDestroy(&viewer)); 2093 } 2094 } 2095 } 2096 PetscFunctionReturn(PETSC_SUCCESS); 2097 } 2098 2099 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 2100 { 2101 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2102 IS nis; 2103 const PetscInt *idxs; 2104 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 2105 2106 PetscFunctionBegin; 2107 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 2108 if (mop == MPI_LAND) { 2109 /* init rootdata with true */ 2110 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 2111 } else { 2112 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 2113 } 2114 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 2115 PetscCall(ISGetLocalSize(*is, &nd)); 2116 PetscCall(ISGetIndices(*is, &idxs)); 2117 for (i = 0; i < nd; i++) 2118 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 2119 PetscCall(ISRestoreIndices(*is, &idxs)); 2120 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2121 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2122 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2123 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2124 if (mop == MPI_LAND) { 2125 PetscCall(PetscMalloc1(nd, &nidxs)); 2126 } else { 2127 PetscCall(PetscMalloc1(n, &nidxs)); 2128 } 2129 for (i = 0, nnd = 0; i < n; i++) 2130 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2131 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2132 PetscCall(ISDestroy(is)); 2133 *is = nis; 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2138 { 2139 PC_IS *pcis = (PC_IS *)pc->data; 2140 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2141 2142 PetscFunctionBegin; 2143 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2144 if (pcbddc->ChangeOfBasisMatrix) { 2145 Vec swap; 2146 2147 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2148 swap = pcbddc->work_change; 2149 pcbddc->work_change = r; 2150 r = swap; 2151 } 2152 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2153 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2154 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2155 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2156 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2157 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2158 PetscCall(VecSet(z, 0.)); 2159 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2160 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2161 if (pcbddc->ChangeOfBasisMatrix) { 2162 pcbddc->work_change = r; 2163 PetscCall(VecCopy(z, pcbddc->work_change)); 2164 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2165 } 2166 PetscFunctionReturn(PETSC_SUCCESS); 2167 } 2168 2169 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2170 { 2171 PCBDDCBenignMatMult_ctx ctx; 2172 PetscBool apply_right, apply_left, reset_x; 2173 2174 PetscFunctionBegin; 2175 PetscCall(MatShellGetContext(A, &ctx)); 2176 if (transpose) { 2177 apply_right = ctx->apply_left; 2178 apply_left = ctx->apply_right; 2179 } else { 2180 apply_right = ctx->apply_right; 2181 apply_left = ctx->apply_left; 2182 } 2183 reset_x = PETSC_FALSE; 2184 if (apply_right) { 2185 const PetscScalar *ax; 2186 PetscInt nl, i; 2187 2188 PetscCall(VecGetLocalSize(x, &nl)); 2189 PetscCall(VecGetArrayRead(x, &ax)); 2190 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2191 PetscCall(VecRestoreArrayRead(x, &ax)); 2192 for (i = 0; i < ctx->benign_n; i++) { 2193 PetscScalar sum, val; 2194 const PetscInt *idxs; 2195 PetscInt nz, j; 2196 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2197 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2198 sum = 0.; 2199 if (ctx->apply_p0) { 2200 val = ctx->work[idxs[nz - 1]]; 2201 for (j = 0; j < nz - 1; j++) { 2202 sum += ctx->work[idxs[j]]; 2203 ctx->work[idxs[j]] += val; 2204 } 2205 } else { 2206 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2207 } 2208 ctx->work[idxs[nz - 1]] -= sum; 2209 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2210 } 2211 PetscCall(VecPlaceArray(x, ctx->work)); 2212 reset_x = PETSC_TRUE; 2213 } 2214 if (transpose) { 2215 PetscCall(MatMultTranspose(ctx->A, x, y)); 2216 } else { 2217 PetscCall(MatMult(ctx->A, x, y)); 2218 } 2219 if (reset_x) PetscCall(VecResetArray(x)); 2220 if (apply_left) { 2221 PetscScalar *ay; 2222 PetscInt i; 2223 2224 PetscCall(VecGetArray(y, &ay)); 2225 for (i = 0; i < ctx->benign_n; i++) { 2226 PetscScalar sum, val; 2227 const PetscInt *idxs; 2228 PetscInt nz, j; 2229 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2230 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2231 val = -ay[idxs[nz - 1]]; 2232 if (ctx->apply_p0) { 2233 sum = 0.; 2234 for (j = 0; j < nz - 1; j++) { 2235 sum += ay[idxs[j]]; 2236 ay[idxs[j]] += val; 2237 } 2238 ay[idxs[nz - 1]] += sum; 2239 } else { 2240 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2241 ay[idxs[nz - 1]] = 0.; 2242 } 2243 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2244 } 2245 PetscCall(VecRestoreArray(y, &ay)); 2246 } 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2251 { 2252 PetscFunctionBegin; 2253 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2258 { 2259 PetscFunctionBegin; 2260 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2261 PetscFunctionReturn(PETSC_SUCCESS); 2262 } 2263 2264 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2265 { 2266 PC_IS *pcis = (PC_IS *)pc->data; 2267 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2268 PCBDDCBenignMatMult_ctx ctx; 2269 2270 PetscFunctionBegin; 2271 if (!restore) { 2272 Mat A_IB, A_BI; 2273 PetscScalar *work; 2274 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2275 2276 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2277 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2278 PetscCall(PetscMalloc1(pcis->n, &work)); 2279 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2280 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2281 PetscCall(MatSetType(A_IB, MATSHELL)); 2282 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private)); 2283 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private)); 2284 PetscCall(PetscNew(&ctx)); 2285 PetscCall(MatShellSetContext(A_IB, ctx)); 2286 ctx->apply_left = PETSC_TRUE; 2287 ctx->apply_right = PETSC_FALSE; 2288 ctx->apply_p0 = PETSC_FALSE; 2289 ctx->benign_n = pcbddc->benign_n; 2290 if (reuse) { 2291 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2292 ctx->free = PETSC_FALSE; 2293 } else { /* TODO: could be optimized for successive solves */ 2294 ISLocalToGlobalMapping N_to_D; 2295 PetscInt i; 2296 2297 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2298 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2299 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2300 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2301 ctx->free = PETSC_TRUE; 2302 } 2303 ctx->A = pcis->A_IB; 2304 ctx->work = work; 2305 PetscCall(MatSetUp(A_IB)); 2306 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2307 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2308 pcis->A_IB = A_IB; 2309 2310 /* A_BI as A_IB^T */ 2311 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2312 pcbddc->benign_original_mat = pcis->A_BI; 2313 pcis->A_BI = A_BI; 2314 } else { 2315 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2316 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2317 PetscCall(MatDestroy(&pcis->A_IB)); 2318 pcis->A_IB = ctx->A; 2319 ctx->A = NULL; 2320 PetscCall(MatDestroy(&pcis->A_BI)); 2321 pcis->A_BI = pcbddc->benign_original_mat; 2322 pcbddc->benign_original_mat = NULL; 2323 if (ctx->free) { 2324 PetscInt i; 2325 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2326 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2327 } 2328 PetscCall(PetscFree(ctx->work)); 2329 PetscCall(PetscFree(ctx)); 2330 } 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 /* used just in bddc debug mode */ 2335 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2336 { 2337 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2338 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2339 Mat An; 2340 2341 PetscFunctionBegin; 2342 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2343 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2344 if (is1) { 2345 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2346 PetscCall(MatDestroy(&An)); 2347 } else { 2348 *B = An; 2349 } 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } 2352 2353 /* TODO: add reuse flag */ 2354 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2355 { 2356 Mat Bt; 2357 PetscScalar *a, *bdata; 2358 const PetscInt *ii, *ij; 2359 PetscInt m, n, i, nnz, *bii, *bij; 2360 PetscBool flg_row; 2361 2362 PetscFunctionBegin; 2363 PetscCall(MatGetSize(A, &n, &m)); 2364 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2365 PetscCall(MatSeqAIJGetArray(A, &a)); 2366 nnz = n; 2367 for (i = 0; i < ii[n]; i++) { 2368 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2369 } 2370 PetscCall(PetscMalloc1(n + 1, &bii)); 2371 PetscCall(PetscMalloc1(nnz, &bij)); 2372 PetscCall(PetscMalloc1(nnz, &bdata)); 2373 nnz = 0; 2374 bii[0] = 0; 2375 for (i = 0; i < n; i++) { 2376 PetscInt j; 2377 for (j = ii[i]; j < ii[i + 1]; j++) { 2378 PetscScalar entry = a[j]; 2379 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2380 bij[nnz] = ij[j]; 2381 bdata[nnz] = entry; 2382 nnz++; 2383 } 2384 } 2385 bii[i + 1] = nnz; 2386 } 2387 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2388 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2389 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2390 { 2391 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2392 b->free_a = PETSC_TRUE; 2393 b->free_ij = PETSC_TRUE; 2394 } 2395 if (*B == A) PetscCall(MatDestroy(&A)); 2396 *B = Bt; 2397 PetscFunctionReturn(PETSC_SUCCESS); 2398 } 2399 2400 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2401 { 2402 Mat B = NULL; 2403 DM dm; 2404 IS is_dummy, *cc_n; 2405 ISLocalToGlobalMapping l2gmap_dummy; 2406 PCBDDCGraph graph; 2407 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2408 PetscInt i, n; 2409 PetscInt *xadj, *adjncy; 2410 PetscBool isplex = PETSC_FALSE; 2411 2412 PetscFunctionBegin; 2413 if (ncc) *ncc = 0; 2414 if (cc) *cc = NULL; 2415 if (primalv) *primalv = NULL; 2416 PetscCall(PCBDDCGraphCreate(&graph)); 2417 PetscCall(MatGetDM(pc->pmat, &dm)); 2418 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2419 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2420 if (filter) isplex = PETSC_FALSE; 2421 2422 if (isplex) { /* this code has been modified from plexpartition.c */ 2423 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2424 PetscInt *adj = NULL; 2425 IS cellNumbering; 2426 const PetscInt *cellNum; 2427 PetscBool useCone, useClosure; 2428 PetscSection section; 2429 PetscSegBuffer adjBuffer; 2430 PetscSF sfPoint; 2431 2432 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2433 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2434 PetscCall(DMGetPointSF(dm, &sfPoint)); 2435 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2436 /* Build adjacency graph via a section/segbuffer */ 2437 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2438 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2439 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2440 /* Always use FVM adjacency to create partitioner graph */ 2441 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2442 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2443 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2444 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2445 for (n = 0, p = pStart; p < pEnd; p++) { 2446 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2447 if (nroots > 0) { 2448 if (cellNum[p] < 0) continue; 2449 } 2450 adjSize = PETSC_DETERMINE; 2451 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2452 for (a = 0; a < adjSize; ++a) { 2453 const PetscInt point = adj[a]; 2454 if (pStart <= point && point < pEnd) { 2455 PetscInt *PETSC_RESTRICT pBuf; 2456 PetscCall(PetscSectionAddDof(section, p, 1)); 2457 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2458 *pBuf = point; 2459 } 2460 } 2461 n++; 2462 } 2463 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2464 /* Derive CSR graph from section/segbuffer */ 2465 PetscCall(PetscSectionSetUp(section)); 2466 PetscCall(PetscSectionGetStorageSize(section, &size)); 2467 PetscCall(PetscMalloc1(n + 1, &xadj)); 2468 for (idx = 0, p = pStart; p < pEnd; p++) { 2469 if (nroots > 0) { 2470 if (cellNum[p] < 0) continue; 2471 } 2472 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2473 } 2474 xadj[n] = size; 2475 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2476 /* Clean up */ 2477 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2478 PetscCall(PetscSectionDestroy(§ion)); 2479 PetscCall(PetscFree(adj)); 2480 graph->xadj = xadj; 2481 graph->adjncy = adjncy; 2482 } else { 2483 Mat A; 2484 PetscBool isseqaij, flg_row; 2485 2486 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2487 if (!A->rmap->N || !A->cmap->N) { 2488 PetscCall(PCBDDCGraphDestroy(&graph)); 2489 PetscFunctionReturn(PETSC_SUCCESS); 2490 } 2491 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2492 if (!isseqaij && filter) { 2493 PetscBool isseqdense; 2494 2495 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2496 if (!isseqdense) { 2497 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2498 } else { /* TODO: rectangular case and LDA */ 2499 PetscScalar *array; 2500 PetscReal chop = 1.e-6; 2501 2502 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2503 PetscCall(MatDenseGetArray(B, &array)); 2504 PetscCall(MatGetSize(B, &n, NULL)); 2505 for (i = 0; i < n; i++) { 2506 PetscInt j; 2507 for (j = i + 1; j < n; j++) { 2508 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2509 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2510 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2511 } 2512 } 2513 PetscCall(MatDenseRestoreArray(B, &array)); 2514 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2515 } 2516 } else { 2517 PetscCall(PetscObjectReference((PetscObject)A)); 2518 B = A; 2519 } 2520 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2521 2522 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2523 if (filter) { 2524 PetscScalar *data; 2525 PetscInt j, cum; 2526 2527 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2528 PetscCall(MatSeqAIJGetArray(B, &data)); 2529 cum = 0; 2530 for (i = 0; i < n; i++) { 2531 PetscInt t; 2532 2533 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2534 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2535 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2536 } 2537 t = xadj_filtered[i]; 2538 xadj_filtered[i] = cum; 2539 cum += t; 2540 } 2541 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2542 graph->xadj = xadj_filtered; 2543 graph->adjncy = adjncy_filtered; 2544 } else { 2545 graph->xadj = xadj; 2546 graph->adjncy = adjncy; 2547 } 2548 } 2549 /* compute local connected components using PCBDDCGraph */ 2550 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2551 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2552 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2553 PetscCall(ISDestroy(&is_dummy)); 2554 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2555 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2556 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2557 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2558 2559 /* partial clean up */ 2560 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2561 if (B) { 2562 PetscBool flg_row; 2563 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2564 PetscCall(MatDestroy(&B)); 2565 } 2566 if (isplex) { 2567 PetscCall(PetscFree(xadj)); 2568 PetscCall(PetscFree(adjncy)); 2569 } 2570 2571 /* get back data */ 2572 if (isplex) { 2573 if (ncc) *ncc = graph->ncc; 2574 if (cc || primalv) { 2575 Mat A; 2576 PetscBT btv, btvt, btvc; 2577 PetscSection subSection; 2578 PetscInt *ids, cum, cump, *cids, *pids; 2579 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2580 2581 PetscCall(DMGetDimension(dm, &dim)); 2582 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2583 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2584 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2585 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2586 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2587 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2588 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2589 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2590 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2591 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2592 2593 /* First see if we find corners for the subdomains, i.e. a vertex 2594 shared by at least dim subdomain boundary faces. This does not 2595 cover all the possible cases with simplices but it is enough 2596 for tensor cells */ 2597 if (vStart != fStart && dim <= 3) { 2598 for (PetscInt c = cStart; c < cEnd; c++) { 2599 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2600 const PetscInt *faces; 2601 2602 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2603 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2604 PetscCall(DMPlexGetCone(dm, c, &faces)); 2605 for (PetscInt f = 0; f < nf; f++) { 2606 PetscInt nc, ff; 2607 2608 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2609 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2610 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2611 } 2612 if (cnt >= mcnt) { 2613 PetscInt size, *closure = NULL; 2614 2615 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2616 for (PetscInt k = 0; k < 2 * size; k += 2) { 2617 PetscInt v = closure[k]; 2618 if (v >= vStart && v < vEnd) { 2619 PetscInt vsize, *vclosure = NULL; 2620 2621 cnt = 0; 2622 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2623 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2624 PetscInt f = vclosure[vk]; 2625 if (f >= fStart && f < fEnd) { 2626 PetscInt nc, ff; 2627 PetscBool valid = PETSC_FALSE; 2628 2629 for (PetscInt fk = 0; fk < nf; fk++) 2630 if (f == cfaces[fk]) valid = PETSC_TRUE; 2631 if (!valid) continue; 2632 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2633 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2634 if (nc == 1 && f == ff) cnt++; 2635 } 2636 } 2637 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2638 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2639 } 2640 } 2641 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2642 } 2643 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2644 } 2645 } 2646 2647 cids[0] = 0; 2648 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2649 PetscInt j; 2650 2651 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2652 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2653 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2654 2655 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2656 for (k = 0; k < 2 * size; k += 2) { 2657 PetscInt s, pp, p = closure[k], off, dof, cdof; 2658 2659 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2660 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2661 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2662 for (s = 0; s < dof - cdof; s++) { 2663 if (PetscBTLookupSet(btvt, off + s)) continue; 2664 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2665 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2666 else pids[cump++] = off + s; /* cross-vertex */ 2667 } 2668 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2669 if (pp != p) { 2670 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2671 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2672 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2673 for (s = 0; s < dof - cdof; s++) { 2674 if (PetscBTLookupSet(btvt, off + s)) continue; 2675 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2676 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2677 else pids[cump++] = off + s; /* cross-vertex */ 2678 } 2679 } 2680 } 2681 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2682 } 2683 cids[i + 1] = cum; 2684 /* mark dofs as already assigned */ 2685 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2686 } 2687 if (cc) { 2688 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2689 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2690 *cc = cc_n; 2691 } 2692 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2693 PetscCall(PetscFree3(ids, cids, pids)); 2694 PetscCall(PetscBTDestroy(&btv)); 2695 PetscCall(PetscBTDestroy(&btvt)); 2696 PetscCall(PetscBTDestroy(&btvc)); 2697 PetscCall(DMDestroy(&dm)); 2698 } 2699 } else { 2700 if (ncc) *ncc = graph->ncc; 2701 if (cc) { 2702 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2703 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2704 *cc = cc_n; 2705 } 2706 } 2707 /* clean up graph */ 2708 graph->xadj = NULL; 2709 graph->adjncy = NULL; 2710 PetscCall(PCBDDCGraphDestroy(&graph)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2715 { 2716 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2717 PC_IS *pcis = (PC_IS *)pc->data; 2718 IS dirIS = NULL; 2719 PetscInt i; 2720 2721 PetscFunctionBegin; 2722 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2723 if (zerodiag) { 2724 Mat A; 2725 Vec vec3_N; 2726 PetscScalar *vals; 2727 const PetscInt *idxs; 2728 PetscInt nz, *count; 2729 2730 /* p0 */ 2731 PetscCall(VecSet(pcis->vec1_N, 0.)); 2732 PetscCall(PetscMalloc1(pcis->n, &vals)); 2733 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2734 PetscCall(ISGetIndices(zerodiag, &idxs)); 2735 for (i = 0; i < nz; i++) vals[i] = 1.; 2736 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2737 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2738 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2739 /* v_I */ 2740 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2741 for (i = 0; i < nz; i++) vals[i] = 0.; 2742 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2743 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2744 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2745 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2746 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2747 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2748 if (dirIS) { 2749 PetscInt n; 2750 2751 PetscCall(ISGetLocalSize(dirIS, &n)); 2752 PetscCall(ISGetIndices(dirIS, &idxs)); 2753 for (i = 0; i < n; i++) vals[i] = 0.; 2754 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2755 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2756 } 2757 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2758 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2759 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2760 PetscCall(VecSet(vec3_N, 0.)); 2761 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2762 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2763 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2764 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2765 PetscCall(PetscFree(vals)); 2766 PetscCall(VecDestroy(&vec3_N)); 2767 2768 /* there should not be any pressure dofs lying on the interface */ 2769 PetscCall(PetscCalloc1(pcis->n, &count)); 2770 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2771 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2772 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2773 PetscCall(ISGetIndices(zerodiag, &idxs)); 2774 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2775 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2776 PetscCall(PetscFree(count)); 2777 } 2778 PetscCall(ISDestroy(&dirIS)); 2779 2780 /* check PCBDDCBenignGetOrSetP0 */ 2781 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2782 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2783 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2784 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2785 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2786 for (i = 0; i < pcbddc->benign_n; i++) { 2787 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2788 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2789 } 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2794 { 2795 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2796 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2797 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2798 PetscInt nz, n, benign_n, bsp = 1; 2799 PetscInt *interior_dofs, n_interior_dofs, nneu; 2800 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2801 2802 PetscFunctionBegin; 2803 if (reuse) goto project_b0; 2804 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2805 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2806 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2807 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2808 has_null_pressures = PETSC_TRUE; 2809 have_null = PETSC_TRUE; 2810 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2811 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2812 Checks if all the pressure dofs in each subdomain have a zero diagonal 2813 If not, a change of basis on pressures is not needed 2814 since the local Schur complements are already SPD 2815 */ 2816 if (pcbddc->n_ISForDofsLocal) { 2817 IS iP = NULL; 2818 PetscInt p, *pp; 2819 PetscBool flg, blocked = PETSC_FALSE; 2820 2821 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2822 n = pcbddc->n_ISForDofsLocal; 2823 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2824 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2825 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL)); 2826 PetscOptionsEnd(); 2827 if (!flg) { 2828 n = 1; 2829 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2830 } 2831 2832 bsp = 0; 2833 for (p = 0; p < n; p++) { 2834 PetscInt bs = 1; 2835 2836 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2837 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2838 bsp += bs; 2839 } 2840 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2841 bsp = 0; 2842 for (p = 0; p < n; p++) { 2843 const PetscInt *idxs; 2844 PetscInt b, bs = 1, npl, *bidxs; 2845 2846 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2847 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2848 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2849 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2850 for (b = 0; b < bs; b++) { 2851 PetscInt i; 2852 2853 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2854 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2855 bsp++; 2856 } 2857 PetscCall(PetscFree(bidxs)); 2858 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2859 } 2860 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2861 2862 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2863 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2864 if (iP) { 2865 IS newpressures; 2866 2867 PetscCall(ISDifference(pressures, iP, &newpressures)); 2868 PetscCall(ISDestroy(&pressures)); 2869 pressures = newpressures; 2870 } 2871 PetscCall(ISSorted(pressures, &sorted)); 2872 if (!sorted) PetscCall(ISSort(pressures)); 2873 PetscCall(PetscFree(pp)); 2874 } 2875 2876 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2877 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2878 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2879 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2880 PetscCall(ISSorted(zerodiag, &sorted)); 2881 if (!sorted) PetscCall(ISSort(zerodiag)); 2882 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2883 zerodiag_save = zerodiag; 2884 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2885 if (!nz) { 2886 if (n) have_null = PETSC_FALSE; 2887 has_null_pressures = PETSC_FALSE; 2888 PetscCall(ISDestroy(&zerodiag)); 2889 } 2890 recompute_zerodiag = PETSC_FALSE; 2891 2892 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2893 zerodiag_subs = NULL; 2894 benign_n = 0; 2895 n_interior_dofs = 0; 2896 interior_dofs = NULL; 2897 nneu = 0; 2898 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2899 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2900 if (checkb) { /* need to compute interior nodes */ 2901 PetscInt n, i; 2902 PetscInt *count; 2903 ISLocalToGlobalMapping mapping; 2904 2905 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2906 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2907 PetscCall(PetscMalloc1(n, &interior_dofs)); 2908 for (i = 0; i < n; i++) 2909 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2910 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2911 } 2912 if (has_null_pressures) { 2913 IS *subs; 2914 PetscInt nsubs, i, j, nl; 2915 const PetscInt *idxs; 2916 PetscScalar *array; 2917 Vec *work; 2918 2919 subs = pcbddc->local_subs; 2920 nsubs = pcbddc->n_local_subs; 2921 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2922 if (checkb) { 2923 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2924 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2925 PetscCall(ISGetIndices(zerodiag, &idxs)); 2926 /* work[0] = 1_p */ 2927 PetscCall(VecSet(work[0], 0.)); 2928 PetscCall(VecGetArray(work[0], &array)); 2929 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2930 PetscCall(VecRestoreArray(work[0], &array)); 2931 /* work[0] = 1_v */ 2932 PetscCall(VecSet(work[1], 1.)); 2933 PetscCall(VecGetArray(work[1], &array)); 2934 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2935 PetscCall(VecRestoreArray(work[1], &array)); 2936 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2937 } 2938 2939 if (nsubs > 1 || bsp > 1) { 2940 IS *is; 2941 PetscInt b, totb; 2942 2943 totb = bsp; 2944 is = bsp > 1 ? bzerodiag : &zerodiag; 2945 nsubs = PetscMax(nsubs, 1); 2946 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2947 for (b = 0; b < totb; b++) { 2948 for (i = 0; i < nsubs; i++) { 2949 ISLocalToGlobalMapping l2g; 2950 IS t_zerodiag_subs; 2951 PetscInt nl; 2952 2953 if (subs) { 2954 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2955 } else { 2956 IS tis; 2957 2958 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2959 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2960 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2961 PetscCall(ISDestroy(&tis)); 2962 } 2963 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2964 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2965 if (nl) { 2966 PetscBool valid = PETSC_TRUE; 2967 2968 if (checkb) { 2969 PetscCall(VecSet(matis->x, 0)); 2970 PetscCall(ISGetLocalSize(subs[i], &nl)); 2971 PetscCall(ISGetIndices(subs[i], &idxs)); 2972 PetscCall(VecGetArray(matis->x, &array)); 2973 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2974 PetscCall(VecRestoreArray(matis->x, &array)); 2975 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2976 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2977 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2978 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2979 PetscCall(VecGetArray(matis->y, &array)); 2980 for (j = 0; j < n_interior_dofs; j++) { 2981 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2982 valid = PETSC_FALSE; 2983 break; 2984 } 2985 } 2986 PetscCall(VecRestoreArray(matis->y, &array)); 2987 } 2988 if (valid && nneu) { 2989 const PetscInt *idxs; 2990 PetscInt nzb; 2991 2992 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2993 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2994 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2995 if (nzb) valid = PETSC_FALSE; 2996 } 2997 if (valid && pressures) { 2998 IS t_pressure_subs, tmp; 2999 PetscInt i1, i2; 3000 3001 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 3002 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 3003 PetscCall(ISGetLocalSize(tmp, &i1)); 3004 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 3005 if (i2 != i1) valid = PETSC_FALSE; 3006 PetscCall(ISDestroy(&t_pressure_subs)); 3007 PetscCall(ISDestroy(&tmp)); 3008 } 3009 if (valid) { 3010 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 3011 benign_n++; 3012 } else recompute_zerodiag = PETSC_TRUE; 3013 } 3014 PetscCall(ISDestroy(&t_zerodiag_subs)); 3015 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 3016 } 3017 } 3018 } else { /* there's just one subdomain (or zero if they have not been detected */ 3019 PetscBool valid = PETSC_TRUE; 3020 3021 if (nneu) valid = PETSC_FALSE; 3022 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 3023 if (valid && checkb) { 3024 PetscCall(MatMult(matis->A, work[0], matis->x)); 3025 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 3026 PetscCall(VecGetArray(matis->x, &array)); 3027 for (j = 0; j < n_interior_dofs; j++) { 3028 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 3029 valid = PETSC_FALSE; 3030 break; 3031 } 3032 } 3033 PetscCall(VecRestoreArray(matis->x, &array)); 3034 } 3035 if (valid) { 3036 benign_n = 1; 3037 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 3038 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 3039 zerodiag_subs[0] = zerodiag; 3040 } 3041 } 3042 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 3043 } 3044 PetscCall(PetscFree(interior_dofs)); 3045 3046 if (!benign_n) { 3047 PetscInt n; 3048 3049 PetscCall(ISDestroy(&zerodiag)); 3050 recompute_zerodiag = PETSC_FALSE; 3051 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3052 if (n) have_null = PETSC_FALSE; 3053 } 3054 3055 /* final check for null pressures */ 3056 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 3057 3058 if (recompute_zerodiag) { 3059 PetscCall(ISDestroy(&zerodiag)); 3060 if (benign_n == 1) { 3061 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 3062 zerodiag = zerodiag_subs[0]; 3063 } else { 3064 PetscInt i, nzn, *new_idxs; 3065 3066 nzn = 0; 3067 for (i = 0; i < benign_n; i++) { 3068 PetscInt ns; 3069 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 3070 nzn += ns; 3071 } 3072 PetscCall(PetscMalloc1(nzn, &new_idxs)); 3073 nzn = 0; 3074 for (i = 0; i < benign_n; i++) { 3075 PetscInt ns, *idxs; 3076 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 3077 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 3078 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 3079 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 3080 nzn += ns; 3081 } 3082 PetscCall(PetscSortInt(nzn, new_idxs)); 3083 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 3084 } 3085 have_null = PETSC_FALSE; 3086 } 3087 3088 /* determines if the coarse solver will be singular or not */ 3089 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 3090 3091 /* Prepare matrix to compute no-net-flux */ 3092 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 3093 Mat A, loc_divudotp; 3094 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 3095 IS row, col, isused = NULL; 3096 PetscInt M, N, n, st, n_isused; 3097 3098 if (pressures) { 3099 isused = pressures; 3100 } else { 3101 isused = zerodiag_save; 3102 } 3103 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 3104 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 3105 PetscCall(MatGetLocalSize(A, &n, NULL)); 3106 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 3107 n_isused = 0; 3108 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 3109 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 3110 st = st - n_isused; 3111 if (n) { 3112 const PetscInt *gidxs; 3113 3114 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 3115 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 3116 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 3117 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3118 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 3119 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 3120 } else { 3121 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3122 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3123 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3124 } 3125 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3126 PetscCall(ISGetSize(row, &M)); 3127 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3128 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3129 PetscCall(ISDestroy(&row)); 3130 PetscCall(ISDestroy(&col)); 3131 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3132 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3133 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3134 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3135 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3136 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3137 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3138 PetscCall(MatDestroy(&loc_divudotp)); 3139 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3140 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3141 } 3142 PetscCall(ISDestroy(&zerodiag_save)); 3143 PetscCall(ISDestroy(&pressures)); 3144 if (bzerodiag) { 3145 PetscInt i; 3146 3147 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3148 PetscCall(PetscFree(bzerodiag)); 3149 } 3150 pcbddc->benign_n = benign_n; 3151 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3152 3153 /* determines if the problem has subdomains with 0 pressure block */ 3154 have_null = (PetscBool)(!!pcbddc->benign_n); 3155 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3156 3157 project_b0: 3158 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3159 /* change of basis and p0 dofs */ 3160 if (pcbddc->benign_n) { 3161 PetscInt i, s, *nnz; 3162 3163 /* local change of basis for pressures */ 3164 PetscCall(MatDestroy(&pcbddc->benign_change)); 3165 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3166 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3167 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3168 PetscCall(PetscMalloc1(n, &nnz)); 3169 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3170 for (i = 0; i < pcbddc->benign_n; i++) { 3171 const PetscInt *idxs; 3172 PetscInt nzs, j; 3173 3174 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3175 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3176 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3177 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3178 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3179 } 3180 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3181 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3182 PetscCall(PetscFree(nnz)); 3183 /* set identity by default */ 3184 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3185 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3186 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3187 /* set change on pressures */ 3188 for (s = 0; s < pcbddc->benign_n; s++) { 3189 PetscScalar *array; 3190 const PetscInt *idxs; 3191 PetscInt nzs; 3192 3193 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3194 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3195 for (i = 0; i < nzs - 1; i++) { 3196 PetscScalar vals[2]; 3197 PetscInt cols[2]; 3198 3199 cols[0] = idxs[i]; 3200 cols[1] = idxs[nzs - 1]; 3201 vals[0] = 1.; 3202 vals[1] = 1.; 3203 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3204 } 3205 PetscCall(PetscMalloc1(nzs, &array)); 3206 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3207 array[nzs - 1] = 1.; 3208 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3209 /* store local idxs for p0 */ 3210 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3211 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3212 PetscCall(PetscFree(array)); 3213 } 3214 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3215 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3216 3217 /* project if needed */ 3218 if (pcbddc->benign_change_explicit) { 3219 Mat M; 3220 3221 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3222 PetscCall(MatDestroy(&pcbddc->local_mat)); 3223 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3224 PetscCall(MatDestroy(&M)); 3225 } 3226 /* store global idxs for p0 */ 3227 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3228 } 3229 *zerodiaglocal = zerodiag; 3230 PetscFunctionReturn(PETSC_SUCCESS); 3231 } 3232 3233 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3234 { 3235 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3236 PetscScalar *array; 3237 3238 PetscFunctionBegin; 3239 if (!pcbddc->benign_sf) { 3240 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3241 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3242 } 3243 if (get) { 3244 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3245 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3246 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3247 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3248 } else { 3249 PetscCall(VecGetArray(v, &array)); 3250 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3251 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3252 PetscCall(VecRestoreArray(v, &array)); 3253 } 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3258 { 3259 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3260 3261 PetscFunctionBegin; 3262 /* TODO: add error checking 3263 - avoid nested pop (or push) calls. 3264 - cannot push before pop. 3265 - cannot call this if pcbddc->local_mat is NULL 3266 */ 3267 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3268 if (pop) { 3269 if (pcbddc->benign_change_explicit) { 3270 IS is_p0; 3271 MatReuse reuse; 3272 3273 /* extract B_0 */ 3274 reuse = MAT_INITIAL_MATRIX; 3275 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3276 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3277 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3278 /* remove rows and cols from local problem */ 3279 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3280 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3281 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3282 PetscCall(ISDestroy(&is_p0)); 3283 } else { 3284 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3285 PetscScalar *vals; 3286 PetscInt i, n, *idxs_ins; 3287 3288 PetscCall(VecGetLocalSize(matis->y, &n)); 3289 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3290 if (!pcbddc->benign_B0) { 3291 PetscInt *nnz; 3292 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3293 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3294 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3295 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3296 for (i = 0; i < pcbddc->benign_n; i++) { 3297 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3298 nnz[i] = n - nnz[i]; 3299 } 3300 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3301 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3302 PetscCall(PetscFree(nnz)); 3303 } 3304 3305 for (i = 0; i < pcbddc->benign_n; i++) { 3306 PetscScalar *array; 3307 PetscInt *idxs, j, nz, cum; 3308 3309 PetscCall(VecSet(matis->x, 0.)); 3310 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3311 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3312 for (j = 0; j < nz; j++) vals[j] = 1.; 3313 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3314 PetscCall(VecAssemblyBegin(matis->x)); 3315 PetscCall(VecAssemblyEnd(matis->x)); 3316 PetscCall(VecSet(matis->y, 0.)); 3317 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3318 PetscCall(VecGetArray(matis->y, &array)); 3319 cum = 0; 3320 for (j = 0; j < n; j++) { 3321 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3322 vals[cum] = array[j]; 3323 idxs_ins[cum] = j; 3324 cum++; 3325 } 3326 } 3327 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3328 PetscCall(VecRestoreArray(matis->y, &array)); 3329 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3330 } 3331 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3332 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3333 PetscCall(PetscFree2(idxs_ins, vals)); 3334 } 3335 } else { /* push */ 3336 3337 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3338 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3339 PetscScalar *B0_vals; 3340 PetscInt *B0_cols, B0_ncol; 3341 3342 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3343 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3344 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3345 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3346 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3347 } 3348 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3349 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3355 { 3356 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3357 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3358 PetscBLASInt B_neigs, B_ierr, B_lwork; 3359 PetscBLASInt *B_iwork, *B_ifail; 3360 PetscScalar *work, lwork; 3361 PetscScalar *St, *S, *eigv; 3362 PetscScalar *Sarray, *Starray; 3363 PetscReal *eigs, thresh, lthresh, uthresh; 3364 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3365 PetscBool allocated_S_St, upart; 3366 #if defined(PETSC_USE_COMPLEX) 3367 PetscReal *rwork; 3368 #endif 3369 3370 PetscFunctionBegin; 3371 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3372 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3373 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3374 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3375 sub_schurs->is_posdef); 3376 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3377 3378 if (pcbddc->dbg_flag) { 3379 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3380 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3381 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3382 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3383 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3384 } 3385 3386 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3387 3388 /* max size of subsets */ 3389 mss = 0; 3390 for (i = 0; i < sub_schurs->n_subs; i++) { 3391 PetscInt subset_size; 3392 3393 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3394 mss = PetscMax(mss, subset_size); 3395 } 3396 3397 /* min/max and threshold */ 3398 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3399 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3400 nmax = PetscMax(nmin, nmax); 3401 allocated_S_St = PETSC_FALSE; 3402 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3403 allocated_S_St = PETSC_TRUE; 3404 } 3405 3406 /* allocate lapack workspace */ 3407 cum = cum2 = 0; 3408 maxneigs = 0; 3409 for (i = 0; i < sub_schurs->n_subs; i++) { 3410 PetscInt n, subset_size; 3411 3412 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3413 n = PetscMin(subset_size, nmax); 3414 cum += subset_size; 3415 cum2 += subset_size * n; 3416 maxneigs = PetscMax(maxneigs, n); 3417 } 3418 lwork = 0; 3419 if (mss) { 3420 PetscScalar sdummy = 0.; 3421 PetscBLASInt B_itype = 1; 3422 PetscBLASInt B_N, idummy = 0; 3423 PetscReal rdummy = 0., zero = 0.0; 3424 PetscReal eps = 0.0; /* dlamch? */ 3425 3426 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3427 PetscCall(PetscBLASIntCast(mss, &B_N)); 3428 B_lwork = -1; 3429 /* some implementations may complain about NULL pointers, even if we are querying */ 3430 S = &sdummy; 3431 St = &sdummy; 3432 eigs = &rdummy; 3433 eigv = &sdummy; 3434 B_iwork = &idummy; 3435 B_ifail = &idummy; 3436 #if defined(PETSC_USE_COMPLEX) 3437 rwork = &rdummy; 3438 #endif 3439 thresh = 1.0; 3440 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3441 #if defined(PETSC_USE_COMPLEX) 3442 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3443 #else 3444 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3445 #endif 3446 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr); 3447 PetscCall(PetscFPTrapPop()); 3448 } 3449 3450 nv = 0; 3451 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3452 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3453 } 3454 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3455 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3456 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3457 #if defined(PETSC_USE_COMPLEX) 3458 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3459 #endif 3460 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3461 &pcbddc->adaptive_constraints_data)); 3462 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3463 3464 maxneigs = 0; 3465 cum = cumarray = 0; 3466 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3467 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3468 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3469 const PetscInt *idxs; 3470 3471 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3472 for (cum = 0; cum < nv; cum++) { 3473 pcbddc->adaptive_constraints_n[cum] = 1; 3474 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3475 pcbddc->adaptive_constraints_data[cum] = 1.0; 3476 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3477 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3478 } 3479 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3480 } 3481 3482 if (mss) { /* multilevel */ 3483 if (sub_schurs->gdsw) { 3484 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3485 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3486 } else { 3487 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3488 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3489 } 3490 } 3491 3492 lthresh = pcbddc->adaptive_threshold[0]; 3493 uthresh = pcbddc->adaptive_threshold[1]; 3494 upart = pcbddc->use_deluxe_scaling; 3495 for (i = 0; i < sub_schurs->n_subs; i++) { 3496 const PetscInt *idxs; 3497 PetscReal upper, lower; 3498 PetscInt j, subset_size, eigs_start = 0; 3499 PetscBLASInt B_N; 3500 PetscBool same_data = PETSC_FALSE; 3501 PetscBool scal = PETSC_FALSE; 3502 3503 if (upart) { 3504 upper = PETSC_MAX_REAL; 3505 lower = uthresh; 3506 } else { 3507 if (sub_schurs->gdsw) { 3508 upper = uthresh; 3509 lower = PETSC_MIN_REAL; 3510 } else { 3511 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3512 upper = 1. / uthresh; 3513 lower = 0.; 3514 } 3515 } 3516 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3517 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3518 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3519 /* this is experimental: we assume the dofs have been properly grouped to have 3520 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3521 if (!sub_schurs->is_posdef) { 3522 Mat T; 3523 3524 for (j = 0; j < subset_size; j++) { 3525 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3526 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3527 PetscCall(MatScale(T, -1.0)); 3528 PetscCall(MatDestroy(&T)); 3529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3530 PetscCall(MatScale(T, -1.0)); 3531 PetscCall(MatDestroy(&T)); 3532 if (sub_schurs->change_primal_sub) { 3533 PetscInt nz, k; 3534 const PetscInt *idxs; 3535 3536 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3537 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3538 for (k = 0; k < nz; k++) { 3539 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3540 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3541 } 3542 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3543 } 3544 scal = PETSC_TRUE; 3545 break; 3546 } 3547 } 3548 } 3549 3550 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3551 if (sub_schurs->is_symmetric) { 3552 PetscInt j, k; 3553 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3554 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3555 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3556 } 3557 for (j = 0; j < subset_size; j++) { 3558 for (k = j; k < subset_size; k++) { 3559 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3560 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3561 } 3562 } 3563 } else { 3564 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3565 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3566 } 3567 } else { 3568 S = Sarray + cumarray; 3569 St = Starray + cumarray; 3570 } 3571 /* see if we can save some work */ 3572 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3573 3574 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3575 B_neigs = 0; 3576 } else { 3577 PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0; 3578 PetscReal eps = -1.0; /* dlamch? */ 3579 PetscInt nmin_s; 3580 PetscBool compute_range; 3581 3582 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3583 B_neigs = 0; 3584 compute_range = (PetscBool)!same_data; 3585 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3586 3587 if (pcbddc->dbg_flag) { 3588 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3589 3590 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3591 PetscCall( 3592 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3593 } 3594 3595 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3596 if (compute_range) { 3597 /* ask for eigenvalues larger than thresh */ 3598 if (sub_schurs->is_posdef) { 3599 #if defined(PETSC_USE_COMPLEX) 3600 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3601 #else 3602 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3603 #endif 3604 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3605 } else { /* no theory so far, but it works nicely */ 3606 PetscInt recipe = 0, recipe_m = 1; 3607 PetscReal bb[2]; 3608 3609 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3610 switch (recipe) { 3611 case 0: 3612 if (scal) { 3613 bb[0] = PETSC_MIN_REAL; 3614 bb[1] = lthresh; 3615 } else { 3616 bb[0] = uthresh; 3617 bb[1] = PETSC_MAX_REAL; 3618 } 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3621 #else 3622 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3623 #endif 3624 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3625 break; 3626 case 1: 3627 bb[0] = PETSC_MIN_REAL; 3628 bb[1] = lthresh * lthresh; 3629 #if defined(PETSC_USE_COMPLEX) 3630 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3631 #else 3632 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3633 #endif 3634 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3635 if (!scal) { 3636 PetscBLASInt B_neigs2 = 0; 3637 3638 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3639 bb[1] = PETSC_MAX_REAL; 3640 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3641 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3642 #if defined(PETSC_USE_COMPLEX) 3643 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3644 #else 3645 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3646 #endif 3647 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3648 B_neigs += B_neigs2; 3649 } 3650 break; 3651 case 2: 3652 if (scal) { 3653 bb[0] = PETSC_MIN_REAL; 3654 bb[1] = 0; 3655 #if defined(PETSC_USE_COMPLEX) 3656 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3657 #else 3658 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3659 #endif 3660 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3661 } else { 3662 PetscBLASInt B_neigs2 = 0; 3663 PetscBool do_copy = PETSC_FALSE; 3664 3665 lthresh = PetscMax(lthresh, 0.0); 3666 if (lthresh > 0.0) { 3667 bb[0] = PETSC_MIN_REAL; 3668 bb[1] = lthresh * lthresh; 3669 3670 do_copy = PETSC_TRUE; 3671 #if defined(PETSC_USE_COMPLEX) 3672 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3673 #else 3674 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3675 #endif 3676 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3677 } 3678 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3679 bb[1] = PETSC_MAX_REAL; 3680 if (do_copy) { 3681 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3682 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3683 } 3684 #if defined(PETSC_USE_COMPLEX) 3685 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3686 #else 3687 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3688 #endif 3689 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3690 B_neigs += B_neigs2; 3691 } 3692 break; 3693 case 3: 3694 if (scal) { 3695 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3696 } else { 3697 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3698 } 3699 if (!scal) { 3700 bb[0] = uthresh; 3701 bb[1] = PETSC_MAX_REAL; 3702 #if defined(PETSC_USE_COMPLEX) 3703 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3704 #else 3705 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3706 #endif 3707 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3708 } 3709 if (recipe_m > 0 && B_N - B_neigs > 0) { 3710 PetscBLASInt B_neigs2 = 0; 3711 3712 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3713 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3714 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3715 #if defined(PETSC_USE_COMPLEX) 3716 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3717 #else 3718 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3719 #endif 3720 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3721 B_neigs += B_neigs2; 3722 } 3723 break; 3724 case 4: 3725 bb[0] = PETSC_MIN_REAL; 3726 bb[1] = lthresh; 3727 #if defined(PETSC_USE_COMPLEX) 3728 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3729 #else 3730 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3731 #endif 3732 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3733 { 3734 PetscBLASInt B_neigs2 = 0; 3735 3736 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3737 bb[1] = PETSC_MAX_REAL; 3738 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3739 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3740 #if defined(PETSC_USE_COMPLEX) 3741 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3742 #else 3743 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3744 #endif 3745 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3746 B_neigs += B_neigs2; 3747 } 3748 break; 3749 case 5: /* same as before: first compute all eigenvalues, then filter */ 3750 #if defined(PETSC_USE_COMPLEX) 3751 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3752 #else 3753 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3754 #endif 3755 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3756 { 3757 PetscInt e, k, ne; 3758 for (e = 0, ne = 0; e < B_neigs; e++) { 3759 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3760 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3761 eigs[ne] = eigs[e]; 3762 ne++; 3763 } 3764 } 3765 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3766 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3767 } 3768 break; 3769 default: 3770 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3771 } 3772 } 3773 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3774 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3775 #if defined(PETSC_USE_COMPLEX) 3776 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3777 #else 3778 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3779 #endif 3780 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3781 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3782 PetscInt k; 3783 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3784 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3785 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3786 nmin = nmax; 3787 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3788 for (k = 0; k < nmax; k++) { 3789 eigs[k] = 1. / PETSC_SMALL; 3790 eigv[k * (subset_size + 1)] = 1.0; 3791 } 3792 } 3793 PetscCall(PetscFPTrapPop()); 3794 if (B_ierr) { 3795 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3796 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3797 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3798 } 3799 3800 if (B_neigs > nmax) { 3801 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3802 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3803 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3804 } 3805 3806 nmin_s = PetscMin(nmin, B_N); 3807 if (B_neigs < nmin_s) { 3808 PetscBLASInt B_neigs2 = 0; 3809 3810 if (upart) { 3811 if (scal) { 3812 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3813 B_IL = B_neigs + 1; 3814 } else { 3815 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3816 B_IU = B_N - B_neigs; 3817 } 3818 } else { 3819 B_IL = B_neigs + 1; 3820 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3821 } 3822 if (pcbddc->dbg_flag) { 3823 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3824 } 3825 if (sub_schurs->is_symmetric) { 3826 PetscInt j, k; 3827 for (j = 0; j < subset_size; j++) { 3828 for (k = j; k < subset_size; k++) { 3829 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3830 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3831 } 3832 } 3833 } else { 3834 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3835 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3836 } 3837 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3838 #if defined(PETSC_USE_COMPLEX) 3839 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3840 #else 3841 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3842 #endif 3843 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3844 PetscCall(PetscFPTrapPop()); 3845 B_neigs += B_neigs2; 3846 } 3847 if (B_ierr) { 3848 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3849 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3850 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3851 } 3852 if (pcbddc->dbg_flag) { 3853 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3854 for (j = 0; j < B_neigs; j++) { 3855 if (!sub_schurs->gdsw) { 3856 if (eigs[j] == 0.0) { 3857 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3858 } else { 3859 if (upart) { 3860 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3861 } else { 3862 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start]))); 3863 } 3864 } 3865 } else { 3866 double pg = (double)eigs[j + eigs_start]; 3867 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3868 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3869 } 3870 } 3871 } 3872 } 3873 /* change the basis back to the original one */ 3874 if (sub_schurs->change) { 3875 Mat change, phi, phit; 3876 3877 if (pcbddc->dbg_flag > 2) { 3878 PetscInt ii; 3879 for (ii = 0; ii < B_neigs; ii++) { 3880 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3881 for (j = 0; j < B_N; j++) { 3882 #if defined(PETSC_USE_COMPLEX) 3883 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3884 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3885 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3886 #else 3887 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3888 #endif 3889 } 3890 } 3891 } 3892 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3893 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3894 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3895 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3896 PetscCall(MatDestroy(&phit)); 3897 PetscCall(MatDestroy(&phi)); 3898 } 3899 maxneigs = PetscMax(B_neigs, maxneigs); 3900 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3901 if (B_neigs) { 3902 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3903 3904 if (pcbddc->dbg_flag > 1) { 3905 PetscInt ii; 3906 for (ii = 0; ii < B_neigs; ii++) { 3907 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3908 for (j = 0; j < B_N; j++) { 3909 #if defined(PETSC_USE_COMPLEX) 3910 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3911 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3912 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3913 #else 3914 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3915 #endif 3916 } 3917 } 3918 } 3919 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3920 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3921 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3922 cum++; 3923 } 3924 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3925 /* shift for next computation */ 3926 cumarray += subset_size * subset_size; 3927 } 3928 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3929 3930 if (mss) { 3931 if (sub_schurs->gdsw) { 3932 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3933 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3934 } else { 3935 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3936 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3937 /* destroy matrices (junk) */ 3938 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3939 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3940 } 3941 } 3942 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3943 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3944 #if defined(PETSC_USE_COMPLEX) 3945 PetscCall(PetscFree(rwork)); 3946 #endif 3947 if (pcbddc->dbg_flag) { 3948 PetscInt maxneigs_r; 3949 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3950 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3951 } 3952 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3953 PetscFunctionReturn(PETSC_SUCCESS); 3954 } 3955 3956 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3957 { 3958 Mat coarse_submat; 3959 3960 PetscFunctionBegin; 3961 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3962 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3963 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3964 3965 /* Setup local neumann solver ksp_R */ 3966 /* PCBDDCSetUpLocalScatters should be called first! */ 3967 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3968 3969 /* 3970 Setup local correction and local part of coarse basis. 3971 Gives back the dense local part of the coarse matrix in column major ordering 3972 */ 3973 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3974 3975 /* Compute total number of coarse nodes and setup coarse solver */ 3976 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3977 PetscCall(MatDestroy(&coarse_submat)); 3978 PetscFunctionReturn(PETSC_SUCCESS); 3979 } 3980 3981 PetscErrorCode PCBDDCResetCustomization(PC pc) 3982 { 3983 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3984 3985 PetscFunctionBegin; 3986 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3987 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3988 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3989 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3990 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3991 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3992 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3993 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3994 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3995 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3996 PetscFunctionReturn(PETSC_SUCCESS); 3997 } 3998 3999 PetscErrorCode PCBDDCResetTopography(PC pc) 4000 { 4001 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4002 PetscInt i; 4003 4004 PetscFunctionBegin; 4005 PetscCall(MatDestroy(&pcbddc->nedcG)); 4006 PetscCall(ISDestroy(&pcbddc->nedclocal)); 4007 PetscCall(MatDestroy(&pcbddc->discretegradient)); 4008 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 4009 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 4010 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 4011 PetscCall(VecDestroy(&pcbddc->work_change)); 4012 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 4013 PetscCall(MatDestroy(&pcbddc->divudotp)); 4014 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 4015 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 4016 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 4017 pcbddc->n_local_subs = 0; 4018 PetscCall(PetscFree(pcbddc->local_subs)); 4019 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 4020 pcbddc->graphanalyzed = PETSC_FALSE; 4021 pcbddc->recompute_topography = PETSC_TRUE; 4022 pcbddc->corner_selected = PETSC_FALSE; 4023 PetscFunctionReturn(PETSC_SUCCESS); 4024 } 4025 4026 PetscErrorCode PCBDDCResetSolvers(PC pc) 4027 { 4028 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4029 4030 PetscFunctionBegin; 4031 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 4032 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4033 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4034 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4035 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4036 PetscCall(VecDestroy(&pcbddc->vec1_P)); 4037 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4038 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4039 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4040 PetscCall(VecDestroy(&pcbddc->vec1_R)); 4041 PetscCall(VecDestroy(&pcbddc->vec2_R)); 4042 PetscCall(ISDestroy(&pcbddc->is_R_local)); 4043 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 4044 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 4045 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 4046 PetscCall(KSPReset(pcbddc->ksp_D)); 4047 PetscCall(KSPReset(pcbddc->ksp_R)); 4048 PetscCall(KSPReset(pcbddc->coarse_ksp)); 4049 PetscCall(MatDestroy(&pcbddc->local_mat)); 4050 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 4051 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 4052 PetscCall(PetscFree(pcbddc->global_primal_indices)); 4053 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 4054 PetscCall(MatDestroy(&pcbddc->benign_change)); 4055 PetscCall(VecDestroy(&pcbddc->benign_vec)); 4056 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 4057 PetscCall(MatDestroy(&pcbddc->benign_B0)); 4058 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 4059 if (pcbddc->benign_zerodiag_subs) { 4060 PetscInt i; 4061 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 4062 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 4063 } 4064 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 4065 PetscFunctionReturn(PETSC_SUCCESS); 4066 } 4067 4068 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 4069 { 4070 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4071 PC_IS *pcis = (PC_IS *)pc->data; 4072 VecType impVecType; 4073 PetscInt n_constraints, n_R, old_size; 4074 4075 PetscFunctionBegin; 4076 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 4077 n_R = pcis->n - pcbddc->n_vertices; 4078 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 4079 /* local work vectors (try to avoid unneeded work)*/ 4080 /* R nodes */ 4081 old_size = -1; 4082 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 4083 if (n_R != old_size) { 4084 PetscCall(VecDestroy(&pcbddc->vec1_R)); 4085 PetscCall(VecDestroy(&pcbddc->vec2_R)); 4086 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 4087 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 4088 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 4089 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 4090 } 4091 /* local primal dofs */ 4092 old_size = -1; 4093 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 4094 if (pcbddc->local_primal_size != old_size) { 4095 PetscCall(VecDestroy(&pcbddc->vec1_P)); 4096 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 4097 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 4098 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 4099 } 4100 /* local explicit constraints */ 4101 old_size = -1; 4102 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 4103 if (n_constraints && n_constraints != old_size) { 4104 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4105 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 4106 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 4107 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 4108 } 4109 PetscFunctionReturn(PETSC_SUCCESS); 4110 } 4111 4112 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 4113 { 4114 PetscBool flg; 4115 const PetscScalar *a; 4116 4117 PetscFunctionBegin; 4118 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 4119 if (flg) { 4120 PetscCall(MatDenseGetArrayRead(S, &a)); 4121 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4122 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4123 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4124 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4125 } else { 4126 const PetscInt *ii, *jj; 4127 PetscInt n; 4128 PetscInt buf[8192], *bufc = NULL; 4129 PetscBool freeb = PETSC_FALSE; 4130 Mat Sm = S; 4131 4132 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4133 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4134 else PetscCall(PetscObjectReference((PetscObject)S)); 4135 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4136 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4137 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4138 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4139 bufc = buf; 4140 } else { 4141 PetscCall(PetscMalloc1(nc, &bufc)); 4142 freeb = PETSC_TRUE; 4143 } 4144 4145 for (PetscInt i = 0; i < n; i++) { 4146 const PetscInt nci = ii[i + 1] - ii[i]; 4147 4148 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4149 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4150 } 4151 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4152 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4153 PetscCall(MatDestroy(&Sm)); 4154 if (freeb) PetscCall(PetscFree(bufc)); 4155 } 4156 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4157 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4158 PetscFunctionReturn(PETSC_SUCCESS); 4159 } 4160 4161 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4162 { 4163 Mat_SeqAIJ *aij; 4164 PetscInt *ii, *jj; 4165 PetscScalar *aa; 4166 PetscInt nnz = 0, m, nc; 4167 const PetscScalar *a; 4168 const PetscScalar zero = 0.0; 4169 4170 PetscFunctionBegin; 4171 PetscCall(MatGetLocalSize(D, &m, &nc)); 4172 PetscCall(MatDenseGetArrayRead(D, &a)); 4173 PetscCall(PetscMalloc1(m + 1, &ii)); 4174 PetscCall(PetscMalloc1(m * nc, &jj)); 4175 PetscCall(PetscMalloc1(m * nc, &aa)); 4176 ii[0] = 0; 4177 for (PetscInt k = 0; k < m; k++) { 4178 for (PetscInt s = 0; s < nc; s++) { 4179 const PetscInt c = s + k * nc; 4180 const PetscScalar v = a[k + s * m]; 4181 4182 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4183 jj[nnz] = j[c]; 4184 aa[nnz] = a[k + s * m]; 4185 nnz++; 4186 } 4187 ii[k + 1] = nnz; 4188 } 4189 4190 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4191 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4192 4193 aij = (Mat_SeqAIJ *)(*mat)->data; 4194 aij->free_a = PETSC_TRUE; 4195 aij->free_ij = PETSC_TRUE; 4196 PetscFunctionReturn(PETSC_SUCCESS); 4197 } 4198 4199 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4200 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4201 { 4202 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4203 const PetscBool allowzeropivot = PETSC_FALSE; 4204 PetscBool zeropivotdetected = PETSC_FALSE; 4205 const PetscReal shift = 0.0; 4206 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4207 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4208 PetscLogDouble flops = 0.0; 4209 4210 PetscFunctionBegin; 4211 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4212 for (PetscInt i = 0; i < nblocks; i++) { 4213 ncnt += bsizes[i]; 4214 ncnt2 += PetscSqr(bsizes[i]); 4215 } 4216 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4217 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4218 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4219 4220 PetscCall(PetscMalloc1(n + 1, &ii)); 4221 PetscCall(PetscMalloc1(ncnt2, &jj)); 4222 PetscCall(PetscCalloc1(ncnt2, &aa)); 4223 4224 ncnt = 0; 4225 ii[0] = 0; 4226 indi = ii; 4227 indj = jj; 4228 diag = aa; 4229 for (PetscInt i = 0; i < nblocks; i++) { 4230 const PetscInt bs = bsizes[i]; 4231 4232 for (PetscInt k = 0; k < bs; k++) { 4233 indi[k + 1] = indi[k] + bs; 4234 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4235 } 4236 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4237 switch (bs) { 4238 case 1: 4239 *diag = 1.0 / (*diag); 4240 break; 4241 case 2: 4242 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4243 break; 4244 case 3: 4245 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4246 break; 4247 case 4: 4248 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4249 break; 4250 case 5: 4251 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4252 break; 4253 case 6: 4254 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4255 break; 4256 case 7: 4257 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4258 break; 4259 default: 4260 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4261 } 4262 ncnt += bs; 4263 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4264 diag += bs * bs; 4265 indj += bs * bs; 4266 indi += bs; 4267 } 4268 PetscCall(PetscLogFlops(flops)); 4269 PetscCall(PetscFree2(v_work, v_pivots)); 4270 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4271 { 4272 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4273 aij->free_a = PETSC_TRUE; 4274 aij->free_ij = PETSC_TRUE; 4275 } 4276 PetscFunctionReturn(PETSC_SUCCESS); 4277 } 4278 4279 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4280 { 4281 const PetscScalar *rarr; 4282 PetscScalar *larr; 4283 PetscSF vsf; 4284 PetscInt n, rld, lld; 4285 4286 PetscFunctionBegin; 4287 PetscCall(MatGetSize(A, NULL, &n)); 4288 PetscCall(MatDenseGetLDA(A, &rld)); 4289 PetscCall(MatDenseGetLDA(B, &lld)); 4290 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4291 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4292 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4293 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4294 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4295 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4296 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4297 PetscCall(PetscSFDestroy(&vsf)); 4298 PetscFunctionReturn(PETSC_SUCCESS); 4299 } 4300 4301 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4302 { 4303 PC_IS *pcis = (PC_IS *)pc->data; 4304 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4305 PCBDDCGraph graph = pcbddc->mat_graph; 4306 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4307 /* submatrices of local problem */ 4308 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4309 /* submatrices of local coarse problem */ 4310 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4311 /* working matrices */ 4312 Mat C_CR; 4313 4314 /* additional working stuff */ 4315 PC pc_R; 4316 IS is_R, is_V, is_C; 4317 const PetscInt *idx_V, *idx_C; 4318 Mat F, Brhs = NULL; 4319 Vec dummy_vec; 4320 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4321 PetscInt *idx_V_B; 4322 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4323 PetscInt n_eff_vertices, n_eff_constraints; 4324 PetscInt i, n_R, n_D, n_B; 4325 PetscScalar one = 1.0, m_one = -1.0; 4326 4327 /* Multi-element support */ 4328 PetscBool multi_element = graph->multi_element; 4329 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4330 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4331 IS is_C_perm = NULL; 4332 PetscInt n_C_bss = 0, *C_bss = NULL; 4333 Mat coarse_phi_multi; 4334 4335 PetscFunctionBegin; 4336 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4337 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4338 4339 /* Set Non-overlapping dimensions */ 4340 n_vertices = pcbddc->n_vertices; 4341 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4342 n_B = pcis->n_B; 4343 n_D = pcis->n - n_B; 4344 n_R = pcis->n - n_vertices; 4345 4346 /* vertices in boundary numbering */ 4347 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4348 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4349 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4350 4351 /* these two cases still need to be optimized */ 4352 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4353 4354 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4355 if (multi_element) { 4356 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4357 4358 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4359 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4360 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4361 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4362 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4363 4364 /* group vertices and constraints by subdomain id */ 4365 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4366 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4367 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4368 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4369 4370 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4371 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4372 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4373 for (PetscInt i = 0; i < n_vertices; i++) { 4374 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4375 4376 V_to_eff_V[i] = count_eff[s]; 4377 count_eff[s] += 1; 4378 } 4379 for (PetscInt i = 0; i < n_constraints; i++) { 4380 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4381 4382 C_to_eff_C[i] = count_eff[s]; 4383 count_eff[s] += 1; 4384 } 4385 4386 /* preallocation */ 4387 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4388 for (PetscInt i = 0; i < n_vertices; i++) { 4389 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4390 4391 nnz[i] = count_eff[s] + count_eff[s + 1]; 4392 } 4393 for (PetscInt i = 0; i < n_constraints; i++) { 4394 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4395 4396 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4397 } 4398 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4399 PetscCall(PetscFree(nnz)); 4400 4401 n_eff_vertices = 0; 4402 n_eff_constraints = 0; 4403 for (PetscInt i = 0; i < n_el; i++) { 4404 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4405 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4406 count_eff[2 * i] = 0; 4407 count_eff[2 * i + 1] = 0; 4408 } 4409 4410 const PetscInt *idx; 4411 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4412 4413 for (PetscInt i = 0; i < n_vertices; i++) { 4414 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4415 const PetscInt s = 2 * e; 4416 4417 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4418 count_eff[s] += 1; 4419 } 4420 for (PetscInt i = 0; i < n_constraints; i++) { 4421 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4422 const PetscInt s = 2 * e + 1; 4423 4424 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4425 count_eff[s] += 1; 4426 } 4427 4428 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4429 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4430 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4431 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4432 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4433 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4434 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4435 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4436 4437 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4438 for (PetscInt i = 0; i < n_R; i++) { 4439 const PetscInt e = graph->nodes[idx[i]].local_sub; 4440 const PetscInt s = 2 * e; 4441 PetscInt j; 4442 4443 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4444 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4445 } 4446 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4447 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4448 for (PetscInt i = 0; i < n_B; i++) { 4449 const PetscInt e = graph->nodes[idx[i]].local_sub; 4450 const PetscInt s = 2 * e; 4451 PetscInt j; 4452 4453 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4454 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4455 } 4456 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4457 4458 /* permutation and blocksizes for block invert of S_CC */ 4459 PetscInt *idxp; 4460 4461 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4462 PetscCall(PetscMalloc1(n_el, &C_bss)); 4463 n_C_bss = 0; 4464 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4465 const PetscInt nc = count_eff[2 * e + 1]; 4466 4467 if (nc) C_bss[n_C_bss++] = nc; 4468 for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; 4469 cnt += nc; 4470 } 4471 4472 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4473 4474 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4475 PetscCall(PetscFree(count_eff)); 4476 } else { 4477 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4478 n_eff_constraints = n_constraints; 4479 n_eff_vertices = n_vertices; 4480 } 4481 4482 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4483 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4484 PetscCall(PCSetUp(pc_R)); 4485 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4486 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4487 lda_rhs = n_R; 4488 need_benign_correction = PETSC_FALSE; 4489 if (isLU || isCHOL) { 4490 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4491 } else if (sub_schurs && sub_schurs->reuse_solver) { 4492 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4493 MatFactorType type; 4494 4495 F = reuse_solver->F; 4496 PetscCall(MatGetFactorType(F, &type)); 4497 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4498 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4499 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4500 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4501 } else F = NULL; 4502 4503 /* determine if we can use a sparse right-hand side */ 4504 sparserhs = PETSC_FALSE; 4505 if (F && !multi_element) { 4506 MatSolverType solver; 4507 4508 PetscCall(MatFactorGetSolverType(F, &solver)); 4509 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4510 } 4511 4512 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4513 dummy_vec = NULL; 4514 if (need_benign_correction && lda_rhs != n_R && F) { 4515 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4516 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4517 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4518 } 4519 4520 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4521 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4522 4523 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4524 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4525 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4526 PetscCall(ISGetIndices(is_V, &idx_V)); 4527 PetscCall(ISGetIndices(is_C, &idx_C)); 4528 4529 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4530 if (n_constraints) { 4531 Mat C_B; 4532 4533 /* Extract constraints on R nodes: C_{CR} */ 4534 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4535 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4536 4537 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4538 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4539 if (!sparserhs) { 4540 PetscScalar *marr; 4541 4542 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4543 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4544 for (i = 0; i < n_constraints; i++) { 4545 const PetscScalar *row_cmat_values; 4546 const PetscInt *row_cmat_indices; 4547 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4548 4549 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4550 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4551 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4552 } 4553 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4554 } else { 4555 Mat tC_CR; 4556 4557 PetscCall(MatScale(C_CR, -1.0)); 4558 if (lda_rhs != n_R) { 4559 PetscScalar *aa; 4560 PetscInt r, *ii, *jj; 4561 PetscBool done; 4562 4563 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4564 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4565 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4566 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4567 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4568 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4569 } else { 4570 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4571 tC_CR = C_CR; 4572 } 4573 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4574 PetscCall(MatDestroy(&tC_CR)); 4575 } 4576 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4577 if (F) { 4578 if (need_benign_correction) { 4579 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4580 4581 /* rhs is already zero on interior dofs, no need to change the rhs */ 4582 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4583 } 4584 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4585 if (need_benign_correction) { 4586 PetscScalar *marr; 4587 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4588 4589 /* XXX multi_element? */ 4590 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4591 if (lda_rhs != n_R) { 4592 for (i = 0; i < n_eff_constraints; i++) { 4593 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4594 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4595 PetscCall(VecResetArray(dummy_vec)); 4596 } 4597 } else { 4598 for (i = 0; i < n_eff_constraints; i++) { 4599 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4600 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4601 PetscCall(VecResetArray(pcbddc->vec1_R)); 4602 } 4603 } 4604 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4605 } 4606 } else { 4607 const PetscScalar *barr; 4608 PetscScalar *marr; 4609 4610 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4611 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4612 for (i = 0; i < n_eff_constraints; i++) { 4613 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4614 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4615 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4616 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4617 PetscCall(VecResetArray(pcbddc->vec1_R)); 4618 PetscCall(VecResetArray(pcbddc->vec2_R)); 4619 } 4620 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4621 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4622 } 4623 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4624 PetscCall(MatDestroy(&Brhs)); 4625 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4626 if (!pcbddc->switch_static) { 4627 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4628 for (i = 0; i < n_eff_constraints; i++) { 4629 Vec r, b; 4630 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4631 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4632 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4633 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4634 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4635 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4636 } 4637 if (multi_element) { 4638 Mat T; 4639 4640 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4641 PetscCall(MatDestroy(&local_auxmat2_R)); 4642 local_auxmat2_R = T; 4643 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4644 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4645 pcbddc->local_auxmat2 = T; 4646 } 4647 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4648 } else { 4649 if (multi_element) { 4650 Mat T; 4651 4652 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4653 PetscCall(MatDestroy(&local_auxmat2_R)); 4654 local_auxmat2_R = T; 4655 } 4656 if (lda_rhs != n_R) { 4657 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4658 } else { 4659 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4660 pcbddc->local_auxmat2 = local_auxmat2_R; 4661 } 4662 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4663 } 4664 PetscCall(MatScale(S_CC, m_one)); 4665 if (multi_element) { 4666 Mat T, T2; 4667 IS isp, ispi; 4668 4669 isp = is_C_perm; 4670 4671 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4672 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4673 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4674 PetscCall(MatDestroy(&T)); 4675 PetscCall(MatDestroy(&S_CC)); 4676 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4677 PetscCall(MatDestroy(&T2)); 4678 PetscCall(ISDestroy(&ispi)); 4679 } else { 4680 if (isCHOL) { 4681 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4682 } else { 4683 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4684 } 4685 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4686 } 4687 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4688 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4689 PetscCall(MatDestroy(&C_B)); 4690 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4691 } 4692 4693 /* Get submatrices from subdomain matrix */ 4694 if (n_vertices) { 4695 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4696 PetscBool oldpin; 4697 #endif 4698 IS is_aux; 4699 4700 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4701 IS tis; 4702 4703 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4704 PetscCall(ISSort(tis)); 4705 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4706 PetscCall(ISDestroy(&tis)); 4707 } else { 4708 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4709 } 4710 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4711 oldpin = pcbddc->local_mat->boundtocpu; 4712 #endif 4713 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4714 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4715 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4716 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4717 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4718 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4719 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4720 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4721 #endif 4722 PetscCall(ISDestroy(&is_aux)); 4723 } 4724 PetscCall(ISDestroy(&is_C_perm)); 4725 PetscCall(PetscFree(C_bss)); 4726 4727 p0_lidx_I = NULL; 4728 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4729 const PetscInt *idxs; 4730 4731 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4732 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4733 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4734 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4735 } 4736 4737 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4738 4739 /* Matrices of coarse basis functions (local) */ 4740 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4741 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4742 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4743 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4744 if (!multi_element) { 4745 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4746 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4747 coarse_phi_multi = NULL; 4748 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4749 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4750 IS is_cols[2] = {is_V, is_C}; 4751 4752 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4753 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4754 PetscCall(ISDestroy(&is_rows[1])); 4755 } 4756 4757 /* vertices */ 4758 if (n_vertices) { 4759 PetscBool restoreavr = PETSC_FALSE; 4760 Mat A_RRmA_RV = NULL; 4761 4762 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4763 PetscCall(MatDestroy(&A_VV)); 4764 4765 if (n_R) { 4766 Mat A_RV_bcorr = NULL, S_VV; 4767 4768 PetscCall(MatScale(A_RV, m_one)); 4769 if (need_benign_correction) { 4770 ISLocalToGlobalMapping RtoN; 4771 IS is_p0; 4772 PetscInt *idxs_p0, n; 4773 4774 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4775 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4776 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4777 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4778 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4779 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4780 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4781 PetscCall(ISDestroy(&is_p0)); 4782 } 4783 4784 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4785 if (!sparserhs || need_benign_correction) { 4786 if (lda_rhs == n_R && !multi_element) { 4787 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4788 } else { 4789 Mat T; 4790 PetscScalar *av, *array; 4791 const PetscInt *xadj, *adjncy; 4792 PetscInt n; 4793 PetscBool flg_row; 4794 4795 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4796 PetscCall(MatDenseGetArrayWrite(T, &array)); 4797 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4798 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4799 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4800 for (i = 0; i < n; i++) { 4801 PetscInt j; 4802 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4803 } 4804 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4805 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4806 PetscCall(MatDestroy(&A_RV)); 4807 A_RV = T; 4808 } 4809 if (need_benign_correction) { 4810 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4811 PetscScalar *marr; 4812 4813 /* XXX multi_element */ 4814 PetscCall(MatDenseGetArray(A_RV, &marr)); 4815 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4816 4817 | 0 0 0 | (V) 4818 L = | 0 0 -1 | (P-p0) 4819 | 0 0 -1 | (p0) 4820 4821 */ 4822 for (i = 0; i < reuse_solver->benign_n; i++) { 4823 const PetscScalar *vals; 4824 const PetscInt *idxs, *idxs_zero; 4825 PetscInt n, j, nz; 4826 4827 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4828 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4829 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4830 for (j = 0; j < n; j++) { 4831 PetscScalar val = vals[j]; 4832 PetscInt k, col = idxs[j]; 4833 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4834 } 4835 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4836 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4837 } 4838 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4839 } 4840 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4841 Brhs = A_RV; 4842 } else { 4843 Mat tA_RVT, A_RVT; 4844 4845 if (!pcbddc->symmetric_primal) { 4846 /* A_RV already scaled by -1 */ 4847 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4848 } else { 4849 restoreavr = PETSC_TRUE; 4850 PetscCall(MatScale(A_VR, -1.0)); 4851 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4852 A_RVT = A_VR; 4853 } 4854 if (lda_rhs != n_R) { 4855 PetscScalar *aa; 4856 PetscInt r, *ii, *jj; 4857 PetscBool done; 4858 4859 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4860 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4861 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4862 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4863 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4864 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4865 } else { 4866 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4867 tA_RVT = A_RVT; 4868 } 4869 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4870 PetscCall(MatDestroy(&tA_RVT)); 4871 PetscCall(MatDestroy(&A_RVT)); 4872 } 4873 if (F) { 4874 /* need to correct the rhs */ 4875 if (need_benign_correction) { 4876 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4877 PetscScalar *marr; 4878 4879 PetscCall(MatDenseGetArray(Brhs, &marr)); 4880 if (lda_rhs != n_R) { 4881 for (i = 0; i < n_eff_vertices; i++) { 4882 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4883 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4884 PetscCall(VecResetArray(dummy_vec)); 4885 } 4886 } else { 4887 for (i = 0; i < n_eff_vertices; i++) { 4888 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4889 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4890 PetscCall(VecResetArray(pcbddc->vec1_R)); 4891 } 4892 } 4893 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4894 } 4895 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4896 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4897 /* need to correct the solution */ 4898 if (need_benign_correction) { 4899 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4900 PetscScalar *marr; 4901 4902 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4903 if (lda_rhs != n_R) { 4904 for (i = 0; i < n_eff_vertices; i++) { 4905 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4906 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4907 PetscCall(VecResetArray(dummy_vec)); 4908 } 4909 } else { 4910 for (i = 0; i < n_eff_vertices; i++) { 4911 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4912 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4913 PetscCall(VecResetArray(pcbddc->vec1_R)); 4914 } 4915 } 4916 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4917 } 4918 } else { 4919 const PetscScalar *barr; 4920 PetscScalar *marr; 4921 4922 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4923 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4924 for (i = 0; i < n_eff_vertices; i++) { 4925 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4926 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4927 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4928 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4929 PetscCall(VecResetArray(pcbddc->vec1_R)); 4930 PetscCall(VecResetArray(pcbddc->vec2_R)); 4931 } 4932 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4933 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4934 } 4935 PetscCall(MatDestroy(&A_RV)); 4936 PetscCall(MatDestroy(&Brhs)); 4937 /* S_VV and S_CV */ 4938 if (n_constraints) { 4939 Mat B; 4940 4941 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4942 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4943 4944 /* S_CV = pcbddc->local_auxmat1 * B */ 4945 if (multi_element) { 4946 Mat T; 4947 4948 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4949 PetscCall(MatDestroy(&B)); 4950 B = T; 4951 } 4952 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4953 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4954 PetscCall(MatProductSetFromOptions(S_CV)); 4955 PetscCall(MatProductSymbolic(S_CV)); 4956 PetscCall(MatProductNumeric(S_CV)); 4957 PetscCall(MatProductClear(S_CV)); 4958 PetscCall(MatDestroy(&B)); 4959 4960 /* B = local_auxmat2_R * S_CV */ 4961 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4962 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4963 PetscCall(MatProductSetFromOptions(B)); 4964 PetscCall(MatProductSymbolic(B)); 4965 PetscCall(MatProductNumeric(B)); 4966 4967 PetscCall(MatScale(S_CV, m_one)); 4968 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4969 4970 if (multi_element) { 4971 Mat T; 4972 4973 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4974 PetscCall(MatDestroy(&A_RRmA_RV)); 4975 A_RRmA_RV = T; 4976 } 4977 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4978 PetscCall(MatDestroy(&B)); 4979 } else if (multi_element) { 4980 Mat T; 4981 4982 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4983 PetscCall(MatDestroy(&A_RRmA_RV)); 4984 A_RRmA_RV = T; 4985 } 4986 4987 if (lda_rhs != n_R) { 4988 Mat T; 4989 4990 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4991 PetscCall(MatDestroy(&A_RRmA_RV)); 4992 A_RRmA_RV = T; 4993 } 4994 4995 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4996 if (need_benign_correction) { /* XXX SPARSE */ 4997 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4998 PetscScalar *sums; 4999 const PetscScalar *marr; 5000 5001 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 5002 PetscCall(PetscMalloc1(n_vertices, &sums)); 5003 for (i = 0; i < reuse_solver->benign_n; i++) { 5004 const PetscScalar *vals; 5005 const PetscInt *idxs, *idxs_zero; 5006 PetscInt n, j, nz; 5007 5008 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 5009 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 5010 for (j = 0; j < n_vertices; j++) { 5011 sums[j] = 0.; 5012 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 5013 } 5014 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 5015 for (j = 0; j < n; j++) { 5016 PetscScalar val = vals[j]; 5017 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 5018 } 5019 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 5020 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 5021 } 5022 PetscCall(PetscFree(sums)); 5023 PetscCall(MatDestroy(&A_RV_bcorr)); 5024 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 5025 } 5026 5027 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 5028 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 5029 PetscCall(MatDestroy(&S_VV)); 5030 } 5031 5032 /* coarse basis functions */ 5033 if (coarse_phi_multi) { 5034 Mat Vid; 5035 5036 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 5037 PetscCall(MatShift_Basic(Vid, 1.0)); 5038 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 5039 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 5040 PetscCall(MatDestroy(&Vid)); 5041 } else { 5042 if (A_RRmA_RV) { 5043 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 5044 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5045 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 5046 if (pcbddc->benign_n) { 5047 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5048 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 5049 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 5050 } 5051 } 5052 } 5053 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 5054 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 5055 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 5056 } 5057 PetscCall(MatDestroy(&A_RRmA_RV)); 5058 } 5059 PetscCall(MatDestroy(&A_RV)); 5060 PetscCall(VecDestroy(&dummy_vec)); 5061 5062 if (n_constraints) { 5063 Mat B, B2; 5064 5065 PetscCall(MatScale(S_CC, m_one)); 5066 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 5067 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 5068 PetscCall(MatProductSetFromOptions(B)); 5069 PetscCall(MatProductSymbolic(B)); 5070 PetscCall(MatProductNumeric(B)); 5071 5072 if (n_vertices) { 5073 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 5074 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 5075 } else { 5076 if (lda_rhs != n_R) { 5077 Mat tB; 5078 5079 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 5080 PetscCall(MatDestroy(&B)); 5081 B = tB; 5082 } 5083 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 5084 } 5085 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 5086 } 5087 5088 /* coarse basis functions */ 5089 if (coarse_phi_multi) { 5090 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 5091 } else { 5092 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 5093 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 5094 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 5095 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5096 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 5097 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 5098 if (pcbddc->benign_n) { 5099 for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5100 } 5101 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 5102 } 5103 } 5104 PetscCall(MatDestroy(&B)); 5105 } 5106 5107 /* assemble sparse coarse basis functions */ 5108 if (coarse_phi_multi) { 5109 Mat T; 5110 5111 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 5112 PetscCall(MatDestroy(&coarse_phi_multi)); 5113 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 5114 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); 5115 PetscCall(MatDestroy(&T)); 5116 } 5117 PetscCall(MatDestroy(&local_auxmat2_R)); 5118 PetscCall(PetscFree(p0_lidx_I)); 5119 5120 /* coarse matrix entries relative to B_0 */ 5121 if (pcbddc->benign_n) { 5122 Mat B0_B, B0_BPHI; 5123 IS is_dummy; 5124 const PetscScalar *data; 5125 PetscInt j; 5126 5127 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5128 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5129 PetscCall(ISDestroy(&is_dummy)); 5130 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5131 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5132 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5133 for (j = 0; j < pcbddc->benign_n; j++) { 5134 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5135 for (i = 0; i < pcbddc->local_primal_size; i++) { 5136 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5137 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5138 } 5139 } 5140 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5141 PetscCall(MatDestroy(&B0_B)); 5142 PetscCall(MatDestroy(&B0_BPHI)); 5143 } 5144 5145 /* compute other basis functions for non-symmetric problems */ 5146 if (!pcbddc->symmetric_primal) { 5147 Mat B_V = NULL, B_C = NULL; 5148 PetscScalar *marray, *work; 5149 5150 /* TODO multi_element MatDenseScatter */ 5151 if (n_constraints) { 5152 Mat S_CCT, C_CRT; 5153 5154 PetscCall(MatScale(S_CC, m_one)); 5155 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5156 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5157 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5158 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5159 PetscCall(MatDestroy(&S_CCT)); 5160 if (n_vertices) { 5161 Mat S_VCT; 5162 5163 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5164 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5165 PetscCall(MatDestroy(&S_VCT)); 5166 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5167 } 5168 PetscCall(MatDestroy(&C_CRT)); 5169 } else { 5170 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5171 } 5172 if (n_vertices && n_R) { 5173 PetscScalar *av, *marray; 5174 const PetscInt *xadj, *adjncy; 5175 PetscInt n; 5176 PetscBool flg_row; 5177 5178 /* B_V = B_V - A_VR^T */ 5179 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5180 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5181 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5182 PetscCall(MatDenseGetArray(B_V, &marray)); 5183 for (i = 0; i < n; i++) { 5184 PetscInt j; 5185 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5186 } 5187 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5188 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5189 PetscCall(MatDestroy(&A_VR)); 5190 } 5191 5192 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5193 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5194 if (n_vertices) { 5195 PetscCall(MatDenseGetArray(B_V, &marray)); 5196 for (i = 0; i < n_vertices; i++) { 5197 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5198 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5199 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5200 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5201 PetscCall(VecResetArray(pcbddc->vec1_R)); 5202 PetscCall(VecResetArray(pcbddc->vec2_R)); 5203 } 5204 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5205 } 5206 if (B_C) { 5207 PetscCall(MatDenseGetArray(B_C, &marray)); 5208 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5209 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5210 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5211 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5212 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5213 PetscCall(VecResetArray(pcbddc->vec1_R)); 5214 PetscCall(VecResetArray(pcbddc->vec2_R)); 5215 } 5216 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5217 } 5218 /* coarse basis functions */ 5219 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5220 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5221 for (i = 0; i < pcbddc->local_primal_size; i++) { 5222 Vec v; 5223 5224 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5225 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5226 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5227 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5228 if (i < n_vertices) { 5229 PetscScalar one = 1.0; 5230 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5231 PetscCall(VecAssemblyBegin(v)); 5232 PetscCall(VecAssemblyEnd(v)); 5233 } 5234 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5235 5236 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5237 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5238 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5239 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5240 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5241 } 5242 PetscCall(VecResetArray(pcbddc->vec1_R)); 5243 } 5244 PetscCall(MatDestroy(&B_V)); 5245 PetscCall(MatDestroy(&B_C)); 5246 PetscCall(PetscFree(work)); 5247 } else { 5248 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5249 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5250 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5251 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5252 } 5253 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5254 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5255 5256 /* free memory */ 5257 PetscCall(PetscFree(V_to_eff_V)); 5258 PetscCall(PetscFree(C_to_eff_C)); 5259 PetscCall(PetscFree(R_eff_V_J)); 5260 PetscCall(PetscFree(R_eff_C_J)); 5261 PetscCall(PetscFree(B_eff_V_J)); 5262 PetscCall(PetscFree(B_eff_C_J)); 5263 PetscCall(ISDestroy(&is_R)); 5264 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5265 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5266 PetscCall(ISDestroy(&is_V)); 5267 PetscCall(ISDestroy(&is_C)); 5268 PetscCall(PetscFree(idx_V_B)); 5269 PetscCall(MatDestroy(&S_CV)); 5270 PetscCall(MatDestroy(&S_VC)); 5271 PetscCall(MatDestroy(&S_CC)); 5272 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5273 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5274 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5275 5276 /* Checking coarse_sub_mat and coarse basis functions */ 5277 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5278 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5279 if (pcbddc->dbg_flag) { 5280 Mat AUXMAT, TM1, TM2, TM3, TM4; 5281 Mat coarse_phi_D, coarse_phi_B; 5282 Mat coarse_psi_D, coarse_psi_B; 5283 Mat A_II, A_BB, A_IB, A_BI; 5284 Mat C_B, CPHI; 5285 IS is_dummy; 5286 Vec mones; 5287 MatType checkmattype = MATSEQAIJ; 5288 PetscReal real_value; 5289 5290 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5291 Mat A; 5292 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5293 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5294 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5295 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5296 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5297 PetscCall(MatDestroy(&A)); 5298 } else { 5299 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5300 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5301 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5302 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5303 } 5304 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5305 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5306 if (!pcbddc->symmetric_primal) { 5307 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5308 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5309 } 5310 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5311 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5312 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5313 if (!pcbddc->symmetric_primal) { 5314 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5315 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5316 PetscCall(MatDestroy(&AUXMAT)); 5317 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5318 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5319 PetscCall(MatDestroy(&AUXMAT)); 5320 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5321 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5322 PetscCall(MatDestroy(&AUXMAT)); 5323 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5324 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5325 PetscCall(MatDestroy(&AUXMAT)); 5326 } else { 5327 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5328 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5329 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5330 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5331 PetscCall(MatDestroy(&AUXMAT)); 5332 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5333 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5334 PetscCall(MatDestroy(&AUXMAT)); 5335 } 5336 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5337 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5338 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5339 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5340 if (pcbddc->benign_n) { 5341 Mat B0_B, B0_BPHI; 5342 const PetscScalar *data2; 5343 PetscScalar *data; 5344 PetscInt j; 5345 5346 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5347 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5348 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5349 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5350 PetscCall(MatDenseGetArray(TM1, &data)); 5351 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5352 for (j = 0; j < pcbddc->benign_n; j++) { 5353 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5354 for (i = 0; i < pcbddc->local_primal_size; i++) { 5355 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5356 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5357 } 5358 } 5359 PetscCall(MatDenseRestoreArray(TM1, &data)); 5360 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5361 PetscCall(MatDestroy(&B0_B)); 5362 PetscCall(ISDestroy(&is_dummy)); 5363 PetscCall(MatDestroy(&B0_BPHI)); 5364 } 5365 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5366 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5367 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5368 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5369 5370 /* check constraints */ 5371 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5372 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5373 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5374 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5375 } else { 5376 PetscScalar *data; 5377 Mat tmat; 5378 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5379 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5380 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5381 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5382 PetscCall(MatDestroy(&tmat)); 5383 } 5384 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5385 PetscCall(VecSet(mones, -1.0)); 5386 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5387 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5388 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5389 if (!pcbddc->symmetric_primal) { 5390 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5391 PetscCall(VecSet(mones, -1.0)); 5392 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5393 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5394 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5395 } 5396 PetscCall(MatDestroy(&C_B)); 5397 PetscCall(MatDestroy(&CPHI)); 5398 PetscCall(ISDestroy(&is_dummy)); 5399 PetscCall(VecDestroy(&mones)); 5400 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5401 PetscCall(MatDestroy(&A_II)); 5402 PetscCall(MatDestroy(&A_BB)); 5403 PetscCall(MatDestroy(&A_IB)); 5404 PetscCall(MatDestroy(&A_BI)); 5405 PetscCall(MatDestroy(&TM1)); 5406 PetscCall(MatDestroy(&TM2)); 5407 PetscCall(MatDestroy(&TM3)); 5408 PetscCall(MatDestroy(&TM4)); 5409 PetscCall(MatDestroy(&coarse_phi_D)); 5410 PetscCall(MatDestroy(&coarse_phi_B)); 5411 if (!pcbddc->symmetric_primal) { 5412 PetscCall(MatDestroy(&coarse_psi_D)); 5413 PetscCall(MatDestroy(&coarse_psi_B)); 5414 } 5415 } 5416 5417 #if 0 5418 { 5419 PetscViewer viewer; 5420 char filename[256]; 5421 5422 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5423 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5424 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5425 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5426 PetscCall(MatView(*coarse_submat,viewer)); 5427 if (pcbddc->coarse_phi_B) { 5428 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5429 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5430 } 5431 if (pcbddc->coarse_phi_D) { 5432 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5433 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5434 } 5435 if (pcbddc->coarse_psi_B) { 5436 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5437 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5438 } 5439 if (pcbddc->coarse_psi_D) { 5440 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5441 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5442 } 5443 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5444 PetscCall(MatView(pcbddc->local_mat,viewer)); 5445 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5446 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5447 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5448 PetscCall(ISView(pcis->is_I_local,viewer)); 5449 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5450 PetscCall(ISView(pcis->is_B_local,viewer)); 5451 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5452 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5453 PetscCall(PetscViewerDestroy(&viewer)); 5454 } 5455 #endif 5456 5457 /* device support */ 5458 { 5459 PetscBool iscuda, iship, iskokkos; 5460 MatType mtype = NULL; 5461 5462 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5463 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5464 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5465 if (iskokkos) { 5466 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5467 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5468 } 5469 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5470 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5471 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5472 if (mtype) { 5473 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5474 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5475 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5476 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5477 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5478 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5479 } 5480 } 5481 PetscFunctionReturn(PETSC_SUCCESS); 5482 } 5483 5484 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5485 { 5486 Mat *work_mat; 5487 IS isrow_s, iscol_s; 5488 PetscBool rsorted, csorted; 5489 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5490 5491 PetscFunctionBegin; 5492 PetscCall(ISSorted(isrow, &rsorted)); 5493 PetscCall(ISSorted(iscol, &csorted)); 5494 PetscCall(ISGetLocalSize(isrow, &rsize)); 5495 PetscCall(ISGetLocalSize(iscol, &csize)); 5496 5497 if (!rsorted) { 5498 const PetscInt *idxs; 5499 PetscInt *idxs_sorted, i; 5500 5501 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5502 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5503 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5504 PetscCall(ISGetIndices(isrow, &idxs)); 5505 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5506 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5507 PetscCall(ISRestoreIndices(isrow, &idxs)); 5508 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5509 } else { 5510 PetscCall(PetscObjectReference((PetscObject)isrow)); 5511 isrow_s = isrow; 5512 } 5513 5514 if (!csorted) { 5515 if (isrow == iscol) { 5516 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5517 iscol_s = isrow_s; 5518 } else { 5519 const PetscInt *idxs; 5520 PetscInt *idxs_sorted, i; 5521 5522 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5523 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5524 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5525 PetscCall(ISGetIndices(iscol, &idxs)); 5526 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5527 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5528 PetscCall(ISRestoreIndices(iscol, &idxs)); 5529 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5530 } 5531 } else { 5532 PetscCall(PetscObjectReference((PetscObject)iscol)); 5533 iscol_s = iscol; 5534 } 5535 5536 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5537 5538 if (!rsorted || !csorted) { 5539 Mat new_mat; 5540 IS is_perm_r, is_perm_c; 5541 5542 if (!rsorted) { 5543 PetscInt *idxs_r, i; 5544 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5545 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5546 PetscCall(PetscFree(idxs_perm_r)); 5547 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5548 } else { 5549 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5550 } 5551 PetscCall(ISSetPermutation(is_perm_r)); 5552 5553 if (!csorted) { 5554 if (isrow_s == iscol_s) { 5555 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5556 is_perm_c = is_perm_r; 5557 } else { 5558 PetscInt *idxs_c, i; 5559 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5560 PetscCall(PetscMalloc1(csize, &idxs_c)); 5561 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5562 PetscCall(PetscFree(idxs_perm_c)); 5563 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5564 } 5565 } else { 5566 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5567 } 5568 PetscCall(ISSetPermutation(is_perm_c)); 5569 5570 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5571 PetscCall(MatDestroy(&work_mat[0])); 5572 work_mat[0] = new_mat; 5573 PetscCall(ISDestroy(&is_perm_r)); 5574 PetscCall(ISDestroy(&is_perm_c)); 5575 } 5576 5577 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5578 *B = work_mat[0]; 5579 PetscCall(MatDestroyMatrices(1, &work_mat)); 5580 PetscCall(ISDestroy(&isrow_s)); 5581 PetscCall(ISDestroy(&iscol_s)); 5582 PetscFunctionReturn(PETSC_SUCCESS); 5583 } 5584 5585 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C) 5586 { 5587 PetscFunctionBegin; 5588 PetscCall(MatProductCreate(A, P, NULL, C)); 5589 PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP)); 5590 PetscCall(MatProductSetAlgorithm(*C, "default")); 5591 PetscCall(MatProductSetFill(*C, fill)); 5592 PetscCall(MatSetOptionsPrefix(*C, prefix)); 5593 PetscCall(MatProductSetFromOptions(*C)); 5594 PetscCall(MatProductSymbolic(*C)); 5595 PetscCall(MatProductNumeric(*C)); 5596 (*C)->symmetric = A->symmetric; 5597 (*C)->spd = A->spd; 5598 PetscFunctionReturn(PETSC_SUCCESS); 5599 } 5600 5601 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5602 { 5603 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5604 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5605 Mat new_mat, lA; 5606 IS is_local, is_global; 5607 PetscInt local_size; 5608 PetscBool isseqaij, issym, isset; 5609 char ptapprefix[256]; 5610 5611 PetscFunctionBegin; 5612 PetscCall(MatDestroy(&pcbddc->local_mat)); 5613 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5614 if (pcbddc->mat_graph->multi_element) { 5615 Mat *mats, *bdiags; 5616 IS *gsubs; 5617 PetscInt nsubs = pcbddc->n_local_subs; 5618 5619 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5620 #if 1 5621 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5622 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5623 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5624 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5625 PetscCall(PetscFree(gsubs)); 5626 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */ 5627 Mat *tmats; 5628 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5629 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5630 PetscCall(ISDestroy(&is_local)); 5631 PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE)); 5632 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats)); 5633 PetscCall(ISDestroy(&is_global)); 5634 PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags)); 5635 PetscCall(MatDestroySubMatrices(1, &tmats)); 5636 #endif 5637 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5638 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5639 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5640 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5641 PetscCall(PetscFree(mats)); 5642 } else { 5643 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5644 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5645 PetscCall(ISDestroy(&is_local)); 5646 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5647 PetscCall(ISDestroy(&is_global)); 5648 } 5649 if (pcbddc->dbg_flag) { 5650 Vec x, x_change; 5651 PetscReal error; 5652 5653 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5654 PetscCall(VecSetRandom(x, NULL)); 5655 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5656 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5657 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5658 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5659 if (!pcbddc->change_interior) { 5660 const PetscScalar *x, *y, *v; 5661 PetscReal lerror = 0.; 5662 PetscInt i; 5663 5664 PetscCall(VecGetArrayRead(matis->x, &x)); 5665 PetscCall(VecGetArrayRead(matis->y, &y)); 5666 PetscCall(VecGetArrayRead(matis->counter, &v)); 5667 for (i = 0; i < local_size; i++) 5668 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5669 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5670 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5671 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5672 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5673 if (error > PETSC_SMALL) { 5674 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5675 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5676 } else { 5677 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5678 } 5679 } 5680 } 5681 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5682 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5683 PetscCall(VecAXPY(x, -1.0, x_change)); 5684 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5685 if (error > PETSC_SMALL) { 5686 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5687 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5688 } else { 5689 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5690 } 5691 } 5692 PetscCall(VecDestroy(&x)); 5693 PetscCall(VecDestroy(&x_change)); 5694 } 5695 5696 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5697 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5698 5699 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5700 if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix)); 5701 else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_")); 5702 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5703 if (isseqaij) { 5704 PetscCall(MatDestroy(&pcbddc->local_mat)); 5705 PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5706 if (lA) { 5707 Mat work; 5708 PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5709 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5710 PetscCall(MatDestroy(&work)); 5711 } 5712 } else { 5713 Mat work_mat; 5714 5715 PetscCall(MatDestroy(&pcbddc->local_mat)); 5716 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5717 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5718 PetscCall(MatDestroy(&work_mat)); 5719 if (lA) { 5720 Mat work; 5721 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5722 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5723 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5724 PetscCall(MatDestroy(&work)); 5725 } 5726 } 5727 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5728 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5729 PetscCall(MatDestroy(&new_mat)); 5730 PetscFunctionReturn(PETSC_SUCCESS); 5731 } 5732 5733 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5734 { 5735 PC_IS *pcis = (PC_IS *)pc->data; 5736 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5737 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5738 PetscInt *idx_R_local = NULL; 5739 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5740 PetscInt vbs, bs; 5741 PetscBT bitmask = NULL; 5742 5743 PetscFunctionBegin; 5744 /* 5745 No need to setup local scatters if 5746 - primal space is unchanged 5747 AND 5748 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5749 AND 5750 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5751 */ 5752 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5753 /* destroy old objects */ 5754 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5755 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5756 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5757 /* Set Non-overlapping dimensions */ 5758 n_B = pcis->n_B; 5759 n_D = pcis->n - n_B; 5760 n_vertices = pcbddc->n_vertices; 5761 5762 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5763 5764 /* create auxiliary bitmask and allocate workspace */ 5765 if (!sub_schurs || !sub_schurs->reuse_solver) { 5766 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5767 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5768 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5769 5770 for (i = 0, n_R = 0; i < pcis->n; i++) { 5771 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5772 } 5773 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5774 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5775 5776 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5777 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5778 } 5779 5780 /* Block code */ 5781 vbs = 1; 5782 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5783 if (bs > 1 && !(n_vertices % bs)) { 5784 PetscBool is_blocked = PETSC_TRUE; 5785 PetscInt *vary; 5786 if (!sub_schurs || !sub_schurs->reuse_solver) { 5787 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5788 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5789 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5790 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5791 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5792 for (i = 0; i < pcis->n / bs; i++) { 5793 if (vary[i] != 0 && vary[i] != bs) { 5794 is_blocked = PETSC_FALSE; 5795 break; 5796 } 5797 } 5798 PetscCall(PetscFree(vary)); 5799 } else { 5800 /* Verify directly the R set */ 5801 for (i = 0; i < n_R / bs; i++) { 5802 PetscInt j, node = idx_R_local[bs * i]; 5803 for (j = 1; j < bs; j++) { 5804 if (node != idx_R_local[bs * i + j] - j) { 5805 is_blocked = PETSC_FALSE; 5806 break; 5807 } 5808 } 5809 } 5810 } 5811 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5812 vbs = bs; 5813 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5814 } 5815 } 5816 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5817 if (sub_schurs && sub_schurs->reuse_solver) { 5818 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5819 5820 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5821 PetscCall(ISDestroy(&reuse_solver->is_R)); 5822 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5823 reuse_solver->is_R = pcbddc->is_R_local; 5824 } else { 5825 PetscCall(PetscFree(idx_R_local)); 5826 } 5827 5828 /* print some info if requested */ 5829 if (pcbddc->dbg_flag) { 5830 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5831 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5832 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5833 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5834 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5835 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5836 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5837 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5838 } 5839 5840 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5841 if (!sub_schurs || !sub_schurs->reuse_solver) { 5842 IS is_aux1, is_aux2; 5843 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5844 5845 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5846 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5847 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5848 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5849 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5850 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5851 for (i = 0, j = 0; i < n_R; i++) { 5852 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5853 } 5854 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5855 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5856 for (i = 0, j = 0; i < n_B; i++) { 5857 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5858 } 5859 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5860 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5861 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5862 PetscCall(ISDestroy(&is_aux1)); 5863 PetscCall(ISDestroy(&is_aux2)); 5864 5865 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5866 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5867 for (i = 0, j = 0; i < n_R; i++) { 5868 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5869 } 5870 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5871 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5872 PetscCall(ISDestroy(&is_aux1)); 5873 } 5874 PetscCall(PetscBTDestroy(&bitmask)); 5875 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5876 } else { 5877 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5878 IS tis; 5879 PetscInt schur_size; 5880 5881 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5882 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5883 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5884 PetscCall(ISDestroy(&tis)); 5885 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5886 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5887 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5888 PetscCall(ISDestroy(&tis)); 5889 } 5890 } 5891 PetscFunctionReturn(PETSC_SUCCESS); 5892 } 5893 5894 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5895 { 5896 MatNullSpace NullSpace; 5897 Mat dmat; 5898 const Vec *nullvecs; 5899 Vec v, v2, *nullvecs2; 5900 VecScatter sct = NULL; 5901 PetscScalar *ddata; 5902 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5903 PetscBool nnsp_has_cnst; 5904 5905 PetscFunctionBegin; 5906 if (!is && !B) { /* MATIS */ 5907 Mat_IS *matis = (Mat_IS *)A->data; 5908 5909 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5910 sct = matis->cctx; 5911 PetscCall(PetscObjectReference((PetscObject)sct)); 5912 } else { 5913 PetscCall(MatGetNullSpace(B, &NullSpace)); 5914 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5915 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5916 } 5917 PetscCall(MatGetNullSpace(A, &NullSpace)); 5918 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5919 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5920 5921 PetscCall(MatCreateVecs(A, &v, NULL)); 5922 PetscCall(MatCreateVecs(B, &v2, NULL)); 5923 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5924 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs)); 5925 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5926 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5927 PetscCall(VecGetBlockSize(v2, &bs)); 5928 PetscCall(VecGetSize(v2, &N)); 5929 PetscCall(VecGetLocalSize(v2, &n)); 5930 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5931 for (k = 0; k < nnsp_size; k++) { 5932 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5933 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5934 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5935 } 5936 if (nnsp_has_cnst) { 5937 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5938 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5939 } 5940 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5941 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5942 5943 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5944 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault)); 5945 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5946 PetscCall(MatDestroy(&dmat)); 5947 5948 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5949 PetscCall(PetscFree(nullvecs2)); 5950 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5951 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5952 PetscCall(VecDestroy(&v)); 5953 PetscCall(VecDestroy(&v2)); 5954 PetscCall(VecScatterDestroy(&sct)); 5955 PetscFunctionReturn(PETSC_SUCCESS); 5956 } 5957 5958 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5959 { 5960 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5961 PC_IS *pcis = (PC_IS *)pc->data; 5962 PC pc_temp; 5963 Mat A_RR; 5964 MatNullSpace nnsp; 5965 MatReuse reuse; 5966 PetscScalar m_one = -1.0; 5967 PetscReal value; 5968 PetscInt n_D, n_R; 5969 PetscBool issbaij, opts, isset, issym; 5970 PetscBool f = PETSC_FALSE; 5971 char dir_prefix[256], neu_prefix[256], str_level[16]; 5972 size_t len; 5973 5974 PetscFunctionBegin; 5975 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5976 /* approximate solver, propagate NearNullSpace if needed */ 5977 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5978 MatNullSpace gnnsp1, gnnsp2; 5979 PetscBool lhas, ghas; 5980 5981 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5982 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5983 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5984 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5985 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5986 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5987 } 5988 5989 /* compute prefixes */ 5990 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5991 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5992 if (!pcbddc->current_level) { 5993 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5994 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5995 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5996 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5997 } else { 5998 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 5999 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 6000 len -= 15; /* remove "pc_bddc_coarse_" */ 6001 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 6002 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 6003 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 6004 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 6005 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 6006 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 6007 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 6008 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 6009 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 6010 } 6011 6012 /* DIRICHLET PROBLEM */ 6013 if (dirichlet) { 6014 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6015 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 6016 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 6017 if (pcbddc->dbg_flag) { 6018 Mat A_IIn; 6019 6020 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 6021 PetscCall(MatDestroy(&pcis->A_II)); 6022 pcis->A_II = A_IIn; 6023 } 6024 } 6025 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6026 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 6027 6028 /* Matrix for Dirichlet problem is pcis->A_II */ 6029 n_D = pcis->n - pcis->n_B; 6030 opts = PETSC_FALSE; 6031 if (!pcbddc->ksp_D) { /* create object if not yet build */ 6032 opts = PETSC_TRUE; 6033 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 6034 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 6035 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 6036 /* default */ 6037 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 6038 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 6039 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 6040 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6041 if (issbaij) { 6042 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6043 } else { 6044 PetscCall(PCSetType(pc_temp, PCLU)); 6045 } 6046 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 6047 } 6048 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 6049 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 6050 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 6051 /* Allow user's customization */ 6052 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 6053 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 6054 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6055 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 6056 } 6057 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 6058 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6059 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6060 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6061 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6062 const PetscInt *idxs; 6063 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6064 6065 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 6066 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 6067 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6068 for (i = 0; i < nl; i++) { 6069 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6070 } 6071 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 6072 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6073 PetscCall(PetscFree(scoords)); 6074 } 6075 if (sub_schurs && sub_schurs->reuse_solver) { 6076 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6077 6078 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 6079 } 6080 6081 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6082 if (!n_D) { 6083 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6084 PetscCall(PCSetType(pc_temp, PCNONE)); 6085 } 6086 PetscCall(KSPSetUp(pcbddc->ksp_D)); 6087 /* set ksp_D into pcis data */ 6088 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 6089 PetscCall(KSPDestroy(&pcis->ksp_D)); 6090 pcis->ksp_D = pcbddc->ksp_D; 6091 } 6092 6093 /* NEUMANN PROBLEM */ 6094 A_RR = NULL; 6095 if (neumann) { 6096 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6097 PetscInt ibs, mbs; 6098 PetscBool issbaij, reuse_neumann_solver, isset, issym; 6099 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6100 6101 reuse_neumann_solver = PETSC_FALSE; 6102 if (sub_schurs && sub_schurs->reuse_solver) { 6103 IS iP; 6104 6105 reuse_neumann_solver = PETSC_TRUE; 6106 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 6107 if (iP) reuse_neumann_solver = PETSC_FALSE; 6108 } 6109 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 6110 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 6111 if (pcbddc->ksp_R) { /* already created ksp */ 6112 PetscInt nn_R; 6113 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 6114 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6115 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 6116 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 6117 PetscCall(KSPReset(pcbddc->ksp_R)); 6118 PetscCall(MatDestroy(&A_RR)); 6119 reuse = MAT_INITIAL_MATRIX; 6120 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 6121 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 6122 PetscCall(MatDestroy(&A_RR)); 6123 reuse = MAT_INITIAL_MATRIX; 6124 } else { /* safe to reuse the matrix */ 6125 reuse = MAT_REUSE_MATRIX; 6126 } 6127 } 6128 /* last check */ 6129 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 6130 PetscCall(MatDestroy(&A_RR)); 6131 reuse = MAT_INITIAL_MATRIX; 6132 } 6133 } else { /* first time, so we need to create the matrix */ 6134 reuse = MAT_INITIAL_MATRIX; 6135 } 6136 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 6137 TODO: Get Rid of these conversions */ 6138 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 6139 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 6140 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 6141 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 6142 if (matis->A == pcbddc->local_mat) { 6143 PetscCall(MatDestroy(&pcbddc->local_mat)); 6144 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6145 } else { 6146 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6147 } 6148 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6149 if (matis->A == pcbddc->local_mat) { 6150 PetscCall(MatDestroy(&pcbddc->local_mat)); 6151 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6152 } else { 6153 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6154 } 6155 } 6156 /* extract A_RR */ 6157 if (reuse_neumann_solver) { 6158 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6159 6160 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6161 PetscCall(MatDestroy(&A_RR)); 6162 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6163 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6164 } else { 6165 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6166 } 6167 } else { 6168 PetscCall(MatDestroy(&A_RR)); 6169 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6170 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6171 } 6172 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6173 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6174 } 6175 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6176 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6177 opts = PETSC_FALSE; 6178 if (!pcbddc->ksp_R) { /* create object if not present */ 6179 opts = PETSC_TRUE; 6180 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6181 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6182 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6183 /* default */ 6184 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6185 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6186 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6187 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6188 if (issbaij) { 6189 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6190 } else { 6191 PetscCall(PCSetType(pc_temp, PCLU)); 6192 } 6193 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6194 } 6195 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6196 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6197 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6198 if (opts) { /* Allow user's customization once */ 6199 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6200 } 6201 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6202 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6203 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6204 } 6205 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6206 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6207 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6208 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6209 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6210 const PetscInt *idxs; 6211 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6212 6213 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6214 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6215 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6216 for (i = 0; i < nl; i++) { 6217 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6218 } 6219 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6220 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6221 PetscCall(PetscFree(scoords)); 6222 } 6223 6224 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6225 if (!n_R) { 6226 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6227 PetscCall(PCSetType(pc_temp, PCNONE)); 6228 } 6229 /* Reuse solver if it is present */ 6230 if (reuse_neumann_solver) { 6231 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6232 6233 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6234 } 6235 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6236 } 6237 6238 if (pcbddc->dbg_flag) { 6239 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6240 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6241 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6242 } 6243 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6244 6245 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6246 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6247 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6248 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6249 /* check Dirichlet and Neumann solvers */ 6250 if (pcbddc->dbg_flag) { 6251 if (dirichlet) { /* Dirichlet */ 6252 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6253 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6254 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6255 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6256 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6257 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6258 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6259 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6260 } 6261 if (neumann) { /* Neumann */ 6262 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6263 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6264 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6265 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6266 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6267 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6268 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6269 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6270 } 6271 } 6272 /* free Neumann problem's matrix */ 6273 PetscCall(MatDestroy(&A_RR)); 6274 PetscFunctionReturn(PETSC_SUCCESS); 6275 } 6276 6277 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6278 { 6279 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6280 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6281 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6282 6283 PetscFunctionBegin; 6284 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6285 if (!pcbddc->switch_static) { 6286 if (applytranspose && pcbddc->local_auxmat1) { 6287 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6288 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6289 } 6290 if (!reuse_solver) { 6291 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6292 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6293 } else { 6294 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6295 6296 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6297 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6298 } 6299 } else { 6300 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6301 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6302 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6303 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6304 if (applytranspose && pcbddc->local_auxmat1) { 6305 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6306 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6307 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6308 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6309 } 6310 } 6311 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6312 if (!reuse_solver || pcbddc->switch_static) { 6313 if (applytranspose) { 6314 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6315 } else { 6316 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6317 } 6318 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6319 } else { 6320 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6321 6322 if (applytranspose) { 6323 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6324 } else { 6325 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6326 } 6327 } 6328 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6329 PetscCall(VecSet(inout_B, 0.)); 6330 if (!pcbddc->switch_static) { 6331 if (!reuse_solver) { 6332 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6333 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6334 } else { 6335 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6336 6337 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6338 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6339 } 6340 if (!applytranspose && pcbddc->local_auxmat1) { 6341 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6342 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6343 } 6344 } else { 6345 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6346 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6347 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6348 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6349 if (!applytranspose && pcbddc->local_auxmat1) { 6350 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6351 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6352 } 6353 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6354 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6355 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6356 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6357 } 6358 PetscFunctionReturn(PETSC_SUCCESS); 6359 } 6360 6361 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6362 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6363 { 6364 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6365 PC_IS *pcis = (PC_IS *)pc->data; 6366 const PetscScalar zero = 0.0; 6367 6368 PetscFunctionBegin; 6369 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6370 if (!pcbddc->benign_apply_coarse_only) { 6371 if (applytranspose) { 6372 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6373 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6374 } else { 6375 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6376 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6377 } 6378 } else { 6379 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6380 } 6381 6382 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6383 if (pcbddc->benign_n) { 6384 PetscScalar *array; 6385 PetscInt j; 6386 6387 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6388 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6389 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6390 } 6391 6392 /* start communications from local primal nodes to rhs of coarse solver */ 6393 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6394 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6395 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6396 6397 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6398 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6399 if (pcbddc->coarse_ksp) { 6400 Mat coarse_mat; 6401 Vec rhs, sol; 6402 MatNullSpace nullsp; 6403 PetscBool isbddc = PETSC_FALSE; 6404 6405 if (pcbddc->benign_have_null) { 6406 PC coarse_pc; 6407 6408 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6409 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6410 /* we need to propagate to coarser levels the need for a possible benign correction */ 6411 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6412 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6413 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6414 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6415 } 6416 } 6417 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6418 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6419 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6420 if (applytranspose) { 6421 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6422 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6423 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6424 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6425 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6426 } else { 6427 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6428 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6429 PC coarse_pc; 6430 6431 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6432 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6433 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6434 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6435 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6436 } else { 6437 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6438 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6439 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6440 } 6441 } 6442 /* we don't need the benign correction at coarser levels anymore */ 6443 if (pcbddc->benign_have_null && isbddc) { 6444 PC coarse_pc; 6445 PC_BDDC *coarsepcbddc; 6446 6447 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6448 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6449 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6450 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6451 } 6452 } 6453 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6454 6455 /* Local solution on R nodes */ 6456 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6457 /* communications from coarse sol to local primal nodes */ 6458 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6459 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6460 6461 /* Sum contributions from the two levels */ 6462 if (!pcbddc->benign_apply_coarse_only) { 6463 if (applytranspose) { 6464 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6465 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6466 } else { 6467 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6468 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6469 } 6470 /* store p0 */ 6471 if (pcbddc->benign_n) { 6472 PetscScalar *array; 6473 PetscInt j; 6474 6475 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6476 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6477 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6478 } 6479 } else { /* expand the coarse solution */ 6480 if (applytranspose) { 6481 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6482 } else { 6483 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6484 } 6485 } 6486 PetscFunctionReturn(PETSC_SUCCESS); 6487 } 6488 6489 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6490 { 6491 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6492 Vec from, to; 6493 const PetscScalar *array; 6494 6495 PetscFunctionBegin; 6496 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6497 from = pcbddc->coarse_vec; 6498 to = pcbddc->vec1_P; 6499 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6500 Vec tvec; 6501 6502 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6503 PetscCall(VecResetArray(tvec)); 6504 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6505 PetscCall(VecGetArrayRead(tvec, &array)); 6506 PetscCall(VecPlaceArray(from, array)); 6507 PetscCall(VecRestoreArrayRead(tvec, &array)); 6508 } 6509 } else { /* from local to global -> put data in coarse right-hand side */ 6510 from = pcbddc->vec1_P; 6511 to = pcbddc->coarse_vec; 6512 } 6513 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6514 PetscFunctionReturn(PETSC_SUCCESS); 6515 } 6516 6517 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6518 { 6519 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6520 Vec from, to; 6521 const PetscScalar *array; 6522 6523 PetscFunctionBegin; 6524 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6525 from = pcbddc->coarse_vec; 6526 to = pcbddc->vec1_P; 6527 } else { /* from local to global -> put data in coarse right-hand side */ 6528 from = pcbddc->vec1_P; 6529 to = pcbddc->coarse_vec; 6530 } 6531 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6532 if (smode == SCATTER_FORWARD) { 6533 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6534 Vec tvec; 6535 6536 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6537 PetscCall(VecGetArrayRead(to, &array)); 6538 PetscCall(VecPlaceArray(tvec, array)); 6539 PetscCall(VecRestoreArrayRead(to, &array)); 6540 } 6541 } else { 6542 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6543 PetscCall(VecResetArray(from)); 6544 } 6545 } 6546 PetscFunctionReturn(PETSC_SUCCESS); 6547 } 6548 6549 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6550 { 6551 PC_IS *pcis = (PC_IS *)pc->data; 6552 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6553 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6554 /* one and zero */ 6555 PetscScalar one = 1.0, zero = 0.0; 6556 /* space to store constraints and their local indices */ 6557 PetscScalar *constraints_data; 6558 PetscInt *constraints_idxs, *constraints_idxs_B; 6559 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6560 PetscInt *constraints_n; 6561 /* iterators */ 6562 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6563 /* BLAS integers */ 6564 PetscBLASInt lwork, lierr; 6565 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6566 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6567 /* reuse */ 6568 PetscInt olocal_primal_size, olocal_primal_size_cc; 6569 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6570 /* change of basis */ 6571 PetscBool qr_needed; 6572 PetscBT change_basis, qr_needed_idx; 6573 /* auxiliary stuff */ 6574 PetscInt *nnz, *is_indices; 6575 PetscInt ncc; 6576 /* some quantities */ 6577 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6578 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6579 PetscReal tol; /* tolerance for retaining eigenmodes */ 6580 6581 PetscFunctionBegin; 6582 tol = PetscSqrtReal(PETSC_SMALL); 6583 /* Destroy Mat objects computed previously */ 6584 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6585 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6586 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6587 /* save info on constraints from previous setup (if any) */ 6588 olocal_primal_size = pcbddc->local_primal_size; 6589 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6590 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6591 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6592 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6593 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6594 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6595 6596 if (!pcbddc->adaptive_selection) { 6597 IS ISForVertices, *ISForFaces, *ISForEdges; 6598 MatNullSpace nearnullsp; 6599 const Vec *nearnullvecs; 6600 Vec *localnearnullsp; 6601 PetscScalar *array; 6602 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6603 PetscBool nnsp_has_cnst; 6604 /* LAPACK working arrays for SVD or POD */ 6605 PetscBool skip_lapack, boolforchange; 6606 PetscScalar *work; 6607 PetscReal *singular_vals; 6608 #if defined(PETSC_USE_COMPLEX) 6609 PetscReal *rwork; 6610 #endif 6611 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6612 PetscBLASInt dummy_int = 1; 6613 PetscScalar dummy_scalar = 1.; 6614 PetscBool use_pod = PETSC_FALSE; 6615 6616 /* MKL SVD with same input gives different results on different processes! */ 6617 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6618 use_pod = PETSC_TRUE; 6619 #endif 6620 /* Get index sets for faces, edges and vertices from graph */ 6621 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6622 o_nf = n_ISForFaces; 6623 o_ne = n_ISForEdges; 6624 n_vertices = 0; 6625 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6626 /* print some info */ 6627 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6628 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6629 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6630 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6631 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6632 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6633 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6634 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6635 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6636 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6637 } 6638 6639 if (!pcbddc->use_vertices) n_vertices = 0; 6640 if (!pcbddc->use_edges) n_ISForEdges = 0; 6641 if (!pcbddc->use_faces) n_ISForFaces = 0; 6642 6643 /* check if near null space is attached to global mat */ 6644 if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6645 else nearnullsp = NULL; 6646 6647 if (nearnullsp) { 6648 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6649 /* remove any stored info */ 6650 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6651 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6652 /* store information for BDDC solver reuse */ 6653 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6654 pcbddc->onearnullspace = nearnullsp; 6655 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6656 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6657 } else { /* if near null space is not provided BDDC uses constants by default */ 6658 nnsp_size = 0; 6659 nnsp_has_cnst = PETSC_TRUE; 6660 } 6661 /* get max number of constraints on a single cc */ 6662 max_constraints = nnsp_size; 6663 if (nnsp_has_cnst) max_constraints++; 6664 6665 /* 6666 Evaluate maximum storage size needed by the procedure 6667 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6668 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6669 There can be multiple constraints per connected component 6670 */ 6671 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6672 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6673 6674 total_counts = n_ISForFaces + n_ISForEdges; 6675 total_counts *= max_constraints; 6676 total_counts += n_vertices; 6677 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6678 6679 total_counts = 0; 6680 max_size_of_constraint = 0; 6681 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6682 IS used_is; 6683 if (i < n_ISForEdges) { 6684 used_is = ISForEdges[i]; 6685 } else { 6686 used_is = ISForFaces[i - n_ISForEdges]; 6687 } 6688 PetscCall(ISGetSize(used_is, &j)); 6689 total_counts += j; 6690 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6691 } 6692 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6693 6694 /* get local part of global near null space vectors */ 6695 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6696 for (k = 0; k < nnsp_size; k++) { 6697 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6698 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6699 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6700 } 6701 6702 /* whether or not to skip lapack calls */ 6703 skip_lapack = PETSC_TRUE; 6704 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6705 6706 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6707 if (!skip_lapack) { 6708 PetscScalar temp_work; 6709 6710 if (use_pod) { 6711 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6712 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6713 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6714 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6715 #if defined(PETSC_USE_COMPLEX) 6716 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6717 #endif 6718 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6719 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6720 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6721 lwork = -1; 6722 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6723 #if !defined(PETSC_USE_COMPLEX) 6724 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6725 #else 6726 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6727 #endif 6728 PetscCall(PetscFPTrapPop()); 6729 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6730 } else { 6731 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6732 /* SVD */ 6733 PetscInt max_n, min_n; 6734 max_n = max_size_of_constraint; 6735 min_n = max_constraints; 6736 if (max_size_of_constraint < max_constraints) { 6737 min_n = max_size_of_constraint; 6738 max_n = max_constraints; 6739 } 6740 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6741 #if defined(PETSC_USE_COMPLEX) 6742 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6743 #endif 6744 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6745 lwork = -1; 6746 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6747 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6748 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6749 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6750 #if !defined(PETSC_USE_COMPLEX) 6751 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6752 #else 6753 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6754 #endif 6755 PetscCall(PetscFPTrapPop()); 6756 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6757 #else 6758 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6759 #endif /* on missing GESVD */ 6760 } 6761 /* Allocate optimal workspace */ 6762 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6763 PetscCall(PetscMalloc1(lwork, &work)); 6764 } 6765 /* Now we can loop on constraining sets */ 6766 total_counts = 0; 6767 constraints_idxs_ptr[0] = 0; 6768 constraints_data_ptr[0] = 0; 6769 /* vertices */ 6770 if (n_vertices) { 6771 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6772 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6773 for (i = 0; i < n_vertices; i++) { 6774 constraints_n[total_counts] = 1; 6775 constraints_data[total_counts] = 1.0; 6776 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6777 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6778 total_counts++; 6779 } 6780 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6781 } 6782 6783 /* edges and faces */ 6784 total_counts_cc = total_counts; 6785 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6786 IS used_is; 6787 PetscBool idxs_copied = PETSC_FALSE; 6788 6789 if (ncc < n_ISForEdges) { 6790 used_is = ISForEdges[ncc]; 6791 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6792 } else { 6793 used_is = ISForFaces[ncc - n_ISForEdges]; 6794 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6795 } 6796 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6797 6798 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6799 if (!size_of_constraint) continue; 6800 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6801 if (nnsp_has_cnst) { 6802 PetscScalar quad_value; 6803 6804 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6805 idxs_copied = PETSC_TRUE; 6806 6807 if (!pcbddc->use_nnsp_true) { 6808 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6809 } else { 6810 quad_value = 1.0; 6811 } 6812 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6813 temp_constraints++; 6814 total_counts++; 6815 } 6816 for (k = 0; k < nnsp_size; k++) { 6817 PetscReal real_value; 6818 PetscScalar *ptr_to_data; 6819 6820 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6821 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6822 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6823 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6824 /* check if array is null on the connected component */ 6825 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6826 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6827 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6828 temp_constraints++; 6829 total_counts++; 6830 if (!idxs_copied) { 6831 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6832 idxs_copied = PETSC_TRUE; 6833 } 6834 } 6835 } 6836 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6837 valid_constraints = temp_constraints; 6838 if (!pcbddc->use_nnsp_true && temp_constraints) { 6839 if (temp_constraints == 1) { /* just normalize the constraint */ 6840 PetscScalar norm, *ptr_to_data; 6841 6842 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6843 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6844 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6845 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6846 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6847 } else { /* perform SVD */ 6848 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6849 6850 if (use_pod) { 6851 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6852 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6853 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6854 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6855 from that computed using LAPACKgesvd 6856 -> This is due to a different computation of eigenvectors in LAPACKheev 6857 -> The quality of the POD-computed basis will be the same */ 6858 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6859 /* Store upper triangular part of correlation matrix */ 6860 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6861 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6862 for (j = 0; j < temp_constraints; j++) { 6863 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6864 } 6865 /* compute eigenvalues and eigenvectors of correlation matrix */ 6866 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6867 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6868 #if !defined(PETSC_USE_COMPLEX) 6869 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6870 #else 6871 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6872 #endif 6873 PetscCall(PetscFPTrapPop()); 6874 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6875 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6876 j = 0; 6877 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6878 total_counts = total_counts - j; 6879 valid_constraints = temp_constraints - j; 6880 /* scale and copy POD basis into used quadrature memory */ 6881 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6882 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6883 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6884 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6885 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6886 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6887 if (j < temp_constraints) { 6888 PetscInt ii; 6889 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6890 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6891 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6892 PetscCall(PetscFPTrapPop()); 6893 for (k = 0; k < temp_constraints - j; k++) { 6894 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6895 } 6896 } 6897 } else { 6898 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6899 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6900 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6901 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6902 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6903 #if !defined(PETSC_USE_COMPLEX) 6904 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6905 #else 6906 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6907 #endif 6908 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6909 PetscCall(PetscFPTrapPop()); 6910 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6911 k = temp_constraints; 6912 if (k > size_of_constraint) k = size_of_constraint; 6913 j = 0; 6914 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6915 valid_constraints = k - j; 6916 total_counts = total_counts - temp_constraints + valid_constraints; 6917 #else 6918 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6919 #endif /* on missing GESVD */ 6920 } 6921 } 6922 } 6923 /* update pointers information */ 6924 if (valid_constraints) { 6925 constraints_n[total_counts_cc] = valid_constraints; 6926 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6927 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6928 /* set change_of_basis flag */ 6929 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6930 total_counts_cc++; 6931 } 6932 } 6933 /* free workspace */ 6934 if (!skip_lapack) { 6935 PetscCall(PetscFree(work)); 6936 #if defined(PETSC_USE_COMPLEX) 6937 PetscCall(PetscFree(rwork)); 6938 #endif 6939 PetscCall(PetscFree(singular_vals)); 6940 PetscCall(PetscFree(correlation_mat)); 6941 PetscCall(PetscFree(temp_basis)); 6942 } 6943 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6944 PetscCall(PetscFree(localnearnullsp)); 6945 /* free index sets of faces, edges and vertices */ 6946 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6947 } else { 6948 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6949 6950 total_counts = 0; 6951 n_vertices = 0; 6952 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6953 max_constraints = 0; 6954 total_counts_cc = 0; 6955 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6956 total_counts += pcbddc->adaptive_constraints_n[i]; 6957 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6958 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6959 } 6960 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6961 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6962 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6963 constraints_data = pcbddc->adaptive_constraints_data; 6964 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6965 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6966 total_counts_cc = 0; 6967 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6968 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6969 } 6970 6971 max_size_of_constraint = 0; 6972 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6973 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6974 /* Change of basis */ 6975 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6976 if (pcbddc->use_change_of_basis) { 6977 for (i = 0; i < sub_schurs->n_subs; i++) { 6978 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6979 } 6980 } 6981 } 6982 pcbddc->local_primal_size = total_counts; 6983 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6984 6985 /* map constraints_idxs in boundary numbering */ 6986 if (pcbddc->use_change_of_basis) { 6987 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6988 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6989 } 6990 6991 /* Create constraint matrix */ 6992 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6993 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6994 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6995 6996 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6997 /* determine if a QR strategy is needed for change of basis */ 6998 qr_needed = pcbddc->use_qr_single; 6999 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 7000 total_primal_vertices = 0; 7001 pcbddc->local_primal_size_cc = 0; 7002 for (i = 0; i < total_counts_cc; i++) { 7003 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7004 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 7005 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 7006 pcbddc->local_primal_size_cc += 1; 7007 } else if (PetscBTLookup(change_basis, i)) { 7008 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 7009 pcbddc->local_primal_size_cc += constraints_n[i]; 7010 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 7011 PetscCall(PetscBTSet(qr_needed_idx, i)); 7012 qr_needed = PETSC_TRUE; 7013 } 7014 } else { 7015 pcbddc->local_primal_size_cc += 1; 7016 } 7017 } 7018 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 7019 pcbddc->n_vertices = total_primal_vertices; 7020 /* permute indices in order to have a sorted set of vertices */ 7021 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 7022 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 7023 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 7024 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 7025 7026 /* nonzero structure of constraint matrix */ 7027 /* and get reference dof for local constraints */ 7028 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 7029 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 7030 7031 j = total_primal_vertices; 7032 total_counts = total_primal_vertices; 7033 cum = total_primal_vertices; 7034 for (i = n_vertices; i < total_counts_cc; i++) { 7035 if (!PetscBTLookup(change_basis, i)) { 7036 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 7037 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 7038 cum++; 7039 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7040 for (k = 0; k < constraints_n[i]; k++) { 7041 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 7042 nnz[j + k] = size_of_constraint; 7043 } 7044 j += constraints_n[i]; 7045 } 7046 } 7047 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 7048 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7049 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 7050 PetscCall(PetscFree(nnz)); 7051 7052 /* set values in constraint matrix */ 7053 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 7054 total_counts = total_primal_vertices; 7055 for (i = n_vertices; i < total_counts_cc; i++) { 7056 if (!PetscBTLookup(change_basis, i)) { 7057 PetscInt *cols; 7058 7059 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7060 cols = constraints_idxs + constraints_idxs_ptr[i]; 7061 for (k = 0; k < constraints_n[i]; k++) { 7062 PetscInt row = total_counts + k; 7063 PetscScalar *vals; 7064 7065 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 7066 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 7067 } 7068 total_counts += constraints_n[i]; 7069 } 7070 } 7071 /* assembling */ 7072 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 7073 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 7074 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 7075 7076 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 7077 if (pcbddc->use_change_of_basis) { 7078 /* dual and primal dofs on a single cc */ 7079 PetscInt dual_dofs, primal_dofs; 7080 /* working stuff for GEQRF */ 7081 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 7082 PetscBLASInt lqr_work; 7083 /* working stuff for UNGQR */ 7084 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 7085 PetscBLASInt lgqr_work; 7086 /* working stuff for TRTRS */ 7087 PetscScalar *trs_rhs = NULL; 7088 PetscBLASInt Blas_NRHS; 7089 /* pointers for values insertion into change of basis matrix */ 7090 PetscInt *start_rows, *start_cols; 7091 PetscScalar *start_vals; 7092 /* working stuff for values insertion */ 7093 PetscBT is_primal; 7094 PetscInt *aux_primal_numbering_B; 7095 /* matrix sizes */ 7096 PetscInt global_size, local_size; 7097 /* temporary change of basis */ 7098 Mat localChangeOfBasisMatrix; 7099 /* extra space for debugging */ 7100 PetscScalar *dbg_work = NULL; 7101 7102 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 7103 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 7104 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 7105 /* nonzeros for local mat */ 7106 PetscCall(PetscMalloc1(pcis->n, &nnz)); 7107 if (!pcbddc->benign_change || pcbddc->fake_change) { 7108 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 7109 } else { 7110 const PetscInt *ii; 7111 PetscInt n; 7112 PetscBool flg_row; 7113 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7114 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 7115 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7116 } 7117 for (i = n_vertices; i < total_counts_cc; i++) { 7118 if (PetscBTLookup(change_basis, i)) { 7119 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7120 if (PetscBTLookup(qr_needed_idx, i)) { 7121 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 7122 } else { 7123 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 7124 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 7125 } 7126 } 7127 } 7128 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 7129 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7130 PetscCall(PetscFree(nnz)); 7131 /* Set interior change in the matrix */ 7132 if (!pcbddc->benign_change || pcbddc->fake_change) { 7133 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 7134 } else { 7135 const PetscInt *ii, *jj; 7136 PetscScalar *aa; 7137 PetscInt n; 7138 PetscBool flg_row; 7139 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7140 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 7141 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 7142 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 7143 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7144 } 7145 7146 if (pcbddc->dbg_flag) { 7147 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7148 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7149 } 7150 7151 /* Now we loop on the constraints which need a change of basis */ 7152 /* 7153 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7154 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7155 7156 Basic blocks of change of basis matrix T computed: 7157 7158 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7159 7160 | 1 0 ... 0 s_1/S | 7161 | 0 1 ... 0 s_2/S | 7162 | ... | 7163 | 0 ... 1 s_{n-1}/S | 7164 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7165 7166 with S = \sum_{i=1}^n s_i^2 7167 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7168 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7169 7170 - QR decomposition of constraints otherwise 7171 */ 7172 if (qr_needed && max_size_of_constraint) { 7173 /* space to store Q */ 7174 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7175 /* array to store scaling factors for reflectors */ 7176 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7177 /* first we issue queries for optimal work */ 7178 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7179 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7180 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7181 lqr_work = -1; 7182 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7183 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7184 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7185 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7186 lgqr_work = -1; 7187 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7188 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7189 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7190 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7191 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7192 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7193 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7194 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7195 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7196 /* array to store rhs and solution of triangular solver */ 7197 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7198 /* allocating workspace for check */ 7199 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7200 } 7201 /* array to store whether a node is primal or not */ 7202 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7203 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7204 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7205 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7206 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7207 PetscCall(PetscFree(aux_primal_numbering_B)); 7208 7209 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7210 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7211 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7212 if (PetscBTLookup(change_basis, total_counts)) { 7213 /* get constraint info */ 7214 primal_dofs = constraints_n[total_counts]; 7215 dual_dofs = size_of_constraint - primal_dofs; 7216 7217 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7218 7219 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7220 7221 /* copy quadrature constraints for change of basis check */ 7222 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7223 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7224 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7225 7226 /* compute QR decomposition of constraints */ 7227 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7228 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7229 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7230 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7231 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7232 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7233 PetscCall(PetscFPTrapPop()); 7234 7235 /* explicitly compute R^-T */ 7236 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7237 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7238 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7239 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7240 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7241 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7242 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7243 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7244 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr); 7245 PetscCall(PetscFPTrapPop()); 7246 7247 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7248 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7249 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7250 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7251 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7252 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7253 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7254 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7255 PetscCall(PetscFPTrapPop()); 7256 7257 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7258 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7259 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7260 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7261 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7262 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7263 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7264 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7265 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7266 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7267 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7268 PetscCall(PetscFPTrapPop()); 7269 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7270 7271 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7272 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7273 /* insert cols for primal dofs */ 7274 for (j = 0; j < primal_dofs; j++) { 7275 start_vals = &qr_basis[j * size_of_constraint]; 7276 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7277 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7278 } 7279 /* insert cols for dual dofs */ 7280 for (j = 0, k = 0; j < dual_dofs; k++) { 7281 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7282 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7283 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7284 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7285 j++; 7286 } 7287 } 7288 7289 /* check change of basis */ 7290 if (pcbddc->dbg_flag) { 7291 PetscInt ii, jj; 7292 PetscBool valid_qr = PETSC_TRUE; 7293 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7294 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7295 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7296 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7297 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7298 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7299 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7300 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7301 PetscCall(PetscFPTrapPop()); 7302 for (jj = 0; jj < size_of_constraint; jj++) { 7303 for (ii = 0; ii < primal_dofs; ii++) { 7304 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7305 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7306 } 7307 } 7308 if (!valid_qr) { 7309 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7310 for (jj = 0; jj < size_of_constraint; jj++) { 7311 for (ii = 0; ii < primal_dofs; ii++) { 7312 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7313 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7314 } 7315 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7316 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7317 } 7318 } 7319 } 7320 } else { 7321 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7322 } 7323 } 7324 } else { /* simple transformation block */ 7325 PetscInt row, col; 7326 PetscScalar val, norm; 7327 7328 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7329 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7330 for (j = 0; j < size_of_constraint; j++) { 7331 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7332 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7333 if (!PetscBTLookup(is_primal, row_B)) { 7334 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7335 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7336 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7337 } else { 7338 for (k = 0; k < size_of_constraint; k++) { 7339 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7340 if (row != col) { 7341 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7342 } else { 7343 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7344 } 7345 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7346 } 7347 } 7348 } 7349 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7350 } 7351 } else { 7352 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7353 } 7354 } 7355 7356 /* free workspace */ 7357 if (qr_needed) { 7358 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7359 PetscCall(PetscFree(trs_rhs)); 7360 PetscCall(PetscFree(qr_tau)); 7361 PetscCall(PetscFree(qr_work)); 7362 PetscCall(PetscFree(gqr_work)); 7363 PetscCall(PetscFree(qr_basis)); 7364 } 7365 PetscCall(PetscBTDestroy(&is_primal)); 7366 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7367 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7368 7369 /* assembling of global change of variable */ 7370 if (!pcbddc->fake_change) { 7371 Mat tmat; 7372 7373 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7374 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7375 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7376 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7377 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7378 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7379 PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7380 PetscCall(MatDestroy(&tmat)); 7381 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7382 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7383 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7384 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7385 PetscCall(VecReciprocal(pcis->vec1_global)); 7386 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7387 7388 /* check */ 7389 if (pcbddc->dbg_flag) { 7390 PetscReal error; 7391 Vec x, x_change; 7392 7393 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7394 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7395 PetscCall(VecSetRandom(x, NULL)); 7396 PetscCall(VecCopy(x, pcis->vec1_global)); 7397 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7398 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7399 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7400 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7401 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7402 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7403 PetscCall(VecAXPY(x, -1.0, x_change)); 7404 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7405 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7406 PetscCall(VecDestroy(&x)); 7407 PetscCall(VecDestroy(&x_change)); 7408 } 7409 /* adapt sub_schurs computed (if any) */ 7410 if (pcbddc->use_deluxe_scaling) { 7411 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7412 7413 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7414 if (sub_schurs && sub_schurs->S_Ej_all) { 7415 Mat S_new, tmat; 7416 IS is_all_N, is_V_Sall = NULL; 7417 7418 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7419 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7420 if (pcbddc->deluxe_zerorows) { 7421 ISLocalToGlobalMapping NtoSall; 7422 IS is_V; 7423 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7424 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7425 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7426 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7427 PetscCall(ISDestroy(&is_V)); 7428 } 7429 PetscCall(ISDestroy(&is_all_N)); 7430 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7431 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7432 PetscCall(PetscObjectReference((PetscObject)S_new)); 7433 if (pcbddc->deluxe_zerorows) { 7434 const PetscScalar *array; 7435 const PetscInt *idxs_V, *idxs_all; 7436 PetscInt i, n_V; 7437 7438 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7439 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7440 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7441 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7442 PetscCall(VecGetArrayRead(pcis->D, &array)); 7443 for (i = 0; i < n_V; i++) { 7444 PetscScalar val; 7445 PetscInt idx; 7446 7447 idx = idxs_V[i]; 7448 val = array[idxs_all[idxs_V[i]]]; 7449 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7450 } 7451 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7452 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7453 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7454 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7455 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7456 } 7457 sub_schurs->S_Ej_all = S_new; 7458 PetscCall(MatDestroy(&S_new)); 7459 if (sub_schurs->sum_S_Ej_all) { 7460 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7461 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7462 PetscCall(PetscObjectReference((PetscObject)S_new)); 7463 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7464 sub_schurs->sum_S_Ej_all = S_new; 7465 PetscCall(MatDestroy(&S_new)); 7466 } 7467 PetscCall(ISDestroy(&is_V_Sall)); 7468 PetscCall(MatDestroy(&tmat)); 7469 } 7470 /* destroy any change of basis context in sub_schurs */ 7471 if (sub_schurs && sub_schurs->change) { 7472 PetscInt i; 7473 7474 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7475 PetscCall(PetscFree(sub_schurs->change)); 7476 } 7477 } 7478 if (pcbddc->switch_static) { /* need to save the local change */ 7479 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7480 } else { 7481 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7482 } 7483 /* determine if any process has changed the pressures locally */ 7484 pcbddc->change_interior = pcbddc->benign_have_null; 7485 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7486 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7487 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7488 pcbddc->use_qr_single = qr_needed; 7489 } 7490 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7491 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7492 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7493 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7494 } else { 7495 Mat benign_global = NULL; 7496 if (pcbddc->benign_have_null) { 7497 Mat M; 7498 7499 pcbddc->change_interior = PETSC_TRUE; 7500 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7501 PetscCall(VecReciprocal(pcis->vec1_N)); 7502 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7503 if (pcbddc->benign_change) { 7504 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7505 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7506 } else { 7507 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7508 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7509 } 7510 PetscCall(MatISSetLocalMat(benign_global, M)); 7511 PetscCall(MatDestroy(&M)); 7512 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7513 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7514 } 7515 if (pcbddc->user_ChangeOfBasisMatrix) { 7516 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7517 PetscCall(MatDestroy(&benign_global)); 7518 } else if (pcbddc->benign_have_null) { 7519 pcbddc->ChangeOfBasisMatrix = benign_global; 7520 } 7521 } 7522 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7523 IS is_global; 7524 const PetscInt *gidxs; 7525 7526 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7527 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7528 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7529 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7530 PetscCall(ISDestroy(&is_global)); 7531 } 7532 } 7533 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7534 7535 if (!pcbddc->fake_change) { 7536 /* add pressure dofs to set of primal nodes for numbering purposes */ 7537 for (i = 0; i < pcbddc->benign_n; i++) { 7538 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7539 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7540 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7541 pcbddc->local_primal_size_cc++; 7542 pcbddc->local_primal_size++; 7543 } 7544 7545 /* check if a new primal space has been introduced (also take into account benign trick) */ 7546 pcbddc->new_primal_space_local = PETSC_TRUE; 7547 if (olocal_primal_size == pcbddc->local_primal_size) { 7548 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7549 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7550 if (!pcbddc->new_primal_space_local) { 7551 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7552 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7553 } 7554 } 7555 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7556 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7557 } 7558 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7559 7560 /* flush dbg viewer */ 7561 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7562 7563 /* free workspace */ 7564 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7565 PetscCall(PetscBTDestroy(&change_basis)); 7566 if (!pcbddc->adaptive_selection) { 7567 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7568 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7569 } else { 7570 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7571 PetscCall(PetscFree(constraints_n)); 7572 PetscCall(PetscFree(constraints_idxs_B)); 7573 } 7574 PetscFunctionReturn(PETSC_SUCCESS); 7575 } 7576 7577 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7578 { 7579 ISLocalToGlobalMapping map; 7580 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7581 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7582 PetscInt i, N; 7583 PetscBool rcsr = PETSC_FALSE; 7584 7585 PetscFunctionBegin; 7586 if (pcbddc->recompute_topography) { 7587 pcbddc->graphanalyzed = PETSC_FALSE; 7588 /* Reset previously computed graph */ 7589 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7590 /* Init local Graph struct */ 7591 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7592 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7593 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7594 7595 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7596 /* Check validity of the csr graph passed in by the user */ 7597 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7598 pcbddc->mat_graph->nvtxs); 7599 7600 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7601 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7602 PetscInt *xadj, *adjncy; 7603 PetscInt nvtxs; 7604 PetscBool flg_row; 7605 Mat A; 7606 7607 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7608 A = matis->A; 7609 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7610 Mat AtA; 7611 7612 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7613 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7614 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7615 PetscCall(MatProductSetFromOptions(AtA)); 7616 PetscCall(MatProductSymbolic(AtA)); 7617 PetscCall(MatProductClear(AtA)); 7618 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7619 AtA->assembled = PETSC_TRUE; 7620 PetscCall(MatDestroy(&A)); 7621 A = AtA; 7622 } 7623 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7624 if (flg_row) { 7625 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7626 pcbddc->computed_rowadj = PETSC_TRUE; 7627 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7628 rcsr = PETSC_TRUE; 7629 } 7630 PetscCall(MatDestroy(&A)); 7631 } 7632 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7633 7634 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7635 PetscReal *lcoords; 7636 PetscInt n; 7637 MPI_Datatype dimrealtype; 7638 PetscMPIInt cdimi; 7639 7640 /* TODO: support for blocked */ 7641 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7642 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7643 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7644 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi)); 7645 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype)); 7646 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7647 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7648 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7649 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7650 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7651 7652 pcbddc->mat_graph->coords = lcoords; 7653 pcbddc->mat_graph->cloc = PETSC_TRUE; 7654 pcbddc->mat_graph->cnloc = n; 7655 } 7656 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7657 pcbddc->mat_graph->nvtxs); 7658 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7659 7660 /* attach info on disconnected subdomains if present */ 7661 if (pcbddc->n_local_subs) { 7662 PetscInt *local_subs, n, totn; 7663 7664 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7665 PetscCall(PetscMalloc1(n, &local_subs)); 7666 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7667 for (i = 0; i < pcbddc->n_local_subs; i++) { 7668 const PetscInt *idxs; 7669 PetscInt nl, j; 7670 7671 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7672 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7673 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7674 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7675 } 7676 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7677 pcbddc->mat_graph->n_local_subs = totn + 1; 7678 pcbddc->mat_graph->local_subs = local_subs; 7679 } 7680 7681 /* Setup of Graph */ 7682 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7683 } 7684 7685 if (!pcbddc->graphanalyzed) { 7686 /* Graph's connected components analysis */ 7687 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7688 pcbddc->graphanalyzed = PETSC_TRUE; 7689 pcbddc->corner_selected = pcbddc->corner_selection; 7690 } 7691 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7692 PetscFunctionReturn(PETSC_SUCCESS); 7693 } 7694 7695 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7696 { 7697 PetscInt i, j, n; 7698 PetscScalar *alphas; 7699 PetscReal norm, *onorms; 7700 7701 PetscFunctionBegin; 7702 n = *nio; 7703 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7704 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7705 PetscCall(VecNormalize(vecs[0], &norm)); 7706 if (norm < PETSC_SMALL) { 7707 onorms[0] = 0.0; 7708 PetscCall(VecSet(vecs[0], 0.0)); 7709 } else { 7710 onorms[0] = norm; 7711 } 7712 7713 for (i = 1; i < n; i++) { 7714 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7715 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7716 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7717 PetscCall(VecNormalize(vecs[i], &norm)); 7718 if (norm < PETSC_SMALL) { 7719 onorms[i] = 0.0; 7720 PetscCall(VecSet(vecs[i], 0.0)); 7721 } else { 7722 onorms[i] = norm; 7723 } 7724 } 7725 /* push nonzero vectors at the beginning */ 7726 for (i = 0; i < n; i++) { 7727 if (onorms[i] == 0.0) { 7728 for (j = i + 1; j < n; j++) { 7729 if (onorms[j] != 0.0) { 7730 PetscCall(VecCopy(vecs[j], vecs[i])); 7731 onorms[i] = onorms[j]; 7732 onorms[j] = 0.0; 7733 break; 7734 } 7735 } 7736 } 7737 } 7738 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7739 PetscCall(PetscFree2(alphas, onorms)); 7740 PetscFunctionReturn(PETSC_SUCCESS); 7741 } 7742 7743 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7744 { 7745 ISLocalToGlobalMapping mapping; 7746 Mat A; 7747 PetscInt n_neighs, *neighs, *n_shared, **shared; 7748 PetscMPIInt size, rank, color; 7749 PetscInt *xadj, *adjncy; 7750 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7751 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7752 PetscInt void_procs, *procs_candidates = NULL; 7753 PetscInt xadj_count, *count; 7754 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7755 PetscSubcomm psubcomm; 7756 MPI_Comm subcomm; 7757 7758 PetscFunctionBegin; 7759 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7760 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7761 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7762 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7763 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7764 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7765 7766 if (have_void) *have_void = PETSC_FALSE; 7767 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7768 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7769 PetscCall(MatISGetLocalMat(mat, &A)); 7770 PetscCall(MatGetLocalSize(A, &n, NULL)); 7771 im_active = !!n; 7772 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7773 void_procs = size - active_procs; 7774 /* get ranks of non-active processes in mat communicator */ 7775 if (void_procs) { 7776 PetscInt ncand; 7777 7778 if (have_void) *have_void = PETSC_TRUE; 7779 PetscCall(PetscMalloc1(size, &procs_candidates)); 7780 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7781 for (i = 0, ncand = 0; i < size; i++) { 7782 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7783 } 7784 /* force n_subdomains to be not greater that the number of non-active processes */ 7785 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7786 } 7787 7788 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7789 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7790 PetscCall(MatGetSize(mat, &N, NULL)); 7791 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7792 PetscInt issize, isidx, dest; 7793 PetscBool default_sub; 7794 7795 if (*n_subdomains == 1) dest = 0; 7796 else dest = rank; 7797 if (im_active) { 7798 issize = 1; 7799 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7800 isidx = procs_candidates[dest]; 7801 } else { 7802 isidx = dest; 7803 } 7804 } else { 7805 issize = 0; 7806 isidx = rank; 7807 } 7808 if (*n_subdomains != 1) *n_subdomains = active_procs; 7809 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7810 default_sub = (PetscBool)(isidx == rank); 7811 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 7812 if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling")); 7813 PetscCall(PetscFree(procs_candidates)); 7814 PetscFunctionReturn(PETSC_SUCCESS); 7815 } 7816 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7817 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7818 threshold = PetscMax(threshold, 2); 7819 7820 /* Get info on mapping */ 7821 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7822 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7823 7824 /* build local CSR graph of subdomains' connectivity */ 7825 PetscCall(PetscMalloc1(2, &xadj)); 7826 xadj[0] = 0; 7827 xadj[1] = PetscMax(n_neighs - 1, 0); 7828 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7829 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7830 PetscCall(PetscCalloc1(n, &count)); 7831 for (i = 1; i < n_neighs; i++) 7832 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7833 7834 xadj_count = 0; 7835 for (i = 1; i < n_neighs; i++) { 7836 for (j = 0; j < n_shared[i]; j++) { 7837 if (count[shared[i][j]] < threshold) { 7838 adjncy[xadj_count] = neighs[i]; 7839 adjncy_wgt[xadj_count] = n_shared[i]; 7840 xadj_count++; 7841 break; 7842 } 7843 } 7844 } 7845 xadj[1] = xadj_count; 7846 PetscCall(PetscFree(count)); 7847 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7848 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7849 7850 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7851 7852 /* Restrict work on active processes only */ 7853 PetscCall(PetscMPIIntCast(im_active, &color)); 7854 if (void_procs) { 7855 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7856 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7857 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7858 subcomm = PetscSubcommChild(psubcomm); 7859 } else { 7860 psubcomm = NULL; 7861 subcomm = PetscObjectComm((PetscObject)mat); 7862 } 7863 7864 v_wgt = NULL; 7865 if (!color) { 7866 PetscCall(PetscFree(xadj)); 7867 PetscCall(PetscFree(adjncy)); 7868 PetscCall(PetscFree(adjncy_wgt)); 7869 } else { 7870 Mat subdomain_adj; 7871 IS new_ranks, new_ranks_contig; 7872 MatPartitioning partitioner; 7873 PetscInt rstart, rend; 7874 PetscMPIInt irstart = 0, irend = 0; 7875 PetscInt *is_indices, *oldranks; 7876 PetscMPIInt size; 7877 PetscBool aggregate; 7878 7879 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7880 if (void_procs) { 7881 PetscInt prank = rank; 7882 PetscCall(PetscMalloc1(size, &oldranks)); 7883 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7884 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7885 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7886 } else { 7887 oldranks = NULL; 7888 } 7889 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7890 if (aggregate) { /* TODO: all this part could be made more efficient */ 7891 PetscInt lrows, row, ncols, *cols; 7892 PetscMPIInt nrank; 7893 PetscScalar *vals; 7894 7895 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7896 lrows = 0; 7897 if (nrank < redprocs) { 7898 lrows = size / redprocs; 7899 if (nrank < size % redprocs) lrows++; 7900 } 7901 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7902 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7903 PetscCall(PetscMPIIntCast(rstart, &irstart)); 7904 PetscCall(PetscMPIIntCast(rend, &irend)); 7905 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7906 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7907 row = nrank; 7908 ncols = xadj[1] - xadj[0]; 7909 cols = adjncy; 7910 PetscCall(PetscMalloc1(ncols, &vals)); 7911 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7912 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7913 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7914 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7915 PetscCall(PetscFree(xadj)); 7916 PetscCall(PetscFree(adjncy)); 7917 PetscCall(PetscFree(adjncy_wgt)); 7918 PetscCall(PetscFree(vals)); 7919 if (use_vwgt) { 7920 Vec v; 7921 const PetscScalar *array; 7922 PetscInt nl; 7923 7924 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7925 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7926 PetscCall(VecAssemblyBegin(v)); 7927 PetscCall(VecAssemblyEnd(v)); 7928 PetscCall(VecGetLocalSize(v, &nl)); 7929 PetscCall(VecGetArrayRead(v, &array)); 7930 PetscCall(PetscMalloc1(nl, &v_wgt)); 7931 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7932 PetscCall(VecRestoreArrayRead(v, &array)); 7933 PetscCall(VecDestroy(&v)); 7934 } 7935 } else { 7936 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7937 if (use_vwgt) { 7938 PetscCall(PetscMalloc1(1, &v_wgt)); 7939 v_wgt[0] = n; 7940 } 7941 } 7942 /* PetscCall(MatView(subdomain_adj,0)); */ 7943 7944 /* Partition */ 7945 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7946 #if defined(PETSC_HAVE_PTSCOTCH) 7947 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7948 #elif defined(PETSC_HAVE_PARMETIS) 7949 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7950 #else 7951 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7952 #endif 7953 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7954 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7955 *n_subdomains = PetscMin(size, *n_subdomains); 7956 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7957 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7958 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7959 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7960 7961 /* renumber new_ranks to avoid "holes" in new set of processors */ 7962 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7963 PetscCall(ISDestroy(&new_ranks)); 7964 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7965 if (!aggregate) { 7966 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7967 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7968 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7969 } else if (oldranks) { 7970 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7971 } else { 7972 ranks_send_to_idx[0] = is_indices[0]; 7973 } 7974 } else { 7975 PetscInt idx = 0; 7976 PetscMPIInt tag; 7977 MPI_Request *reqs; 7978 7979 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7980 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7981 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7982 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7983 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7984 PetscCall(PetscFree(reqs)); 7985 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7986 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7987 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7988 } else if (oldranks) { 7989 ranks_send_to_idx[0] = oldranks[idx]; 7990 } else { 7991 ranks_send_to_idx[0] = idx; 7992 } 7993 } 7994 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7995 /* clean up */ 7996 PetscCall(PetscFree(oldranks)); 7997 PetscCall(ISDestroy(&new_ranks_contig)); 7998 PetscCall(MatDestroy(&subdomain_adj)); 7999 PetscCall(MatPartitioningDestroy(&partitioner)); 8000 } 8001 PetscCall(PetscSubcommDestroy(&psubcomm)); 8002 PetscCall(PetscFree(procs_candidates)); 8003 8004 /* assemble parallel IS for sends */ 8005 i = 1; 8006 if (!color) i = 0; 8007 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 8008 PetscFunctionReturn(PETSC_SUCCESS); 8009 } 8010 8011 typedef enum { 8012 MATDENSE_PRIVATE = 0, 8013 MATAIJ_PRIVATE, 8014 MATBAIJ_PRIVATE, 8015 MATSBAIJ_PRIVATE 8016 } MatTypePrivate; 8017 8018 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 8019 { 8020 Mat local_mat; 8021 IS is_sends_internal; 8022 PetscInt rows, cols, new_local_rows; 8023 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 8024 PetscBool ismatis, isdense, newisdense, destroy_mat; 8025 ISLocalToGlobalMapping l2gmap; 8026 PetscInt *l2gmap_indices; 8027 const PetscInt *is_indices; 8028 MatType new_local_type; 8029 /* buffers */ 8030 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 8031 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 8032 PetscInt *recv_buffer_idxs_local; 8033 PetscScalar *ptr_vals, *recv_buffer_vals; 8034 const PetscScalar *send_buffer_vals; 8035 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 8036 /* MPI */ 8037 MPI_Comm comm, comm_n; 8038 PetscSubcomm subcomm; 8039 PetscMPIInt n_sends, n_recvs, size; 8040 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 8041 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 8042 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 8043 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 8044 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 8045 8046 PetscFunctionBegin; 8047 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 8048 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 8049 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 8050 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 8051 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 8052 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 8053 PetscValidLogicalCollectiveBool(mat, reuse, 6); 8054 PetscValidLogicalCollectiveInt(mat, nis, 8); 8055 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 8056 if (nvecs) { 8057 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 8058 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 8059 } 8060 /* further checks */ 8061 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8062 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 8063 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 8064 8065 PetscCall(MatGetSize(local_mat, &rows, &cols)); 8066 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 8067 if (reuse && *mat_n) { 8068 PetscInt mrows, mcols, mnrows, mncols; 8069 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 8070 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 8071 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 8072 PetscCall(MatGetSize(mat, &mrows, &mcols)); 8073 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 8074 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 8075 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 8076 } 8077 PetscCall(MatGetBlockSize(local_mat, &bs)); 8078 PetscValidLogicalCollectiveInt(mat, bs, 1); 8079 8080 /* prepare IS for sending if not provided */ 8081 if (!is_sends) { 8082 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 8083 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 8084 } else { 8085 PetscCall(PetscObjectReference((PetscObject)is_sends)); 8086 is_sends_internal = is_sends; 8087 } 8088 8089 /* get comm */ 8090 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 8091 8092 /* compute number of sends */ 8093 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 8094 PetscCall(PetscMPIIntCast(i, &n_sends)); 8095 8096 /* compute number of receives */ 8097 PetscCallMPI(MPI_Comm_size(comm, &size)); 8098 PetscCall(PetscMalloc1(size, &iflags)); 8099 PetscCall(PetscArrayzero(iflags, size)); 8100 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 8101 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 8102 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 8103 PetscCall(PetscFree(iflags)); 8104 8105 /* restrict comm if requested */ 8106 subcomm = NULL; 8107 destroy_mat = PETSC_FALSE; 8108 if (restrict_comm) { 8109 PetscMPIInt color, subcommsize; 8110 8111 color = 0; 8112 if (restrict_full) { 8113 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 8114 } else { 8115 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 8116 } 8117 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 8118 subcommsize = size - subcommsize; 8119 /* check if reuse has been requested */ 8120 if (reuse) { 8121 if (*mat_n) { 8122 PetscMPIInt subcommsize2; 8123 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 8124 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 8125 comm_n = PetscObjectComm((PetscObject)*mat_n); 8126 } else { 8127 comm_n = PETSC_COMM_SELF; 8128 } 8129 } else { /* MAT_INITIAL_MATRIX */ 8130 PetscMPIInt rank; 8131 8132 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 8133 PetscCall(PetscSubcommCreate(comm, &subcomm)); 8134 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 8135 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 8136 comm_n = PetscSubcommChild(subcomm); 8137 } 8138 /* flag to destroy *mat_n if not significative */ 8139 if (color) destroy_mat = PETSC_TRUE; 8140 } else { 8141 comm_n = comm; 8142 } 8143 8144 /* prepare send/receive buffers */ 8145 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 8146 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8147 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8148 PetscCall(PetscArrayzero(ilengths_vals, size)); 8149 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8150 8151 /* Get data from local matrices */ 8152 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8153 /* TODO: See below some guidelines on how to prepare the local buffers */ 8154 /* 8155 send_buffer_vals should contain the raw values of the local matrix 8156 send_buffer_idxs should contain: 8157 - MatType_PRIVATE type 8158 - PetscInt size_of_l2gmap 8159 - PetscInt global_row_indices[size_of_l2gmap] 8160 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8161 */ 8162 { 8163 ISLocalToGlobalMapping mapping; 8164 8165 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8166 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8167 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8168 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8169 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8170 send_buffer_idxs[1] = i; 8171 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8172 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8173 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8174 PetscCall(PetscMPIIntCast(i, &len)); 8175 for (i = 0; i < n_sends; i++) { 8176 ilengths_vals[is_indices[i]] = len * len; 8177 ilengths_idxs[is_indices[i]] = len + 2; 8178 } 8179 } 8180 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8181 /* additional is (if any) */ 8182 if (nis) { 8183 PetscMPIInt psum; 8184 PetscInt j; 8185 for (j = 0, psum = 0; j < nis; j++) { 8186 PetscInt plen; 8187 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8188 PetscCall(PetscMPIIntCast(plen, &len)); 8189 psum += len + 1; /* indices + length */ 8190 } 8191 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8192 for (j = 0, psum = 0; j < nis; j++) { 8193 PetscInt plen; 8194 const PetscInt *is_array_idxs; 8195 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8196 send_buffer_idxs_is[psum] = plen; 8197 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8198 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8199 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8200 psum += plen + 1; /* indices + length */ 8201 } 8202 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8203 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8204 } 8205 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8206 8207 buf_size_idxs = 0; 8208 buf_size_vals = 0; 8209 buf_size_idxs_is = 0; 8210 buf_size_vecs = 0; 8211 for (i = 0; i < n_recvs; i++) { 8212 buf_size_idxs += olengths_idxs[i]; 8213 buf_size_vals += olengths_vals[i]; 8214 if (nis) buf_size_idxs_is += olengths_idxs_is[i]; 8215 if (nvecs) buf_size_vecs += olengths_idxs[i]; 8216 } 8217 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8218 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8219 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8220 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8221 8222 /* get new tags for clean communications */ 8223 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8224 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8225 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8226 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8227 8228 /* allocate for requests */ 8229 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8230 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8231 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8232 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8233 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8234 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8235 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8236 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8237 8238 /* communications */ 8239 ptr_idxs = recv_buffer_idxs; 8240 ptr_vals = recv_buffer_vals; 8241 ptr_idxs_is = recv_buffer_idxs_is; 8242 ptr_vecs = recv_buffer_vecs; 8243 for (i = 0; i < n_recvs; i++) { 8244 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i])); 8245 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i])); 8246 ptr_idxs += olengths_idxs[i]; 8247 ptr_vals += olengths_vals[i]; 8248 if (nis) { 8249 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i])); 8250 ptr_idxs_is += olengths_idxs_is[i]; 8251 } 8252 if (nvecs) { 8253 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i])); 8254 ptr_vecs += olengths_idxs[i] - 2; 8255 } 8256 } 8257 for (i = 0; i < n_sends; i++) { 8258 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8259 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8260 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8261 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8262 if (nvecs) { 8263 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8264 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8265 } 8266 } 8267 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8268 PetscCall(ISDestroy(&is_sends_internal)); 8269 8270 /* assemble new l2g map */ 8271 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8272 ptr_idxs = recv_buffer_idxs; 8273 new_local_rows = 0; 8274 for (i = 0; i < n_recvs; i++) { 8275 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8276 ptr_idxs += olengths_idxs[i]; 8277 } 8278 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8279 ptr_idxs = recv_buffer_idxs; 8280 new_local_rows = 0; 8281 for (i = 0; i < n_recvs; i++) { 8282 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8283 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8284 ptr_idxs += olengths_idxs[i]; 8285 } 8286 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8287 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8288 PetscCall(PetscFree(l2gmap_indices)); 8289 8290 /* infer new local matrix type from received local matrices type */ 8291 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8292 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8293 if (n_recvs) { 8294 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8295 ptr_idxs = recv_buffer_idxs; 8296 for (i = 0; i < n_recvs; i++) { 8297 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8298 new_local_type_private = MATAIJ_PRIVATE; 8299 break; 8300 } 8301 ptr_idxs += olengths_idxs[i]; 8302 } 8303 switch (new_local_type_private) { 8304 case MATDENSE_PRIVATE: 8305 new_local_type = MATSEQAIJ; 8306 bs = 1; 8307 break; 8308 case MATAIJ_PRIVATE: 8309 new_local_type = MATSEQAIJ; 8310 bs = 1; 8311 break; 8312 case MATBAIJ_PRIVATE: 8313 new_local_type = MATSEQBAIJ; 8314 break; 8315 case MATSBAIJ_PRIVATE: 8316 new_local_type = MATSEQSBAIJ; 8317 break; 8318 default: 8319 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8320 } 8321 } else { /* by default, new_local_type is seqaij */ 8322 new_local_type = MATSEQAIJ; 8323 bs = 1; 8324 } 8325 8326 /* create MATIS object if needed */ 8327 if (!reuse) { 8328 PetscCall(MatGetSize(mat, &rows, &cols)); 8329 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8330 } else { 8331 /* it also destroys the local matrices */ 8332 if (*mat_n) { 8333 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8334 } else { /* this is a fake object */ 8335 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8336 } 8337 } 8338 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8339 PetscCall(MatSetType(local_mat, new_local_type)); 8340 8341 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8342 8343 /* Global to local map of received indices */ 8344 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8345 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8346 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8347 8348 /* restore attributes -> type of incoming data and its size */ 8349 buf_size_idxs = 0; 8350 for (i = 0; i < n_recvs; i++) { 8351 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8352 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8353 buf_size_idxs += olengths_idxs[i]; 8354 } 8355 PetscCall(PetscFree(recv_buffer_idxs)); 8356 8357 /* set preallocation */ 8358 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8359 if (!newisdense) { 8360 PetscInt *new_local_nnz = NULL; 8361 8362 ptr_idxs = recv_buffer_idxs_local; 8363 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8364 for (i = 0; i < n_recvs; i++) { 8365 PetscInt j; 8366 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8367 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8368 } else { 8369 /* TODO */ 8370 } 8371 ptr_idxs += olengths_idxs[i]; 8372 } 8373 if (new_local_nnz) { 8374 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8375 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8376 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8377 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8378 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8379 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8380 } else { 8381 PetscCall(MatSetUp(local_mat)); 8382 } 8383 PetscCall(PetscFree(new_local_nnz)); 8384 } else { 8385 PetscCall(MatSetUp(local_mat)); 8386 } 8387 8388 /* set values */ 8389 ptr_vals = recv_buffer_vals; 8390 ptr_idxs = recv_buffer_idxs_local; 8391 for (i = 0; i < n_recvs; i++) { 8392 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8393 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8394 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8395 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8396 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8397 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8398 } else { 8399 /* TODO */ 8400 } 8401 ptr_idxs += olengths_idxs[i]; 8402 ptr_vals += olengths_vals[i]; 8403 } 8404 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8405 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8406 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8407 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8408 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8409 PetscCall(PetscFree(recv_buffer_vals)); 8410 8411 #if 0 8412 if (!restrict_comm) { /* check */ 8413 Vec lvec,rvec; 8414 PetscReal infty_error; 8415 8416 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8417 PetscCall(VecSetRandom(rvec,NULL)); 8418 PetscCall(MatMult(mat,rvec,lvec)); 8419 PetscCall(VecScale(lvec,-1.0)); 8420 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8421 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8422 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8423 PetscCall(VecDestroy(&rvec)); 8424 PetscCall(VecDestroy(&lvec)); 8425 } 8426 #endif 8427 8428 /* assemble new additional is (if any) */ 8429 if (nis) { 8430 PetscInt **temp_idxs, *count_is, j, psum; 8431 8432 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8433 PetscCall(PetscCalloc1(nis, &count_is)); 8434 ptr_idxs = recv_buffer_idxs_is; 8435 psum = 0; 8436 for (i = 0; i < n_recvs; i++) { 8437 for (j = 0; j < nis; j++) { 8438 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8439 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8440 psum += plen; 8441 ptr_idxs += plen + 1; /* shift pointer to received data */ 8442 } 8443 } 8444 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8445 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8446 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8447 PetscCall(PetscArrayzero(count_is, nis)); 8448 ptr_idxs = recv_buffer_idxs_is; 8449 for (i = 0; i < n_recvs; i++) { 8450 for (j = 0; j < nis; j++) { 8451 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8452 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8453 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8454 ptr_idxs += plen + 1; /* shift pointer to received data */ 8455 } 8456 } 8457 for (i = 0; i < nis; i++) { 8458 PetscCall(ISDestroy(&isarray[i])); 8459 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8460 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8461 } 8462 PetscCall(PetscFree(count_is)); 8463 PetscCall(PetscFree(temp_idxs[0])); 8464 PetscCall(PetscFree(temp_idxs)); 8465 } 8466 /* free workspace */ 8467 PetscCall(PetscFree(recv_buffer_idxs_is)); 8468 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8469 PetscCall(PetscFree(send_buffer_idxs)); 8470 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8471 if (isdense) { 8472 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8473 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8474 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8475 } else { 8476 /* PetscCall(PetscFree(send_buffer_vals)); */ 8477 } 8478 if (nis) { 8479 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8480 PetscCall(PetscFree(send_buffer_idxs_is)); 8481 } 8482 8483 if (nvecs) { 8484 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8485 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8486 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8487 PetscCall(VecDestroy(&nnsp_vec[0])); 8488 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8489 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8490 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8491 /* set values */ 8492 ptr_vals = recv_buffer_vecs; 8493 ptr_idxs = recv_buffer_idxs_local; 8494 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8495 for (i = 0; i < n_recvs; i++) { 8496 PetscInt j; 8497 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8498 ptr_idxs += olengths_idxs[i]; 8499 ptr_vals += olengths_idxs[i] - 2; 8500 } 8501 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8502 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8503 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8504 } 8505 8506 PetscCall(PetscFree(recv_buffer_vecs)); 8507 PetscCall(PetscFree(recv_buffer_idxs_local)); 8508 PetscCall(PetscFree(recv_req_idxs)); 8509 PetscCall(PetscFree(recv_req_vals)); 8510 PetscCall(PetscFree(recv_req_vecs)); 8511 PetscCall(PetscFree(recv_req_idxs_is)); 8512 PetscCall(PetscFree(send_req_idxs)); 8513 PetscCall(PetscFree(send_req_vals)); 8514 PetscCall(PetscFree(send_req_vecs)); 8515 PetscCall(PetscFree(send_req_idxs_is)); 8516 PetscCall(PetscFree(ilengths_vals)); 8517 PetscCall(PetscFree(ilengths_idxs)); 8518 PetscCall(PetscFree(olengths_vals)); 8519 PetscCall(PetscFree(olengths_idxs)); 8520 PetscCall(PetscFree(onodes)); 8521 if (nis) { 8522 PetscCall(PetscFree(ilengths_idxs_is)); 8523 PetscCall(PetscFree(olengths_idxs_is)); 8524 PetscCall(PetscFree(onodes_is)); 8525 } 8526 PetscCall(PetscSubcommDestroy(&subcomm)); 8527 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8528 PetscCall(MatDestroy(mat_n)); 8529 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8530 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8531 PetscCall(VecDestroy(&nnsp_vec[0])); 8532 } 8533 *mat_n = NULL; 8534 } 8535 PetscFunctionReturn(PETSC_SUCCESS); 8536 } 8537 8538 /* temporary hack into ksp private data structure */ 8539 #include <petsc/private/kspimpl.h> 8540 8541 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8542 { 8543 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8544 PC_IS *pcis = (PC_IS *)pc->data; 8545 PCBDDCGraph graph = pcbddc->mat_graph; 8546 Mat coarse_mat, coarse_mat_is; 8547 Mat coarsedivudotp = NULL; 8548 Mat coarseG, t_coarse_mat_is; 8549 MatNullSpace CoarseNullSpace = NULL; 8550 ISLocalToGlobalMapping coarse_islg; 8551 IS coarse_is, *isarray, corners; 8552 PetscInt i, im_active = -1, active_procs = -1; 8553 PetscInt nis, nisdofs, nisneu, nisvert; 8554 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8555 PC pc_temp; 8556 PCType coarse_pc_type; 8557 KSPType coarse_ksp_type; 8558 PetscBool multilevel_requested, multilevel_allowed; 8559 PetscBool coarse_reuse, multi_element = graph->multi_element; 8560 PetscInt ncoarse, nedcfield; 8561 PetscBool compute_vecs = PETSC_FALSE; 8562 PetscScalar *array; 8563 MatReuse coarse_mat_reuse; 8564 PetscBool restr, full_restr, have_void; 8565 PetscMPIInt size; 8566 8567 PetscFunctionBegin; 8568 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8569 /* Assign global numbering to coarse dofs */ 8570 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8571 PetscInt ocoarse_size; 8572 compute_vecs = PETSC_TRUE; 8573 8574 pcbddc->new_primal_space = PETSC_TRUE; 8575 ocoarse_size = pcbddc->coarse_size; 8576 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8577 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8578 /* see if we can avoid some work */ 8579 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8580 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8581 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8582 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8583 coarse_reuse = PETSC_FALSE; 8584 } else { /* we can safely reuse already computed coarse matrix */ 8585 coarse_reuse = PETSC_TRUE; 8586 } 8587 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8588 coarse_reuse = PETSC_FALSE; 8589 } 8590 /* reset any subassembling information */ 8591 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8592 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8593 coarse_reuse = PETSC_TRUE; 8594 } 8595 if (coarse_reuse && pcbddc->coarse_ksp) { 8596 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8597 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8598 coarse_mat_reuse = MAT_REUSE_MATRIX; 8599 } else { 8600 coarse_mat = NULL; 8601 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8602 } 8603 8604 /* creates temporary l2gmap and IS for coarse indexes */ 8605 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8606 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8607 8608 /* creates temporary MATIS object for coarse matrix */ 8609 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8610 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8611 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8612 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element)); 8613 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8614 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8615 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8616 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8617 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8618 8619 /* count "active" (i.e. with positive local size) and "void" processes */ 8620 im_active = !!pcis->n; 8621 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8622 8623 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8624 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8625 /* full_restr : just use the receivers from the subassembling pattern */ 8626 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8627 coarse_mat_is = NULL; 8628 multilevel_allowed = PETSC_FALSE; 8629 multilevel_requested = PETSC_FALSE; 8630 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8631 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8632 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8633 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8634 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8635 if (multilevel_requested) { 8636 ncoarse = active_procs / coarsening_ratio; 8637 restr = PETSC_FALSE; 8638 full_restr = PETSC_FALSE; 8639 } else { 8640 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8641 restr = PETSC_TRUE; 8642 full_restr = PETSC_TRUE; 8643 } 8644 if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8645 ncoarse = PetscMax(1, ncoarse); 8646 if (!pcbddc->coarse_subassembling) { 8647 if (coarsening_ratio > 1) { 8648 if (multilevel_requested) { 8649 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8650 } else { 8651 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8652 } 8653 } else { 8654 PetscMPIInt rank; 8655 8656 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8657 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE; 8658 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8659 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling")); 8660 } 8661 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8662 PetscInt psum; 8663 if (pcbddc->coarse_ksp) psum = 1; 8664 else psum = 0; 8665 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8666 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8667 } 8668 /* determine if we can go multilevel */ 8669 if (multilevel_requested) { 8670 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8671 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8672 } 8673 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8674 8675 /* dump subassembling pattern */ 8676 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8677 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8678 nedcfield = -1; 8679 corners = NULL; 8680 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8681 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8682 const PetscInt *idxs; 8683 ISLocalToGlobalMapping tmap; 8684 8685 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8686 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8687 /* allocate space for temporary storage */ 8688 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8689 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8690 /* allocate for IS array */ 8691 nisdofs = pcbddc->n_ISForDofsLocal; 8692 if (pcbddc->nedclocal) { 8693 if (pcbddc->nedfield > -1) { 8694 nedcfield = pcbddc->nedfield; 8695 } else { 8696 nedcfield = 0; 8697 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8698 nisdofs = 1; 8699 } 8700 } 8701 nisneu = !!pcbddc->NeumannBoundariesLocal; 8702 nisvert = 0; /* nisvert is not used */ 8703 nis = nisdofs + nisneu + nisvert; 8704 PetscCall(PetscMalloc1(nis, &isarray)); 8705 /* dofs splitting */ 8706 for (i = 0; i < nisdofs; i++) { 8707 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8708 if (nedcfield != i) { 8709 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8710 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8711 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8712 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8713 } else { 8714 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8715 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8716 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8717 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8718 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8719 } 8720 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8721 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8722 /* PetscCall(ISView(isarray[i],0)); */ 8723 } 8724 /* neumann boundaries */ 8725 if (pcbddc->NeumannBoundariesLocal) { 8726 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8727 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8728 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8729 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8730 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8731 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8732 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8733 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8734 } 8735 /* coordinates */ 8736 if (pcbddc->corner_selected) { 8737 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8738 PetscCall(ISGetLocalSize(corners, &tsize)); 8739 PetscCall(ISGetIndices(corners, &idxs)); 8740 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8741 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8742 PetscCall(ISRestoreIndices(corners, &idxs)); 8743 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8744 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8745 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8746 } 8747 PetscCall(PetscFree(tidxs)); 8748 PetscCall(PetscFree(tidxs2)); 8749 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8750 } else { 8751 nis = 0; 8752 nisdofs = 0; 8753 nisneu = 0; 8754 nisvert = 0; 8755 isarray = NULL; 8756 } 8757 /* destroy no longer needed map */ 8758 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8759 8760 /* subassemble */ 8761 if (multilevel_allowed) { 8762 Vec vp[1]; 8763 PetscInt nvecs = 0; 8764 PetscBool reuse; 8765 8766 vp[0] = NULL; 8767 /* XXX HDIV also */ 8768 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8769 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8770 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8771 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8772 nvecs = 1; 8773 8774 if (pcbddc->divudotp) { 8775 Mat B, loc_divudotp; 8776 Vec v, p; 8777 IS dummy; 8778 PetscInt np; 8779 8780 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8781 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8782 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8783 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8784 PetscCall(MatCreateVecs(B, &v, &p)); 8785 PetscCall(VecSet(p, 1.)); 8786 PetscCall(MatMultTranspose(B, p, v)); 8787 PetscCall(VecDestroy(&p)); 8788 PetscCall(MatDestroy(&B)); 8789 PetscCall(VecGetArray(vp[0], &array)); 8790 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8791 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8792 PetscCall(VecResetArray(pcbddc->vec1_P)); 8793 PetscCall(VecRestoreArray(vp[0], &array)); 8794 PetscCall(ISDestroy(&dummy)); 8795 PetscCall(VecDestroy(&v)); 8796 } 8797 } 8798 if (coarse_mat) reuse = PETSC_TRUE; 8799 else reuse = PETSC_FALSE; 8800 if (multi_element) { 8801 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8802 coarse_mat_is = t_coarse_mat_is; 8803 } else { 8804 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8805 if (reuse) { 8806 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8807 } else { 8808 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8809 } 8810 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8811 PetscScalar *arraym; 8812 const PetscScalar *arrayv; 8813 PetscInt nl; 8814 PetscCall(VecGetLocalSize(vp[0], &nl)); 8815 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8816 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8817 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8818 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8819 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8820 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8821 PetscCall(VecDestroy(&vp[0])); 8822 } else { 8823 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8824 } 8825 } 8826 } else { 8827 PetscBool default_sub; 8828 8829 PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub)); 8830 if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8831 else { 8832 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8833 coarse_mat_is = t_coarse_mat_is; 8834 } 8835 } 8836 if (coarse_mat_is || coarse_mat) { 8837 if (!multilevel_allowed) { 8838 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8839 } else { 8840 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8841 if (coarse_mat_is) { 8842 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8843 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8844 coarse_mat = coarse_mat_is; 8845 } 8846 } 8847 } 8848 PetscCall(MatDestroy(&t_coarse_mat_is)); 8849 PetscCall(MatDestroy(&coarse_mat_is)); 8850 8851 /* create local to global scatters for coarse problem */ 8852 if (compute_vecs) { 8853 PetscInt lrows; 8854 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8855 if (coarse_mat) { 8856 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8857 } else { 8858 lrows = 0; 8859 } 8860 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8861 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8862 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8863 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8864 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8865 } 8866 PetscCall(ISDestroy(&coarse_is)); 8867 8868 /* set defaults for coarse KSP and PC */ 8869 if (multilevel_allowed) { 8870 coarse_ksp_type = KSPRICHARDSON; 8871 coarse_pc_type = PCBDDC; 8872 } else { 8873 coarse_ksp_type = KSPPREONLY; 8874 coarse_pc_type = PCREDUNDANT; 8875 } 8876 8877 /* print some info if requested */ 8878 if (pcbddc->dbg_flag) { 8879 if (!multilevel_allowed) { 8880 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8881 if (multilevel_requested) { 8882 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8883 } else if (pcbddc->max_levels) { 8884 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8885 } 8886 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8887 } 8888 } 8889 8890 /* communicate coarse discrete gradient */ 8891 coarseG = NULL; 8892 if (pcbddc->nedcG && multilevel_allowed) { 8893 MPI_Comm ccomm; 8894 if (coarse_mat) { 8895 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8896 } else { 8897 ccomm = MPI_COMM_NULL; 8898 } 8899 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8900 } 8901 8902 /* create the coarse KSP object only once with defaults */ 8903 if (coarse_mat) { 8904 PetscBool isredundant, isbddc, force, valid; 8905 PetscViewer dbg_viewer = NULL; 8906 PetscBool isset, issym, isher, isspd; 8907 8908 if (pcbddc->dbg_flag) { 8909 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8910 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8911 } 8912 if (!pcbddc->coarse_ksp) { 8913 char prefix[256], str_level[16]; 8914 size_t len; 8915 8916 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8917 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8918 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8919 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8920 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8921 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8922 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8923 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8924 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8925 /* TODO is this logic correct? should check for coarse_mat type */ 8926 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8927 /* prefix */ 8928 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8929 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8930 if (!pcbddc->current_level) { 8931 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8932 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8933 } else { 8934 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8935 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8936 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8937 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8938 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8939 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 8940 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8941 } 8942 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8943 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8944 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8945 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8946 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8947 /* allow user customization */ 8948 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8949 /* get some info after set from options */ 8950 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8951 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8952 force = PETSC_FALSE; 8953 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8954 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8955 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8956 if (multilevel_allowed && !force && !valid) { 8957 isbddc = PETSC_TRUE; 8958 PetscCall(PCSetType(pc_temp, PCBDDC)); 8959 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8960 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8961 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8962 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8963 PetscObjectOptionsBegin((PetscObject)pc_temp); 8964 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8965 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8966 PetscOptionsEnd(); 8967 pc_temp->setfromoptionscalled++; 8968 } 8969 } 8970 } 8971 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8972 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8973 if (nisdofs) { 8974 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8975 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8976 } 8977 if (nisneu) { 8978 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8979 PetscCall(ISDestroy(&isarray[nisdofs])); 8980 } 8981 if (nisvert) { 8982 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8983 PetscCall(ISDestroy(&isarray[nis - 1])); 8984 } 8985 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8986 8987 /* get some info after set from options */ 8988 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8989 8990 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8991 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8992 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8993 force = PETSC_FALSE; 8994 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8995 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8996 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8997 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8998 if (isredundant) { 8999 KSP inner_ksp; 9000 PC inner_pc; 9001 9002 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 9003 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 9004 } 9005 9006 /* parameters which miss an API */ 9007 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 9008 if (isbddc) { 9009 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 9010 9011 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 9012 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 9013 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 9014 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 9015 if (pcbddc_coarse->benign_saddle_point) { 9016 Mat coarsedivudotp_is; 9017 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 9018 IS row, col; 9019 const PetscInt *gidxs; 9020 PetscInt n, st, M, N; 9021 9022 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 9023 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 9024 st = st - n; 9025 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 9026 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 9027 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 9028 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 9029 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 9030 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 9031 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 9032 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 9033 PetscCall(ISGetSize(row, &M)); 9034 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 9035 PetscCall(ISDestroy(&row)); 9036 PetscCall(ISDestroy(&col)); 9037 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 9038 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 9039 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 9040 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 9041 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 9042 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 9043 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 9044 PetscCall(MatDestroy(&coarsedivudotp)); 9045 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 9046 PetscCall(MatDestroy(&coarsedivudotp_is)); 9047 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 9048 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 9049 } 9050 } 9051 9052 /* propagate symmetry info of coarse matrix */ 9053 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 9054 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 9055 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 9056 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 9057 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 9058 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 9059 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 9060 9061 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 9062 /* set operators */ 9063 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 9064 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 9065 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 9066 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 9067 } 9068 PetscCall(MatDestroy(&coarseG)); 9069 PetscCall(PetscFree(isarray)); 9070 #if 0 9071 { 9072 PetscViewer viewer; 9073 char filename[256]; 9074 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 9075 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 9076 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 9077 PetscCall(MatView(coarse_mat,viewer)); 9078 PetscCall(PetscViewerPopFormat(viewer)); 9079 PetscCall(PetscViewerDestroy(&viewer)); 9080 } 9081 #endif 9082 9083 if (corners) { 9084 Vec gv; 9085 IS is; 9086 const PetscInt *idxs; 9087 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 9088 PetscScalar *coords; 9089 9090 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 9091 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 9092 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 9093 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 9094 PetscCall(VecSetBlockSize(gv, cdim)); 9095 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 9096 PetscCall(VecSetType(gv, VECSTANDARD)); 9097 PetscCall(VecSetFromOptions(gv)); 9098 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 9099 9100 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9101 PetscCall(ISGetLocalSize(is, &n)); 9102 PetscCall(ISGetIndices(is, &idxs)); 9103 PetscCall(PetscMalloc1(n * cdim, &coords)); 9104 for (i = 0; i < n; i++) { 9105 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 9106 } 9107 PetscCall(ISRestoreIndices(is, &idxs)); 9108 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9109 9110 PetscCall(ISGetLocalSize(corners, &n)); 9111 PetscCall(ISGetIndices(corners, &idxs)); 9112 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 9113 PetscCall(ISRestoreIndices(corners, &idxs)); 9114 PetscCall(PetscFree(coords)); 9115 PetscCall(VecAssemblyBegin(gv)); 9116 PetscCall(VecAssemblyEnd(gv)); 9117 PetscCall(VecGetArray(gv, &coords)); 9118 if (pcbddc->coarse_ksp) { 9119 PC coarse_pc; 9120 PetscBool isbddc; 9121 9122 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 9123 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 9124 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 9125 PetscReal *realcoords; 9126 9127 PetscCall(VecGetLocalSize(gv, &n)); 9128 #if defined(PETSC_USE_COMPLEX) 9129 PetscCall(PetscMalloc1(n, &realcoords)); 9130 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 9131 #else 9132 realcoords = coords; 9133 #endif 9134 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 9135 #if defined(PETSC_USE_COMPLEX) 9136 PetscCall(PetscFree(realcoords)); 9137 #endif 9138 } 9139 } 9140 PetscCall(VecRestoreArray(gv, &coords)); 9141 PetscCall(VecDestroy(&gv)); 9142 } 9143 PetscCall(ISDestroy(&corners)); 9144 9145 if (pcbddc->coarse_ksp) { 9146 Vec crhs, csol; 9147 9148 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9149 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9150 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9151 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9152 } 9153 PetscCall(MatDestroy(&coarsedivudotp)); 9154 9155 /* compute null space for coarse solver if the benign trick has been requested */ 9156 if (pcbddc->benign_null) { 9157 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9158 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9159 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9160 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9161 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9162 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9163 if (coarse_mat) { 9164 Vec nullv; 9165 PetscScalar *array, *array2; 9166 PetscInt nl; 9167 9168 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9169 PetscCall(VecGetLocalSize(nullv, &nl)); 9170 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9171 PetscCall(VecGetArray(nullv, &array2)); 9172 PetscCall(PetscArraycpy(array2, array, nl)); 9173 PetscCall(VecRestoreArray(nullv, &array2)); 9174 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9175 PetscCall(VecNormalize(nullv, NULL)); 9176 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9177 PetscCall(VecDestroy(&nullv)); 9178 } 9179 } 9180 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9181 9182 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9183 if (pcbddc->coarse_ksp) { 9184 PetscBool ispreonly; 9185 9186 if (CoarseNullSpace) { 9187 PetscBool isnull; 9188 9189 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9190 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9191 /* TODO: add local nullspaces (if any) */ 9192 } 9193 /* setup coarse ksp */ 9194 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9195 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9196 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9197 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9198 KSP check_ksp; 9199 KSPType check_ksp_type; 9200 PC check_pc; 9201 Vec check_vec, coarse_vec; 9202 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9203 PetscInt its; 9204 PetscBool compute_eigs; 9205 PetscReal *eigs_r, *eigs_c; 9206 PetscInt neigs; 9207 const char *prefix; 9208 9209 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9210 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9211 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9212 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9213 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9214 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9215 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9216 /* prevent from setup unneeded object */ 9217 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9218 PetscCall(PCSetType(check_pc, PCNONE)); 9219 if (ispreonly) { 9220 check_ksp_type = KSPPREONLY; 9221 compute_eigs = PETSC_FALSE; 9222 } else { 9223 check_ksp_type = KSPGMRES; 9224 compute_eigs = PETSC_TRUE; 9225 } 9226 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9227 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9228 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9229 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9230 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9231 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9232 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9233 PetscCall(KSPSetFromOptions(check_ksp)); 9234 PetscCall(KSPSetUp(check_ksp)); 9235 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9236 PetscCall(KSPSetPC(check_ksp, check_pc)); 9237 /* create random vec */ 9238 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9239 PetscCall(VecSetRandom(check_vec, NULL)); 9240 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9241 /* solve coarse problem */ 9242 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9243 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9244 /* set eigenvalue estimation if preonly has not been requested */ 9245 if (compute_eigs) { 9246 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9247 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9248 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9249 if (neigs) { 9250 lambda_max = eigs_r[neigs - 1]; 9251 lambda_min = eigs_r[0]; 9252 if (pcbddc->use_coarse_estimates) { 9253 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9254 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9255 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9256 } 9257 } 9258 } 9259 } 9260 9261 /* check coarse problem residual error */ 9262 if (pcbddc->dbg_flag) { 9263 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9264 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9265 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9266 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9267 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9268 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9269 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9270 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9271 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer)); 9272 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9273 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9274 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9275 if (compute_eigs) { 9276 PetscReal lambda_max_s, lambda_min_s; 9277 KSPConvergedReason reason; 9278 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9279 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9280 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9281 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9282 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9283 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9284 } 9285 PetscCall(PetscViewerFlush(dbg_viewer)); 9286 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9287 } 9288 PetscCall(VecDestroy(&check_vec)); 9289 PetscCall(VecDestroy(&coarse_vec)); 9290 PetscCall(KSPDestroy(&check_ksp)); 9291 if (compute_eigs) { 9292 PetscCall(PetscFree(eigs_r)); 9293 PetscCall(PetscFree(eigs_c)); 9294 } 9295 } 9296 } 9297 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9298 /* print additional info */ 9299 if (pcbddc->dbg_flag) { 9300 /* waits until all processes reaches this point */ 9301 PetscCall(PetscBarrier((PetscObject)pc)); 9302 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9303 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9304 } 9305 9306 /* free memory */ 9307 PetscCall(MatDestroy(&coarse_mat)); 9308 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9309 PetscFunctionReturn(PETSC_SUCCESS); 9310 } 9311 9312 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9313 { 9314 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9315 PC_IS *pcis = (PC_IS *)pc->data; 9316 IS subset, subset_mult, subset_n; 9317 PetscInt local_size, coarse_size = 0; 9318 PetscInt *local_primal_indices = NULL; 9319 const PetscInt *t_local_primal_indices; 9320 9321 PetscFunctionBegin; 9322 /* Compute global number of coarse dofs */ 9323 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9324 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9325 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9326 PetscCall(ISDestroy(&subset_n)); 9327 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9328 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9329 PetscCall(ISDestroy(&subset)); 9330 PetscCall(ISDestroy(&subset_mult)); 9331 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9332 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9333 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9334 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9335 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9336 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9337 PetscCall(ISDestroy(&subset_n)); 9338 9339 if (pcbddc->dbg_flag) { 9340 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9341 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9342 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9343 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9344 } 9345 9346 /* get back data */ 9347 *coarse_size_n = coarse_size; 9348 *local_primal_indices_n = local_primal_indices; 9349 PetscFunctionReturn(PETSC_SUCCESS); 9350 } 9351 9352 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9353 { 9354 IS localis_t; 9355 PetscInt i, lsize, *idxs, n; 9356 PetscScalar *vals; 9357 9358 PetscFunctionBegin; 9359 /* get indices in local ordering exploiting local to global map */ 9360 PetscCall(ISGetLocalSize(globalis, &lsize)); 9361 PetscCall(PetscMalloc1(lsize, &vals)); 9362 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9363 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9364 PetscCall(VecSet(gwork, 0.0)); 9365 PetscCall(VecSet(lwork, 0.0)); 9366 if (idxs) { /* multilevel guard */ 9367 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9368 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9369 } 9370 PetscCall(VecAssemblyBegin(gwork)); 9371 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9372 PetscCall(PetscFree(vals)); 9373 PetscCall(VecAssemblyEnd(gwork)); 9374 /* now compute set in local ordering */ 9375 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9376 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9377 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9378 PetscCall(VecGetSize(lwork, &n)); 9379 for (i = 0, lsize = 0; i < n; i++) { 9380 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9381 } 9382 PetscCall(PetscMalloc1(lsize, &idxs)); 9383 for (i = 0, lsize = 0; i < n; i++) { 9384 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9385 } 9386 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9387 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9388 *localis = localis_t; 9389 PetscFunctionReturn(PETSC_SUCCESS); 9390 } 9391 9392 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9393 { 9394 PC_IS *pcis = (PC_IS *)pc->data; 9395 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9396 PC_IS *pcisf; 9397 PC_BDDC *pcbddcf; 9398 PC pcf; 9399 9400 PetscFunctionBegin; 9401 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9402 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9403 PetscCall(PCSetType(pcf, PCBDDC)); 9404 9405 pcisf = (PC_IS *)pcf->data; 9406 pcbddcf = (PC_BDDC *)pcf->data; 9407 9408 pcisf->is_B_local = pcis->is_B_local; 9409 pcisf->vec1_N = pcis->vec1_N; 9410 pcisf->BtoNmap = pcis->BtoNmap; 9411 pcisf->n = pcis->n; 9412 pcisf->n_B = pcis->n_B; 9413 9414 PetscCall(PetscFree(pcbddcf->mat_graph)); 9415 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9416 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9417 pcbddcf->sub_schurs = schurs; 9418 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9419 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9420 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9421 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9422 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9423 pcbddcf->use_faces = PETSC_TRUE; 9424 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9425 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9426 pcbddcf->use_qr_single = (PetscBool)!constraints; 9427 pcbddcf->fake_change = PETSC_TRUE; 9428 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9429 9430 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9431 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9432 9433 *change = pcbddcf->ConstraintMatrix; 9434 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9435 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9436 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9437 9438 if (schurs) pcbddcf->sub_schurs = NULL; 9439 pcbddcf->ConstraintMatrix = NULL; 9440 pcbddcf->mat_graph = NULL; 9441 pcisf->is_B_local = NULL; 9442 pcisf->vec1_N = NULL; 9443 pcisf->BtoNmap = NULL; 9444 PetscCall(PCDestroy(&pcf)); 9445 PetscFunctionReturn(PETSC_SUCCESS); 9446 } 9447 9448 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9449 { 9450 PC_IS *pcis = (PC_IS *)pc->data; 9451 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9452 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9453 Mat S_j; 9454 PetscInt *used_xadj, *used_adjncy; 9455 PetscBool free_used_adj; 9456 9457 PetscFunctionBegin; 9458 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9459 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9460 free_used_adj = PETSC_FALSE; 9461 if (pcbddc->sub_schurs_layers == -1) { 9462 used_xadj = NULL; 9463 used_adjncy = NULL; 9464 } else { 9465 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9466 used_xadj = pcbddc->mat_graph->xadj; 9467 used_adjncy = pcbddc->mat_graph->adjncy; 9468 } else if (pcbddc->computed_rowadj) { 9469 used_xadj = pcbddc->mat_graph->xadj; 9470 used_adjncy = pcbddc->mat_graph->adjncy; 9471 } else { 9472 PetscBool flg_row = PETSC_FALSE; 9473 const PetscInt *xadj, *adjncy; 9474 PetscInt nvtxs; 9475 9476 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9477 if (flg_row) { 9478 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9479 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9480 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9481 free_used_adj = PETSC_TRUE; 9482 } else { 9483 pcbddc->sub_schurs_layers = -1; 9484 used_xadj = NULL; 9485 used_adjncy = NULL; 9486 } 9487 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9488 } 9489 } 9490 9491 /* setup sub_schurs data */ 9492 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9493 if (!sub_schurs->schur_explicit) { 9494 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9495 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9496 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9497 } else { 9498 Mat change = NULL; 9499 Vec scaling = NULL; 9500 IS change_primal = NULL, iP; 9501 PetscInt benign_n; 9502 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9503 PetscBool need_change = PETSC_FALSE; 9504 PetscBool discrete_harmonic = PETSC_FALSE; 9505 9506 if (!pcbddc->use_vertices && reuse_solvers) { 9507 PetscInt n_vertices; 9508 9509 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9510 reuse_solvers = (PetscBool)!n_vertices; 9511 } 9512 if (!pcbddc->benign_change_explicit) { 9513 benign_n = pcbddc->benign_n; 9514 } else { 9515 benign_n = 0; 9516 } 9517 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9518 We need a global reduction to avoid possible deadlocks. 9519 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9520 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9521 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9522 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9523 need_change = (PetscBool)(!need_change); 9524 } 9525 /* If the user defines additional constraints, we import them here */ 9526 if (need_change) { 9527 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9528 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9529 } 9530 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9531 9532 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9533 if (iP) { 9534 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9535 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9536 PetscOptionsEnd(); 9537 } 9538 if (discrete_harmonic) { 9539 Mat A; 9540 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9541 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9542 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9543 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9544 pcbddc->benign_zerodiag_subs, change, change_primal)); 9545 PetscCall(MatDestroy(&A)); 9546 } else { 9547 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9548 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9549 } 9550 PetscCall(MatDestroy(&change)); 9551 PetscCall(ISDestroy(&change_primal)); 9552 } 9553 PetscCall(MatDestroy(&S_j)); 9554 9555 /* free adjacency */ 9556 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9557 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9558 PetscFunctionReturn(PETSC_SUCCESS); 9559 } 9560 9561 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9562 { 9563 PC_IS *pcis = (PC_IS *)pc->data; 9564 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9565 PCBDDCGraph graph; 9566 9567 PetscFunctionBegin; 9568 /* attach interface graph for determining subsets */ 9569 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9570 IS verticesIS, verticescomm; 9571 PetscInt vsize, *idxs; 9572 9573 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9574 PetscCall(ISGetSize(verticesIS, &vsize)); 9575 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9576 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9577 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9578 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9579 PetscCall(PCBDDCGraphCreate(&graph)); 9580 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9581 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9582 PetscCall(ISDestroy(&verticescomm)); 9583 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9584 } else { 9585 graph = pcbddc->mat_graph; 9586 } 9587 /* print some info */ 9588 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9589 IS vertices; 9590 PetscInt nv, nedges, nfaces; 9591 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9592 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9593 PetscCall(ISGetSize(vertices, &nv)); 9594 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9595 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9596 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9597 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9598 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9599 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9600 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9601 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9602 } 9603 9604 /* sub_schurs init */ 9605 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9606 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9607 9608 /* free graph struct */ 9609 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9610 PetscFunctionReturn(PETSC_SUCCESS); 9611 } 9612 9613 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9614 { 9615 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9616 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9617 const PetscInt *idxs; 9618 IS gis; 9619 9620 PetscFunctionBegin; 9621 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9622 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9623 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9624 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9625 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9626 PetscCall(ISGetLocalSize(is, &ni)); 9627 PetscCall(ISGetIndices(is, &idxs)); 9628 for (PetscInt i = 0; i < ni; i++) { 9629 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9630 matis->sf_leafdata[idxs[i]] = 1; 9631 } 9632 PetscCall(ISRestoreIndices(is, &idxs)); 9633 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9634 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9635 ln = 0; 9636 for (PetscInt i = 0; i < n; i++) { 9637 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9638 } 9639 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9640 PetscCall(ISView(gis, viewer)); 9641 PetscCall(ISDestroy(&gis)); 9642 PetscFunctionReturn(PETSC_SUCCESS); 9643 } 9644 9645 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9646 { 9647 PetscInt header[11]; 9648 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9649 PetscViewer viewer; 9650 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9651 9652 PetscFunctionBegin; 9653 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9654 if (load) { 9655 IS is; 9656 Mat A; 9657 9658 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9659 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9660 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9661 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9662 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9663 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9664 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9665 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9666 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9667 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9668 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9669 if (header[0]) { 9670 PetscCall(ISCreate(comm, &is)); 9671 PetscCall(ISLoad(is, viewer)); 9672 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9673 PetscCall(ISDestroy(&is)); 9674 } 9675 if (header[1]) { 9676 PetscCall(ISCreate(comm, &is)); 9677 PetscCall(ISLoad(is, viewer)); 9678 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9679 PetscCall(ISDestroy(&is)); 9680 } 9681 if (header[2]) { 9682 IS *isarray; 9683 9684 PetscCall(PetscMalloc1(header[2], &isarray)); 9685 for (PetscInt i = 0; i < header[2]; i++) { 9686 PetscCall(ISCreate(comm, &isarray[i])); 9687 PetscCall(ISLoad(isarray[i], viewer)); 9688 } 9689 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9690 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9691 PetscCall(PetscFree(isarray)); 9692 } 9693 if (header[3]) { 9694 PetscCall(ISCreate(comm, &is)); 9695 PetscCall(ISLoad(is, viewer)); 9696 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9697 PetscCall(ISDestroy(&is)); 9698 } 9699 if (header[4]) { 9700 PetscCall(MatCreate(comm, &A)); 9701 PetscCall(MatSetType(A, MATAIJ)); 9702 PetscCall(MatLoad(A, viewer)); 9703 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9704 PetscCall(MatDestroy(&A)); 9705 } 9706 if (header[9]) { 9707 PetscCall(MatCreate(comm, &A)); 9708 PetscCall(MatSetType(A, MATIS)); 9709 PetscCall(MatLoad(A, viewer)); 9710 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9711 PetscCall(MatDestroy(&A)); 9712 } 9713 } else { 9714 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal; 9715 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal; 9716 header[2] = pcbddc->n_ISForDofsLocal; 9717 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local; 9718 header[4] = (PetscInt)!!pcbddc->discretegradient; 9719 header[5] = pcbddc->nedorder; 9720 header[6] = pcbddc->nedfield; 9721 header[7] = (PetscInt)pcbddc->nedglobal; 9722 header[8] = (PetscInt)pcbddc->conforming; 9723 header[9] = (PetscInt)!!pcbddc->divudotp; 9724 header[10] = (PetscInt)pcbddc->divudotp_trans; 9725 if (header[4]) header[3] = 0; 9726 9727 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9728 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9729 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9730 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9731 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9732 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9733 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9734 } 9735 PetscCall(PetscViewerDestroy(&viewer)); 9736 PetscFunctionReturn(PETSC_SUCCESS); 9737 } 9738 9739 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9740 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9741 { 9742 Mat At; 9743 IS rows; 9744 PetscInt rst, ren; 9745 PetscLayout rmap; 9746 9747 PetscFunctionBegin; 9748 rst = ren = 0; 9749 if (ccomm != MPI_COMM_NULL) { 9750 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9751 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9752 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9753 PetscCall(PetscLayoutSetUp(rmap)); 9754 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9755 } 9756 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9757 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9758 PetscCall(ISDestroy(&rows)); 9759 9760 if (ccomm != MPI_COMM_NULL) { 9761 Mat_MPIAIJ *a, *b; 9762 IS from, to; 9763 Vec gvec; 9764 PetscInt lsize; 9765 9766 PetscCall(MatCreate(ccomm, B)); 9767 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9768 PetscCall(MatSetType(*B, MATAIJ)); 9769 PetscCall(PetscLayoutDestroy(&(*B)->rmap)); 9770 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9771 a = (Mat_MPIAIJ *)At->data; 9772 b = (Mat_MPIAIJ *)(*B)->data; 9773 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9774 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9775 PetscCall(PetscObjectReference((PetscObject)a->A)); 9776 PetscCall(PetscObjectReference((PetscObject)a->B)); 9777 b->A = a->A; 9778 b->B = a->B; 9779 9780 b->donotstash = a->donotstash; 9781 b->roworiented = a->roworiented; 9782 b->rowindices = NULL; 9783 b->rowvalues = NULL; 9784 b->getrowactive = PETSC_FALSE; 9785 9786 (*B)->rmap = rmap; 9787 (*B)->factortype = A->factortype; 9788 (*B)->assembled = PETSC_TRUE; 9789 (*B)->insertmode = NOT_SET_VALUES; 9790 (*B)->preallocated = PETSC_TRUE; 9791 9792 if (a->colmap) { 9793 #if defined(PETSC_USE_CTABLE) 9794 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9795 #else 9796 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9797 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9798 #endif 9799 } else b->colmap = NULL; 9800 if (a->garray) { 9801 PetscInt len; 9802 len = a->B->cmap->n; 9803 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9804 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9805 } else b->garray = NULL; 9806 9807 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9808 b->lvec = a->lvec; 9809 9810 /* cannot use VecScatterCopy */ 9811 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9812 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9813 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9814 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9815 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9816 PetscCall(ISDestroy(&from)); 9817 PetscCall(ISDestroy(&to)); 9818 PetscCall(VecDestroy(&gvec)); 9819 } 9820 PetscCall(MatDestroy(&At)); 9821 PetscFunctionReturn(PETSC_SUCCESS); 9822 } 9823 9824 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9825 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9826 { 9827 PetscBool isaij; 9828 MPI_Comm comm; 9829 9830 PetscFunctionBegin; 9831 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9832 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9833 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9834 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9835 if (isaij) { /* SeqAIJ supports repeated rows */ 9836 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9837 } else { 9838 Mat A_loc; 9839 Mat_SeqAIJ *da; 9840 PetscSF sf; 9841 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9842 PetscScalar *daa; 9843 const PetscInt *idxs; 9844 const PetscSFNode *iremotes; 9845 PetscSFNode *remotes; 9846 9847 /* SF for incoming rows */ 9848 PetscCall(PetscSFCreate(comm, &sf)); 9849 PetscCall(ISGetLocalSize(rows, &ni)); 9850 PetscCall(ISGetIndices(rows, &idxs)); 9851 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9852 PetscCall(ISRestoreIndices(rows, &idxs)); 9853 9854 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9855 da = (Mat_SeqAIJ *)A_loc->data; 9856 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9857 for (PetscInt i = 0; i < m; i++) { 9858 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9859 rdata[2 * i + 1] = da->i[i]; 9860 } 9861 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9862 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9863 PetscCall(PetscMalloc1(ni + 1, &di)); 9864 di[0] = 0; 9865 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9866 PetscCall(PetscMalloc1(di[ni], &dj)); 9867 PetscCall(PetscMalloc1(di[ni], &daa)); 9868 PetscCall(PetscMalloc1(di[ni], &remotes)); 9869 9870 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9871 9872 /* SF graph for nonzeros */ 9873 c = 0; 9874 for (PetscInt i = 0; i < ni; i++) { 9875 const PetscInt rank = iremotes[i].rank; 9876 const PetscInt rsize = ldata[2 * i]; 9877 for (PetscInt j = 0; j < rsize; j++) { 9878 remotes[c].rank = rank; 9879 remotes[c].index = ldata[2 * i + 1] + j; 9880 c++; 9881 } 9882 } 9883 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9884 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9885 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9886 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9887 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9888 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9889 9890 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9891 PetscCall(MatDestroy(&A_loc)); 9892 PetscCall(PetscSFDestroy(&sf)); 9893 PetscCall(PetscFree(di)); 9894 PetscCall(PetscFree(dj)); 9895 PetscCall(PetscFree(daa)); 9896 PetscCall(PetscFree(remotes)); 9897 PetscCall(PetscFree2(ldata, rdata)); 9898 } 9899 PetscFunctionReturn(PETSC_SUCCESS); 9900 } 9901