1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (no preallocation) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1334 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1335 PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 1336 //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1337 //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1338 //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1339 1340 /* Defaults to identity */ 1341 { 1342 Vec w; 1343 const PetscScalar *wa; 1344 1345 PetscCall(MatCreateVecs(T, &w, NULL)); 1346 PetscCall(VecSetLocalToGlobalMapping(w, al2g)); 1347 PetscCall(VecSet(w, 1.0)); 1348 for (i = 0; i < nee; i++) { 1349 const PetscInt *idxs; 1350 PetscInt nl; 1351 1352 PetscCall(ISGetLocalSize(eedges[i], &nl)); 1353 PetscCall(ISGetIndices(eedges[i], &idxs)); 1354 PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES)); 1355 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1356 } 1357 PetscCall(VecAssemblyBegin(w)); 1358 PetscCall(VecAssemblyEnd(w)); 1359 PetscCall(VecGetArrayRead(w, &wa)); 1360 for (i = T->rmap->rstart; i < T->rmap->rend; i++) 1361 if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1362 PetscCall(VecRestoreArrayRead(w, &wa)); 1363 PetscCall(VecDestroy(&w)); 1364 } 1365 1366 /* Create discrete gradient for the coarser level if needed */ 1367 PetscCall(MatDestroy(&pcbddc->nedcG)); 1368 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1369 if (pcbddc->current_level < pcbddc->max_levels) { 1370 ISLocalToGlobalMapping cel2g, cvl2g; 1371 IS wis, gwis; 1372 PetscInt cnv, cne; 1373 1374 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1375 if (fl2g) { 1376 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1377 } else { 1378 PetscCall(PetscObjectReference((PetscObject)wis)); 1379 pcbddc->nedclocal = wis; 1380 } 1381 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1382 PetscCall(ISDestroy(&wis)); 1383 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1384 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1385 PetscCall(ISDestroy(&wis)); 1386 PetscCall(ISDestroy(&gwis)); 1387 1388 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1389 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1390 PetscCall(ISDestroy(&wis)); 1391 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1392 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1393 PetscCall(ISDestroy(&wis)); 1394 PetscCall(ISDestroy(&gwis)); 1395 1396 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1397 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1398 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1399 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1400 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1401 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1402 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1403 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1404 } 1405 1406 MatNullSpace nnsp; 1407 PetscBool nnsp_has_const = PETSC_FALSE; 1408 const Vec *nnsp_vecs = NULL; 1409 PetscInt nnsp_nvecs = 0; 1410 VecScatter nnsp_vscat = NULL; 1411 PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp)); 1412 if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs)); 1413 if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */ 1414 IS allextcols, gallextcols, galleedges, is_E_to_zero; 1415 Vec E, V; 1416 PetscInt *eedgesidxs; 1417 const PetscScalar *evals; 1418 1419 PetscCall(MatCreateVecs(pc->pmat, &E, NULL)); 1420 PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL)); 1421 PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols)); 1422 cum = 0; 1423 for (i = 0; i < nee; i++) { 1424 PetscInt j; 1425 1426 PetscCall(ISGetLocalSize(eedges[i], &j)); 1427 PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized edge %" PetscInt_FMT, i); 1428 cum += j - 1; 1429 } 1430 PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs)); 1431 cum = 0; 1432 for (i = 0; i < nee; i++) { 1433 const PetscInt *idxs; 1434 PetscInt j; 1435 1436 PetscCall(ISGetLocalSize(eedges[i], &j)); 1437 PetscCall(ISGetIndices(eedges[i], &idxs)); 1438 PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */ 1439 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1440 cum += j - 1; 1441 } 1442 PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs)); 1443 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges)); 1444 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols)); 1445 PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat)); 1446 PetscCall(ISDestroy(&allextcols)); 1447 PetscCall(ISDestroy(&gallextcols)); 1448 PetscCall(ISDestroy(&galleedges)); 1449 1450 /* identify dofs we must zero if importing user-defined near nullspace from pmat */ 1451 PetscCall(VecSet(E, 1.0)); 1452 PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES)); 1453 PetscCall(VecAssemblyBegin(E)); 1454 PetscCall(VecAssemblyEnd(E)); 1455 PetscCall(VecGetArrayRead(E, &evals)); 1456 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) 1457 if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart; 1458 PetscCall(VecRestoreArrayRead(E, &evals)); 1459 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero)); 1460 PetscCall(PetscFree(eedgesidxs)); 1461 1462 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V)); 1463 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E)); 1464 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero)); 1465 PetscCall(ISDestroy(&is_E_to_zero)); 1466 PetscCall(VecDestroy(&V)); 1467 PetscCall(VecDestroy(&E)); 1468 } 1469 #if defined(PRINT_GDET) 1470 inc = 0; 1471 lev = pcbddc->current_level; 1472 #endif 1473 1474 /* Insert values in the change of basis matrix */ 1475 for (i = 0; i < nee; i++) { 1476 Mat Gins = NULL, GKins = NULL; 1477 IS cornersis = NULL; 1478 PetscScalar cvals[2]; 1479 1480 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1481 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1482 if (Gins && GKins) { 1483 const PetscScalar *data; 1484 const PetscInt *rows, *cols; 1485 PetscInt nrh, nch, nrc, ncc; 1486 1487 PetscCall(ISGetIndices(eedges[i], &cols)); 1488 /* H1 */ 1489 PetscCall(ISGetIndices(extrows[i], &rows)); 1490 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1491 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1492 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1493 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1494 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1495 /* complement */ 1496 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1497 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1498 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1499 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1500 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1501 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1502 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1503 1504 /* coarse discrete gradient */ 1505 if (pcbddc->nedcG) { 1506 PetscInt cols[2]; 1507 1508 cols[0] = 2 * i; 1509 cols[1] = 2 * i + 1; 1510 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1511 } 1512 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1513 } 1514 PetscCall(ISDestroy(&extrows[i])); 1515 PetscCall(ISDestroy(&extcols[i])); 1516 PetscCall(ISDestroy(&cornersis)); 1517 PetscCall(MatDestroy(&Gins)); 1518 PetscCall(MatDestroy(&GKins)); 1519 } 1520 1521 /* import edge constraints */ 1522 if (nnsp_vscat) { 1523 Vec V, E, *quadvecs; 1524 PetscInt nvecs, nvecs_orth; 1525 MatNullSpace onnsp = NULL; 1526 PetscBool onnsp_has_const = PETSC_FALSE; 1527 const Vec *onnsp_vecs = NULL; 1528 PetscInt onnsp_nvecs = 0, new_nnsp_nvecs, old_nnsp_nvecs; 1529 IS is_E_to_zero; 1530 1531 /* import nearnullspace from preconditioning matrix if user-defined */ 1532 PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp)); 1533 if (onnsp) { 1534 PetscBool isinternal; 1535 1536 PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal)); 1537 if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs)); 1538 } 1539 new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0); 1540 old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0); 1541 nvecs = old_nnsp_nvecs + new_nnsp_nvecs; 1542 PetscCall(PetscMalloc1(nvecs, &quadvecs)); 1543 1544 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V)); 1545 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E)); 1546 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero)); 1547 for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i])); 1548 cum = 0; 1549 if (nnsp_has_const) { 1550 PetscCall(VecSet(V, 1.0)); 1551 PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD)); 1552 PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD)); 1553 cum = 1; 1554 } 1555 for (i = 0; i < nnsp_nvecs; i++) { 1556 PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD)); 1557 PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD)); 1558 } 1559 1560 /* Now add old nnsp if present */ 1561 cum = 0; 1562 if (onnsp_has_const) { 1563 PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0)); 1564 PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0)); 1565 cum = 1; 1566 } 1567 for (i = 0; i < onnsp_nvecs; i++) { 1568 PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs])); 1569 PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0)); 1570 } 1571 nvecs_orth = nvecs; 1572 PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs)); 1573 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp)); 1574 for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i])); 1575 PetscCall(PetscFree(quadvecs)); 1576 PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp")); 1577 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1578 PetscCall(MatNullSpaceDestroy(&nnsp)); 1579 } 1580 PetscCall(VecScatterDestroy(&nnsp_vscat)); 1581 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1582 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1583 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1584 1585 /* Start assembling */ 1586 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1587 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1588 1589 /* Free */ 1590 if (fl2g) { 1591 PetscCall(ISDestroy(&primals)); 1592 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1593 PetscCall(PetscFree(eedges)); 1594 } 1595 1596 /* hack mat_graph with primal dofs on the coarse edges */ 1597 { 1598 PCBDDCGraph graph = pcbddc->mat_graph; 1599 PetscInt *oqueue = graph->queue; 1600 PetscInt *ocptr = graph->cptr; 1601 PetscInt ncc, *idxs; 1602 1603 /* find first primal edge */ 1604 if (pcbddc->nedclocal) { 1605 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1606 } else { 1607 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1608 idxs = cedges; 1609 } 1610 cum = 0; 1611 while (cum < nee && cedges[cum] < 0) cum++; 1612 1613 /* adapt connected components */ 1614 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1615 graph->cptr[0] = 0; 1616 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1617 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1618 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1619 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1620 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1621 ncc++; 1622 lc--; 1623 cum++; 1624 while (cum < nee && cedges[cum] < 0) cum++; 1625 } 1626 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1627 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1628 ncc++; 1629 } 1630 graph->ncc = ncc; 1631 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1632 PetscCall(PetscFree2(ocptr, oqueue)); 1633 } 1634 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1635 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1636 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1637 1638 PetscCall(ISDestroy(&nedfieldlocal)); 1639 PetscCall(PetscFree(extrow)); 1640 PetscCall(PetscFree2(work, rwork)); 1641 PetscCall(PetscFree(corners)); 1642 PetscCall(PetscFree(cedges)); 1643 PetscCall(PetscFree(extrows)); 1644 PetscCall(PetscFree(extcols)); 1645 PetscCall(MatDestroy(&lG)); 1646 1647 /* Complete assembling */ 1648 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1650 if (pcbddc->nedcG) { 1651 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1653 } 1654 1655 PetscCall(ISDestroy(&elements_corners)); 1656 1657 /* set change of basis */ 1658 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1659 PetscCall(MatDestroy(&T)); 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 /* the near-null space of BDDC carries information on quadrature weights, 1664 and these can be collinear -> so cheat with MatNullSpaceCreate 1665 and create a suitable set of basis vectors first */ 1666 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1667 { 1668 PetscInt i; 1669 1670 PetscFunctionBegin; 1671 for (i = 0; i < nvecs; i++) { 1672 PetscInt first, last; 1673 1674 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1675 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1676 if (i >= first && i < last) { 1677 PetscScalar *data; 1678 PetscCall(VecGetArray(quad_vecs[i], &data)); 1679 if (!has_const) { 1680 data[i - first] = 1.; 1681 } else { 1682 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1683 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1684 } 1685 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1686 } 1687 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1688 } 1689 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1690 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1691 PetscInt first, last; 1692 PetscCall(VecLockReadPop(quad_vecs[i])); 1693 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1694 if (i >= first && i < last) { 1695 PetscScalar *data; 1696 PetscCall(VecGetArray(quad_vecs[i], &data)); 1697 if (!has_const) { 1698 data[i - first] = 0.; 1699 } else { 1700 data[2 * i - first] = 0.; 1701 data[2 * i - first + 1] = 0.; 1702 } 1703 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1704 } 1705 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1706 PetscCall(VecLockReadPush(quad_vecs[i])); 1707 } 1708 PetscFunctionReturn(PETSC_SUCCESS); 1709 } 1710 1711 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1712 { 1713 Mat loc_divudotp; 1714 Vec p, v, quad_vec; 1715 ISLocalToGlobalMapping map; 1716 PetscScalar *array; 1717 1718 PetscFunctionBegin; 1719 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1720 if (!transpose) { 1721 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1722 } else { 1723 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1724 } 1725 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1726 PetscCall(VecLockReadPop(quad_vec)); 1727 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1728 1729 /* compute local quad vec */ 1730 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1731 if (!transpose) { 1732 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1733 } else { 1734 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1735 } 1736 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1737 PetscCall(VecSet(p, 1.)); 1738 if (!transpose) { 1739 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1740 } else { 1741 PetscCall(MatMult(loc_divudotp, p, v)); 1742 } 1743 PetscCall(VecDestroy(&p)); 1744 if (vl2l) { 1745 Mat lA; 1746 VecScatter sc; 1747 Vec vins; 1748 1749 PetscCall(MatISGetLocalMat(A, &lA)); 1750 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1751 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1752 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1753 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1754 PetscCall(VecScatterDestroy(&sc)); 1755 PetscCall(VecDestroy(&v)); 1756 v = vins; 1757 } 1758 1759 /* mask summation of interface values */ 1760 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1761 const PetscInt *degree; 1762 PetscSF msf; 1763 1764 PetscCall(VecGetLocalSize(v, &n)); 1765 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1766 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1767 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1768 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1769 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1770 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1771 for (PetscInt i = 0, c = 0; i < nr; i++) { 1772 mmask[c] = 1; 1773 c += degree[i]; 1774 } 1775 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1776 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1777 PetscCall(VecGetArray(v, &array)); 1778 for (PetscInt i = 0; i < n; i++) { 1779 array[i] *= mask[i]; 1780 idxs[i] = i; 1781 } 1782 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1783 PetscCall(VecRestoreArray(v, &array)); 1784 PetscCall(PetscFree3(mmask, mask, idxs)); 1785 PetscCall(VecDestroy(&v)); 1786 PetscCall(VecAssemblyBegin(quad_vec)); 1787 PetscCall(VecAssemblyEnd(quad_vec)); 1788 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1789 PetscCall(VecLockReadPush(quad_vec)); 1790 PetscCall(VecDestroy(&quad_vec)); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1795 { 1796 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1797 1798 PetscFunctionBegin; 1799 if (primalv) { 1800 if (pcbddc->user_primal_vertices_local) { 1801 IS list[2], newp; 1802 1803 list[0] = primalv; 1804 list[1] = pcbddc->user_primal_vertices_local; 1805 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1806 PetscCall(ISSortRemoveDups(newp)); 1807 PetscCall(ISDestroy(&list[1])); 1808 pcbddc->user_primal_vertices_local = newp; 1809 } else { 1810 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1811 } 1812 } 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, PetscCtx ctx) 1817 { 1818 PetscInt f, *comp = (PetscInt *)ctx; 1819 1820 PetscFunctionBegin; 1821 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1826 { 1827 Vec local, global; 1828 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1829 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1830 PetscBool monolithic = PETSC_FALSE; 1831 1832 PetscFunctionBegin; 1833 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1834 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1835 PetscOptionsEnd(); 1836 /* need to convert from global to local topology information and remove references to information in global ordering */ 1837 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1838 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1839 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1840 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1841 if (monolithic) { /* just get block size to properly compute vertices */ 1842 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1843 goto boundary; 1844 } 1845 1846 if (pcbddc->user_provided_isfordofs) { 1847 if (pcbddc->n_ISForDofs) { 1848 PetscInt i; 1849 1850 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1851 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1852 PetscInt bs; 1853 1854 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1855 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1856 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1857 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1858 } 1859 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1860 pcbddc->n_ISForDofs = 0; 1861 PetscCall(PetscFree(pcbddc->ISForDofs)); 1862 } 1863 } else { 1864 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1865 DM dm; 1866 1867 PetscCall(MatGetDM(pc->pmat, &dm)); 1868 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1869 if (dm) { 1870 IS *fields; 1871 PetscInt nf, i; 1872 1873 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1874 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1875 for (i = 0; i < nf; i++) { 1876 PetscInt bs; 1877 1878 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1879 PetscCall(ISGetBlockSize(fields[i], &bs)); 1880 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1881 PetscCall(ISDestroy(&fields[i])); 1882 } 1883 PetscCall(PetscFree(fields)); 1884 pcbddc->n_ISForDofsLocal = nf; 1885 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1886 PetscContainer c; 1887 1888 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1889 if (c) { 1890 MatISLocalFields lf; 1891 PetscCall(PetscContainerGetPointer(c, &lf)); 1892 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1893 } else { /* fallback, create the default fields if bs > 1 */ 1894 PetscInt i, n = matis->A->rmap->n; 1895 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1896 if (i > 1) { 1897 pcbddc->n_ISForDofsLocal = i; 1898 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1899 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1900 } 1901 } 1902 } 1903 } else { 1904 PetscInt i; 1905 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1906 } 1907 } 1908 1909 boundary: 1910 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1911 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1912 } else if (pcbddc->DirichletBoundariesLocal) { 1913 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1914 } 1915 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1916 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1917 } else if (pcbddc->NeumannBoundariesLocal) { 1918 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1919 } 1920 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1921 PetscCall(VecDestroy(&global)); 1922 PetscCall(VecDestroy(&local)); 1923 /* detect local disconnected subdomains if requested or needed */ 1924 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1925 IS primalv = NULL; 1926 PetscInt nel; 1927 PetscBool filter = pcbddc->detect_disconnected_filter; 1928 1929 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1930 PetscCall(PetscFree(pcbddc->local_subs)); 1931 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1932 if (matis->allow_repeated && nel) { 1933 const PetscInt *elsizes; 1934 1935 pcbddc->n_local_subs = nel; 1936 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1937 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1938 for (PetscInt i = 0, c = 0; i < nel; i++) { 1939 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1940 c += elsizes[i]; 1941 } 1942 } else { 1943 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1944 } 1945 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1946 PetscCall(ISDestroy(&primalv)); 1947 } 1948 /* early stage corner detection */ 1949 { 1950 DM dm; 1951 1952 PetscCall(MatGetDM(pc->pmat, &dm)); 1953 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1954 if (dm) { 1955 PetscBool isda; 1956 1957 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1958 if (isda) { 1959 ISLocalToGlobalMapping l2l; 1960 IS corners; 1961 Mat lA; 1962 PetscBool gl, lo; 1963 1964 { 1965 Vec cvec; 1966 const PetscScalar *coords; 1967 PetscInt dof, n, cdim; 1968 PetscBool memc = PETSC_TRUE; 1969 1970 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1971 PetscCall(DMGetCoordinates(dm, &cvec)); 1972 PetscCall(VecGetLocalSize(cvec, &n)); 1973 PetscCall(VecGetBlockSize(cvec, &cdim)); 1974 n /= cdim; 1975 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1976 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1977 PetscCall(VecGetArrayRead(cvec, &coords)); 1978 #if defined(PETSC_USE_COMPLEX) 1979 memc = PETSC_FALSE; 1980 #endif 1981 if (dof != 1) memc = PETSC_FALSE; 1982 if (memc) { 1983 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1984 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1985 PetscReal *bcoords = pcbddc->mat_graph->coords; 1986 PetscInt i, b, d; 1987 1988 for (i = 0; i < n; i++) { 1989 for (b = 0; b < dof; b++) { 1990 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1991 } 1992 } 1993 } 1994 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1995 pcbddc->mat_graph->cdim = cdim; 1996 pcbddc->mat_graph->cnloc = dof * n; 1997 pcbddc->mat_graph->cloc = PETSC_FALSE; 1998 } 1999 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 2000 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 2001 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 2002 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 2003 lo = (PetscBool)(l2l && corners); 2004 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2005 if (gl) { /* From PETSc's DMDA */ 2006 const PetscInt *idx; 2007 PetscInt dof, bs, *idxout, n; 2008 2009 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 2010 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 2011 PetscCall(ISGetLocalSize(corners, &n)); 2012 PetscCall(ISGetIndices(corners, &idx)); 2013 if (bs == dof) { 2014 PetscCall(PetscMalloc1(n, &idxout)); 2015 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 2016 } else { /* the original DMDA local-to-local map have been modified */ 2017 PetscInt i, d; 2018 2019 PetscCall(PetscMalloc1(dof * n, &idxout)); 2020 for (i = 0; i < n; i++) 2021 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 2022 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 2023 2024 bs = 1; 2025 n *= dof; 2026 } 2027 PetscCall(ISRestoreIndices(corners, &idx)); 2028 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 2029 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 2030 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 2031 PetscCall(ISDestroy(&corners)); 2032 pcbddc->corner_selected = PETSC_TRUE; 2033 pcbddc->corner_selection = PETSC_TRUE; 2034 } 2035 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 2036 } 2037 } 2038 } 2039 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 2040 DM dm; 2041 2042 PetscCall(MatGetDM(pc->pmat, &dm)); 2043 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2044 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 2045 Vec vcoords; 2046 PetscSection section; 2047 PetscReal *coords; 2048 PetscInt d, cdim, nl, nf, **ctxs; 2049 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 2050 /* debug coordinates */ 2051 PetscViewer viewer; 2052 PetscBool flg; 2053 PetscViewerFormat format; 2054 const char *prefix; 2055 2056 PetscCall(DMGetCoordinateDim(dm, &cdim)); 2057 PetscCall(DMGetLocalSection(dm, §ion)); 2058 PetscCall(PetscSectionGetNumFields(section, &nf)); 2059 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 2060 PetscCall(VecGetLocalSize(vcoords, &nl)); 2061 PetscCall(PetscMalloc1(nl * cdim, &coords)); 2062 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 2063 PetscCall(PetscMalloc1(nf, &ctxs[0])); 2064 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 2065 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 2066 2067 /* debug coordinates */ 2068 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 2069 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 2070 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 2071 for (d = 0; d < cdim; d++) { 2072 PetscInt i; 2073 const PetscScalar *v; 2074 char name[16]; 2075 2076 for (i = 0; i < nf; i++) ctxs[i][0] = d; 2077 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d)); 2078 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 2079 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 2080 if (flg) PetscCall(VecView(vcoords, viewer)); 2081 PetscCall(VecGetArrayRead(vcoords, &v)); 2082 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 2083 PetscCall(VecRestoreArrayRead(vcoords, &v)); 2084 } 2085 PetscCall(VecDestroy(&vcoords)); 2086 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 2087 PetscCall(PetscFree(coords)); 2088 PetscCall(PetscFree(ctxs[0])); 2089 PetscCall(PetscFree2(funcs, ctxs)); 2090 if (flg) { 2091 PetscCall(PetscViewerPopFormat(viewer)); 2092 PetscCall(PetscViewerDestroy(&viewer)); 2093 } 2094 } 2095 } 2096 PetscFunctionReturn(PETSC_SUCCESS); 2097 } 2098 2099 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 2100 { 2101 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2102 IS nis; 2103 const PetscInt *idxs; 2104 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 2105 2106 PetscFunctionBegin; 2107 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 2108 if (mop == MPI_LAND) { 2109 /* init rootdata with true */ 2110 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 2111 } else { 2112 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 2113 } 2114 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 2115 PetscCall(ISGetLocalSize(*is, &nd)); 2116 PetscCall(ISGetIndices(*is, &idxs)); 2117 for (i = 0; i < nd; i++) 2118 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 2119 PetscCall(ISRestoreIndices(*is, &idxs)); 2120 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2121 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2122 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2123 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2124 if (mop == MPI_LAND) { 2125 PetscCall(PetscMalloc1(nd, &nidxs)); 2126 } else { 2127 PetscCall(PetscMalloc1(n, &nidxs)); 2128 } 2129 for (i = 0, nnd = 0; i < n; i++) 2130 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2131 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2132 PetscCall(ISDestroy(is)); 2133 *is = nis; 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2138 { 2139 PC_IS *pcis = (PC_IS *)pc->data; 2140 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2141 2142 PetscFunctionBegin; 2143 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2144 if (pcbddc->ChangeOfBasisMatrix) { 2145 Vec swap; 2146 2147 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2148 swap = pcbddc->work_change; 2149 pcbddc->work_change = r; 2150 r = swap; 2151 } 2152 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2153 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2154 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2155 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2156 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2157 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2158 PetscCall(VecSet(z, 0.)); 2159 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2160 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2161 if (pcbddc->ChangeOfBasisMatrix) { 2162 pcbddc->work_change = r; 2163 PetscCall(VecCopy(z, pcbddc->work_change)); 2164 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2165 } 2166 PetscFunctionReturn(PETSC_SUCCESS); 2167 } 2168 2169 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2170 { 2171 PCBDDCBenignMatMult_ctx ctx; 2172 PetscBool apply_right, apply_left, reset_x; 2173 2174 PetscFunctionBegin; 2175 PetscCall(MatShellGetContext(A, &ctx)); 2176 if (transpose) { 2177 apply_right = ctx->apply_left; 2178 apply_left = ctx->apply_right; 2179 } else { 2180 apply_right = ctx->apply_right; 2181 apply_left = ctx->apply_left; 2182 } 2183 reset_x = PETSC_FALSE; 2184 if (apply_right) { 2185 const PetscScalar *ax; 2186 PetscInt nl, i; 2187 2188 PetscCall(VecGetLocalSize(x, &nl)); 2189 PetscCall(VecGetArrayRead(x, &ax)); 2190 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2191 PetscCall(VecRestoreArrayRead(x, &ax)); 2192 for (i = 0; i < ctx->benign_n; i++) { 2193 PetscScalar sum, val; 2194 const PetscInt *idxs; 2195 PetscInt nz, j; 2196 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2197 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2198 sum = 0.; 2199 if (ctx->apply_p0) { 2200 val = ctx->work[idxs[nz - 1]]; 2201 for (j = 0; j < nz - 1; j++) { 2202 sum += ctx->work[idxs[j]]; 2203 ctx->work[idxs[j]] += val; 2204 } 2205 } else { 2206 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2207 } 2208 ctx->work[idxs[nz - 1]] -= sum; 2209 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2210 } 2211 PetscCall(VecPlaceArray(x, ctx->work)); 2212 reset_x = PETSC_TRUE; 2213 } 2214 if (transpose) { 2215 PetscCall(MatMultTranspose(ctx->A, x, y)); 2216 } else { 2217 PetscCall(MatMult(ctx->A, x, y)); 2218 } 2219 if (reset_x) PetscCall(VecResetArray(x)); 2220 if (apply_left) { 2221 PetscScalar *ay; 2222 PetscInt i; 2223 2224 PetscCall(VecGetArray(y, &ay)); 2225 for (i = 0; i < ctx->benign_n; i++) { 2226 PetscScalar sum, val; 2227 const PetscInt *idxs; 2228 PetscInt nz, j; 2229 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2230 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2231 val = -ay[idxs[nz - 1]]; 2232 if (ctx->apply_p0) { 2233 sum = 0.; 2234 for (j = 0; j < nz - 1; j++) { 2235 sum += ay[idxs[j]]; 2236 ay[idxs[j]] += val; 2237 } 2238 ay[idxs[nz - 1]] += sum; 2239 } else { 2240 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2241 ay[idxs[nz - 1]] = 0.; 2242 } 2243 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2244 } 2245 PetscCall(VecRestoreArray(y, &ay)); 2246 } 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2251 { 2252 PetscFunctionBegin; 2253 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2258 { 2259 PetscFunctionBegin; 2260 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2261 PetscFunctionReturn(PETSC_SUCCESS); 2262 } 2263 2264 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2265 { 2266 PC_IS *pcis = (PC_IS *)pc->data; 2267 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2268 PCBDDCBenignMatMult_ctx ctx; 2269 2270 PetscFunctionBegin; 2271 if (!restore) { 2272 Mat A_IB, A_BI; 2273 PetscScalar *work; 2274 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2275 2276 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2277 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2278 PetscCall(PetscMalloc1(pcis->n, &work)); 2279 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2280 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2281 PetscCall(MatSetType(A_IB, MATSHELL)); 2282 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private)); 2283 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private)); 2284 PetscCall(PetscNew(&ctx)); 2285 PetscCall(MatShellSetContext(A_IB, ctx)); 2286 ctx->apply_left = PETSC_TRUE; 2287 ctx->apply_right = PETSC_FALSE; 2288 ctx->apply_p0 = PETSC_FALSE; 2289 ctx->benign_n = pcbddc->benign_n; 2290 if (reuse) { 2291 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2292 ctx->free = PETSC_FALSE; 2293 } else { /* TODO: could be optimized for successive solves */ 2294 ISLocalToGlobalMapping N_to_D; 2295 PetscInt i; 2296 2297 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2298 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2299 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2300 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2301 ctx->free = PETSC_TRUE; 2302 } 2303 ctx->A = pcis->A_IB; 2304 ctx->work = work; 2305 PetscCall(MatSetUp(A_IB)); 2306 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2307 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2308 pcis->A_IB = A_IB; 2309 2310 /* A_BI as A_IB^T */ 2311 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2312 pcbddc->benign_original_mat = pcis->A_BI; 2313 pcis->A_BI = A_BI; 2314 } else { 2315 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2316 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2317 PetscCall(MatDestroy(&pcis->A_IB)); 2318 pcis->A_IB = ctx->A; 2319 ctx->A = NULL; 2320 PetscCall(MatDestroy(&pcis->A_BI)); 2321 pcis->A_BI = pcbddc->benign_original_mat; 2322 pcbddc->benign_original_mat = NULL; 2323 if (ctx->free) { 2324 PetscInt i; 2325 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2326 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2327 } 2328 PetscCall(PetscFree(ctx->work)); 2329 PetscCall(PetscFree(ctx)); 2330 } 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 /* used just in bddc debug mode */ 2335 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2336 { 2337 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2338 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2339 Mat An; 2340 2341 PetscFunctionBegin; 2342 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2343 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2344 if (is1) { 2345 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2346 PetscCall(MatDestroy(&An)); 2347 } else { 2348 *B = An; 2349 } 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } 2352 2353 /* TODO: add reuse flag */ 2354 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2355 { 2356 Mat Bt; 2357 PetscScalar *a, *bdata; 2358 const PetscInt *ii, *ij; 2359 PetscInt m, n, i, nnz, *bii, *bij; 2360 PetscBool flg_row; 2361 2362 PetscFunctionBegin; 2363 PetscCall(MatGetSize(A, &n, &m)); 2364 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2365 PetscCall(MatSeqAIJGetArray(A, &a)); 2366 nnz = n; 2367 for (i = 0; i < ii[n]; i++) { 2368 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2369 } 2370 PetscCall(PetscMalloc1(n + 1, &bii)); 2371 PetscCall(PetscMalloc1(nnz, &bij)); 2372 PetscCall(PetscMalloc1(nnz, &bdata)); 2373 nnz = 0; 2374 bii[0] = 0; 2375 for (i = 0; i < n; i++) { 2376 PetscInt j; 2377 for (j = ii[i]; j < ii[i + 1]; j++) { 2378 PetscScalar entry = a[j]; 2379 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2380 bij[nnz] = ij[j]; 2381 bdata[nnz] = entry; 2382 nnz++; 2383 } 2384 } 2385 bii[i + 1] = nnz; 2386 } 2387 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2388 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2389 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2390 { 2391 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2392 b->free_a = PETSC_TRUE; 2393 b->free_ij = PETSC_TRUE; 2394 } 2395 if (*B == A) PetscCall(MatDestroy(&A)); 2396 *B = Bt; 2397 PetscFunctionReturn(PETSC_SUCCESS); 2398 } 2399 2400 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2401 { 2402 Mat B = NULL; 2403 DM dm; 2404 IS is_dummy, *cc_n; 2405 ISLocalToGlobalMapping l2gmap_dummy; 2406 PCBDDCGraph graph; 2407 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2408 PetscInt i, n; 2409 PetscInt *xadj, *adjncy; 2410 PetscBool isplex = PETSC_FALSE; 2411 2412 PetscFunctionBegin; 2413 if (ncc) *ncc = 0; 2414 if (cc) *cc = NULL; 2415 if (primalv) *primalv = NULL; 2416 PetscCall(PCBDDCGraphCreate(&graph)); 2417 PetscCall(MatGetDM(pc->pmat, &dm)); 2418 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2419 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2420 if (filter) isplex = PETSC_FALSE; 2421 2422 if (isplex) { /* this code has been modified from plexpartition.c */ 2423 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2424 PetscInt *adj = NULL; 2425 IS cellNumbering; 2426 const PetscInt *cellNum; 2427 PetscBool useCone, useClosure; 2428 PetscSection section; 2429 PetscSegBuffer adjBuffer; 2430 PetscSF sfPoint; 2431 2432 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2433 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2434 PetscCall(DMGetPointSF(dm, &sfPoint)); 2435 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2436 /* Build adjacency graph via a section/segbuffer */ 2437 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2438 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2439 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2440 /* Always use FVM adjacency to create partitioner graph */ 2441 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2442 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2443 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2444 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2445 for (n = 0, p = pStart; p < pEnd; p++) { 2446 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2447 if (nroots > 0) { 2448 if (cellNum[p] < 0) continue; 2449 } 2450 adjSize = PETSC_DETERMINE; 2451 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2452 for (a = 0; a < adjSize; ++a) { 2453 const PetscInt point = adj[a]; 2454 if (pStart <= point && point < pEnd) { 2455 PetscInt *PETSC_RESTRICT pBuf; 2456 PetscCall(PetscSectionAddDof(section, p, 1)); 2457 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2458 *pBuf = point; 2459 } 2460 } 2461 n++; 2462 } 2463 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2464 /* Derive CSR graph from section/segbuffer */ 2465 PetscCall(PetscSectionSetUp(section)); 2466 PetscCall(PetscSectionGetStorageSize(section, &size)); 2467 PetscCall(PetscMalloc1(n + 1, &xadj)); 2468 for (idx = 0, p = pStart; p < pEnd; p++) { 2469 if (nroots > 0) { 2470 if (cellNum[p] < 0) continue; 2471 } 2472 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2473 } 2474 xadj[n] = size; 2475 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2476 /* Clean up */ 2477 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2478 PetscCall(PetscSectionDestroy(§ion)); 2479 PetscCall(PetscFree(adj)); 2480 graph->xadj = xadj; 2481 graph->adjncy = adjncy; 2482 } else { 2483 Mat A; 2484 PetscBool isseqaij, flg_row; 2485 2486 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2487 if (!A->rmap->N || !A->cmap->N) { 2488 PetscCall(PCBDDCGraphDestroy(&graph)); 2489 PetscFunctionReturn(PETSC_SUCCESS); 2490 } 2491 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2492 if (!isseqaij && filter) { 2493 PetscBool isseqdense; 2494 2495 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2496 if (!isseqdense) { 2497 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2498 } else { /* TODO: rectangular case and LDA */ 2499 PetscScalar *array; 2500 PetscReal chop = 1.e-6; 2501 2502 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2503 PetscCall(MatDenseGetArray(B, &array)); 2504 PetscCall(MatGetSize(B, &n, NULL)); 2505 for (i = 0; i < n; i++) { 2506 PetscInt j; 2507 for (j = i + 1; j < n; j++) { 2508 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2509 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2510 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2511 } 2512 } 2513 PetscCall(MatDenseRestoreArray(B, &array)); 2514 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2515 } 2516 } else { 2517 PetscCall(PetscObjectReference((PetscObject)A)); 2518 B = A; 2519 } 2520 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2521 2522 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2523 if (filter) { 2524 PetscScalar *data; 2525 PetscInt j, cum; 2526 2527 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2528 PetscCall(MatSeqAIJGetArray(B, &data)); 2529 cum = 0; 2530 for (i = 0; i < n; i++) { 2531 PetscInt t; 2532 2533 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2534 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2535 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2536 } 2537 t = xadj_filtered[i]; 2538 xadj_filtered[i] = cum; 2539 cum += t; 2540 } 2541 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2542 graph->xadj = xadj_filtered; 2543 graph->adjncy = adjncy_filtered; 2544 } else { 2545 graph->xadj = xadj; 2546 graph->adjncy = adjncy; 2547 } 2548 } 2549 /* compute local connected components using PCBDDCGraph */ 2550 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2551 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2552 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2553 PetscCall(ISDestroy(&is_dummy)); 2554 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2555 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2556 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2557 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2558 2559 /* partial clean up */ 2560 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2561 if (B) { 2562 PetscBool flg_row; 2563 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2564 PetscCall(MatDestroy(&B)); 2565 } 2566 if (isplex) { 2567 PetscCall(PetscFree(xadj)); 2568 PetscCall(PetscFree(adjncy)); 2569 } 2570 2571 /* get back data */ 2572 if (isplex) { 2573 if (ncc) *ncc = graph->ncc; 2574 if (cc || primalv) { 2575 Mat A; 2576 PetscBT btv, btvt, btvc; 2577 PetscSection subSection; 2578 PetscInt *ids, cum, cump, *cids, *pids; 2579 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2580 2581 PetscCall(DMGetDimension(dm, &dim)); 2582 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2583 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2584 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2585 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2586 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2587 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2588 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2589 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2590 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2591 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2592 2593 /* First see if we find corners for the subdomains, i.e. a vertex 2594 shared by at least dim subdomain boundary faces. This does not 2595 cover all the possible cases with simplices but it is enough 2596 for tensor cells */ 2597 if (vStart != fStart && dim <= 3) { 2598 for (PetscInt c = cStart; c < cEnd; c++) { 2599 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2600 const PetscInt *faces; 2601 2602 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2603 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2604 PetscCall(DMPlexGetCone(dm, c, &faces)); 2605 for (PetscInt f = 0; f < nf; f++) { 2606 PetscInt nc, ff; 2607 2608 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2609 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2610 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2611 } 2612 if (cnt >= mcnt) { 2613 PetscInt size, *closure = NULL; 2614 2615 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2616 for (PetscInt k = 0; k < 2 * size; k += 2) { 2617 PetscInt v = closure[k]; 2618 if (v >= vStart && v < vEnd) { 2619 PetscInt vsize, *vclosure = NULL; 2620 2621 cnt = 0; 2622 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2623 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2624 PetscInt f = vclosure[vk]; 2625 if (f >= fStart && f < fEnd) { 2626 PetscInt nc, ff; 2627 PetscBool valid = PETSC_FALSE; 2628 2629 for (PetscInt fk = 0; fk < nf; fk++) 2630 if (f == cfaces[fk]) valid = PETSC_TRUE; 2631 if (!valid) continue; 2632 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2633 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2634 if (nc == 1 && f == ff) cnt++; 2635 } 2636 } 2637 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2638 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2639 } 2640 } 2641 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2642 } 2643 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2644 } 2645 } 2646 2647 cids[0] = 0; 2648 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2649 PetscInt j; 2650 2651 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2652 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2653 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2654 2655 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2656 for (k = 0; k < 2 * size; k += 2) { 2657 PetscInt s, pp, p = closure[k], off, dof, cdof; 2658 2659 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2660 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2661 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2662 for (s = 0; s < dof - cdof; s++) { 2663 if (PetscBTLookupSet(btvt, off + s)) continue; 2664 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2665 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2666 else pids[cump++] = off + s; /* cross-vertex */ 2667 } 2668 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2669 if (pp != p) { 2670 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2671 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2672 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2673 for (s = 0; s < dof - cdof; s++) { 2674 if (PetscBTLookupSet(btvt, off + s)) continue; 2675 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2676 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2677 else pids[cump++] = off + s; /* cross-vertex */ 2678 } 2679 } 2680 } 2681 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2682 } 2683 cids[i + 1] = cum; 2684 /* mark dofs as already assigned */ 2685 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2686 } 2687 if (cc) { 2688 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2689 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2690 *cc = cc_n; 2691 } 2692 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2693 PetscCall(PetscFree3(ids, cids, pids)); 2694 PetscCall(PetscBTDestroy(&btv)); 2695 PetscCall(PetscBTDestroy(&btvt)); 2696 PetscCall(PetscBTDestroy(&btvc)); 2697 PetscCall(DMDestroy(&dm)); 2698 } 2699 } else { 2700 if (ncc) *ncc = graph->ncc; 2701 if (cc) { 2702 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2703 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2704 *cc = cc_n; 2705 } 2706 } 2707 /* clean up graph */ 2708 graph->xadj = NULL; 2709 graph->adjncy = NULL; 2710 PetscCall(PCBDDCGraphDestroy(&graph)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2715 { 2716 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2717 PC_IS *pcis = (PC_IS *)pc->data; 2718 IS dirIS = NULL; 2719 PetscInt i; 2720 2721 PetscFunctionBegin; 2722 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2723 if (zerodiag) { 2724 Mat A; 2725 Vec vec3_N; 2726 PetscScalar *vals; 2727 const PetscInt *idxs; 2728 PetscInt nz, *count; 2729 2730 /* p0 */ 2731 PetscCall(VecSet(pcis->vec1_N, 0.)); 2732 PetscCall(PetscMalloc1(pcis->n, &vals)); 2733 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2734 PetscCall(ISGetIndices(zerodiag, &idxs)); 2735 for (i = 0; i < nz; i++) vals[i] = 1.; 2736 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2737 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2738 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2739 /* v_I */ 2740 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2741 for (i = 0; i < nz; i++) vals[i] = 0.; 2742 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2743 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2744 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2745 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2746 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2747 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2748 if (dirIS) { 2749 PetscInt n; 2750 2751 PetscCall(ISGetLocalSize(dirIS, &n)); 2752 PetscCall(ISGetIndices(dirIS, &idxs)); 2753 for (i = 0; i < n; i++) vals[i] = 0.; 2754 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2755 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2756 } 2757 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2758 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2759 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2760 PetscCall(VecSet(vec3_N, 0.)); 2761 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2762 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2763 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2764 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2765 PetscCall(PetscFree(vals)); 2766 PetscCall(VecDestroy(&vec3_N)); 2767 2768 /* there should not be any pressure dofs lying on the interface */ 2769 PetscCall(PetscCalloc1(pcis->n, &count)); 2770 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2771 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2772 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2773 PetscCall(ISGetIndices(zerodiag, &idxs)); 2774 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2775 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2776 PetscCall(PetscFree(count)); 2777 } 2778 PetscCall(ISDestroy(&dirIS)); 2779 2780 /* check PCBDDCBenignGetOrSetP0 */ 2781 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2782 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2783 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2784 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2785 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2786 for (i = 0; i < pcbddc->benign_n; i++) { 2787 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2788 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2789 } 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2794 { 2795 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2796 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2797 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2798 PetscInt nz, n, benign_n, bsp = 1; 2799 PetscInt *interior_dofs, n_interior_dofs, nneu; 2800 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2801 2802 PetscFunctionBegin; 2803 if (reuse) goto project_b0; 2804 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2805 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2806 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2807 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2808 has_null_pressures = PETSC_TRUE; 2809 have_null = PETSC_TRUE; 2810 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2811 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2812 Checks if all the pressure dofs in each subdomain have a zero diagonal 2813 If not, a change of basis on pressures is not needed 2814 since the local Schur complements are already SPD 2815 */ 2816 if (pcbddc->n_ISForDofsLocal) { 2817 IS iP = NULL; 2818 PetscInt p, *pp; 2819 PetscBool flg, blocked = PETSC_FALSE; 2820 2821 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2822 n = pcbddc->n_ISForDofsLocal; 2823 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2824 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2825 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL)); 2826 PetscOptionsEnd(); 2827 if (!flg) { 2828 n = 1; 2829 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2830 } 2831 2832 bsp = 0; 2833 for (p = 0; p < n; p++) { 2834 PetscInt bs = 1; 2835 2836 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2837 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2838 bsp += bs; 2839 } 2840 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2841 bsp = 0; 2842 for (p = 0; p < n; p++) { 2843 const PetscInt *idxs; 2844 PetscInt b, bs = 1, npl, *bidxs; 2845 2846 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2847 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2848 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2849 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2850 for (b = 0; b < bs; b++) { 2851 PetscInt i; 2852 2853 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2854 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2855 bsp++; 2856 } 2857 PetscCall(PetscFree(bidxs)); 2858 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2859 } 2860 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2861 2862 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2863 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2864 if (iP) { 2865 IS newpressures; 2866 2867 PetscCall(ISDifference(pressures, iP, &newpressures)); 2868 PetscCall(ISDestroy(&pressures)); 2869 pressures = newpressures; 2870 } 2871 PetscCall(ISSorted(pressures, &sorted)); 2872 if (!sorted) PetscCall(ISSort(pressures)); 2873 PetscCall(PetscFree(pp)); 2874 } 2875 2876 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2877 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2878 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2879 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2880 PetscCall(ISSorted(zerodiag, &sorted)); 2881 if (!sorted) PetscCall(ISSort(zerodiag)); 2882 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2883 zerodiag_save = zerodiag; 2884 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2885 if (!nz) { 2886 if (n) have_null = PETSC_FALSE; 2887 has_null_pressures = PETSC_FALSE; 2888 PetscCall(ISDestroy(&zerodiag)); 2889 } 2890 recompute_zerodiag = PETSC_FALSE; 2891 2892 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2893 zerodiag_subs = NULL; 2894 benign_n = 0; 2895 n_interior_dofs = 0; 2896 interior_dofs = NULL; 2897 nneu = 0; 2898 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2899 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2900 if (checkb) { /* need to compute interior nodes */ 2901 PetscInt n, i; 2902 PetscInt *count; 2903 ISLocalToGlobalMapping mapping; 2904 2905 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2906 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2907 PetscCall(PetscMalloc1(n, &interior_dofs)); 2908 for (i = 0; i < n; i++) 2909 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2910 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2911 } 2912 if (has_null_pressures) { 2913 IS *subs; 2914 PetscInt nsubs, i, j, nl; 2915 const PetscInt *idxs; 2916 PetscScalar *array; 2917 Vec *work; 2918 2919 subs = pcbddc->local_subs; 2920 nsubs = pcbddc->n_local_subs; 2921 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2922 if (checkb) { 2923 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2924 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2925 PetscCall(ISGetIndices(zerodiag, &idxs)); 2926 /* work[0] = 1_p */ 2927 PetscCall(VecSet(work[0], 0.)); 2928 PetscCall(VecGetArray(work[0], &array)); 2929 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2930 PetscCall(VecRestoreArray(work[0], &array)); 2931 /* work[0] = 1_v */ 2932 PetscCall(VecSet(work[1], 1.)); 2933 PetscCall(VecGetArray(work[1], &array)); 2934 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2935 PetscCall(VecRestoreArray(work[1], &array)); 2936 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2937 } 2938 2939 if (nsubs > 1 || bsp > 1) { 2940 IS *is; 2941 PetscInt b, totb; 2942 2943 totb = bsp; 2944 is = bsp > 1 ? bzerodiag : &zerodiag; 2945 nsubs = PetscMax(nsubs, 1); 2946 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2947 for (b = 0; b < totb; b++) { 2948 for (i = 0; i < nsubs; i++) { 2949 ISLocalToGlobalMapping l2g; 2950 IS t_zerodiag_subs; 2951 PetscInt nl; 2952 2953 if (subs) { 2954 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2955 } else { 2956 IS tis; 2957 2958 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2959 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2960 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2961 PetscCall(ISDestroy(&tis)); 2962 } 2963 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2964 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2965 if (nl) { 2966 PetscBool valid = PETSC_TRUE; 2967 2968 if (checkb) { 2969 PetscCall(VecSet(matis->x, 0)); 2970 PetscCall(ISGetLocalSize(subs[i], &nl)); 2971 PetscCall(ISGetIndices(subs[i], &idxs)); 2972 PetscCall(VecGetArray(matis->x, &array)); 2973 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2974 PetscCall(VecRestoreArray(matis->x, &array)); 2975 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2976 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2977 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2978 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2979 PetscCall(VecGetArray(matis->y, &array)); 2980 for (j = 0; j < n_interior_dofs; j++) { 2981 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2982 valid = PETSC_FALSE; 2983 break; 2984 } 2985 } 2986 PetscCall(VecRestoreArray(matis->y, &array)); 2987 } 2988 if (valid && nneu) { 2989 const PetscInt *idxs; 2990 PetscInt nzb; 2991 2992 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2993 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2994 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2995 if (nzb) valid = PETSC_FALSE; 2996 } 2997 if (valid && pressures) { 2998 IS t_pressure_subs, tmp; 2999 PetscInt i1, i2; 3000 3001 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 3002 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 3003 PetscCall(ISGetLocalSize(tmp, &i1)); 3004 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 3005 if (i2 != i1) valid = PETSC_FALSE; 3006 PetscCall(ISDestroy(&t_pressure_subs)); 3007 PetscCall(ISDestroy(&tmp)); 3008 } 3009 if (valid) { 3010 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 3011 benign_n++; 3012 } else recompute_zerodiag = PETSC_TRUE; 3013 } 3014 PetscCall(ISDestroy(&t_zerodiag_subs)); 3015 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 3016 } 3017 } 3018 } else { /* there's just one subdomain (or zero if they have not been detected */ 3019 PetscBool valid = PETSC_TRUE; 3020 3021 if (nneu) valid = PETSC_FALSE; 3022 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 3023 if (valid && checkb) { 3024 PetscCall(MatMult(matis->A, work[0], matis->x)); 3025 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 3026 PetscCall(VecGetArray(matis->x, &array)); 3027 for (j = 0; j < n_interior_dofs; j++) { 3028 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 3029 valid = PETSC_FALSE; 3030 break; 3031 } 3032 } 3033 PetscCall(VecRestoreArray(matis->x, &array)); 3034 } 3035 if (valid) { 3036 benign_n = 1; 3037 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 3038 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 3039 zerodiag_subs[0] = zerodiag; 3040 } 3041 } 3042 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 3043 } 3044 PetscCall(PetscFree(interior_dofs)); 3045 3046 if (!benign_n) { 3047 PetscInt n; 3048 3049 PetscCall(ISDestroy(&zerodiag)); 3050 recompute_zerodiag = PETSC_FALSE; 3051 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3052 if (n) have_null = PETSC_FALSE; 3053 } 3054 3055 /* final check for null pressures */ 3056 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 3057 3058 if (recompute_zerodiag) { 3059 PetscCall(ISDestroy(&zerodiag)); 3060 if (benign_n == 1) { 3061 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 3062 zerodiag = zerodiag_subs[0]; 3063 } else { 3064 PetscInt i, nzn, *new_idxs; 3065 3066 nzn = 0; 3067 for (i = 0; i < benign_n; i++) { 3068 PetscInt ns; 3069 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 3070 nzn += ns; 3071 } 3072 PetscCall(PetscMalloc1(nzn, &new_idxs)); 3073 nzn = 0; 3074 for (i = 0; i < benign_n; i++) { 3075 PetscInt ns, *idxs; 3076 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 3077 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 3078 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 3079 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 3080 nzn += ns; 3081 } 3082 PetscCall(PetscSortInt(nzn, new_idxs)); 3083 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 3084 } 3085 have_null = PETSC_FALSE; 3086 } 3087 3088 /* determines if the coarse solver will be singular or not */ 3089 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 3090 3091 /* Prepare matrix to compute no-net-flux */ 3092 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 3093 Mat A, loc_divudotp; 3094 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 3095 IS row, col, isused = NULL; 3096 PetscInt M, N, n, st, n_isused; 3097 3098 if (pressures) { 3099 isused = pressures; 3100 } else { 3101 isused = zerodiag_save; 3102 } 3103 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 3104 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 3105 PetscCall(MatGetLocalSize(A, &n, NULL)); 3106 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 3107 n_isused = 0; 3108 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 3109 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 3110 st = st - n_isused; 3111 if (n) { 3112 const PetscInt *gidxs; 3113 3114 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 3115 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 3116 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 3117 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3118 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 3119 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 3120 } else { 3121 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3122 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3123 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3124 } 3125 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3126 PetscCall(ISGetSize(row, &M)); 3127 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3128 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3129 PetscCall(ISDestroy(&row)); 3130 PetscCall(ISDestroy(&col)); 3131 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3132 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3133 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3134 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3135 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3136 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3137 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3138 PetscCall(MatDestroy(&loc_divudotp)); 3139 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3140 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3141 } 3142 PetscCall(ISDestroy(&zerodiag_save)); 3143 PetscCall(ISDestroy(&pressures)); 3144 if (bzerodiag) { 3145 PetscInt i; 3146 3147 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3148 PetscCall(PetscFree(bzerodiag)); 3149 } 3150 pcbddc->benign_n = benign_n; 3151 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3152 3153 /* determines if the problem has subdomains with 0 pressure block */ 3154 have_null = (PetscBool)(!!pcbddc->benign_n); 3155 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3156 3157 project_b0: 3158 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3159 /* change of basis and p0 dofs */ 3160 if (pcbddc->benign_n) { 3161 PetscInt i, s, *nnz; 3162 3163 /* local change of basis for pressures */ 3164 PetscCall(MatDestroy(&pcbddc->benign_change)); 3165 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3166 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3167 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3168 PetscCall(PetscMalloc1(n, &nnz)); 3169 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3170 for (i = 0; i < pcbddc->benign_n; i++) { 3171 const PetscInt *idxs; 3172 PetscInt nzs, j; 3173 3174 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3175 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3176 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3177 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3178 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3179 } 3180 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3181 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3182 PetscCall(PetscFree(nnz)); 3183 /* set identity by default */ 3184 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3185 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3186 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3187 /* set change on pressures */ 3188 for (s = 0; s < pcbddc->benign_n; s++) { 3189 PetscScalar *array; 3190 const PetscInt *idxs; 3191 PetscInt nzs; 3192 3193 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3194 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3195 for (i = 0; i < nzs - 1; i++) { 3196 PetscScalar vals[2]; 3197 PetscInt cols[2]; 3198 3199 cols[0] = idxs[i]; 3200 cols[1] = idxs[nzs - 1]; 3201 vals[0] = 1.; 3202 vals[1] = 1.; 3203 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3204 } 3205 PetscCall(PetscMalloc1(nzs, &array)); 3206 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3207 array[nzs - 1] = 1.; 3208 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3209 /* store local idxs for p0 */ 3210 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3211 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3212 PetscCall(PetscFree(array)); 3213 } 3214 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3215 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3216 3217 /* project if needed */ 3218 if (pcbddc->benign_change_explicit) { 3219 Mat M; 3220 3221 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3222 PetscCall(MatDestroy(&pcbddc->local_mat)); 3223 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3224 PetscCall(MatDestroy(&M)); 3225 } 3226 /* store global idxs for p0 */ 3227 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3228 } 3229 *zerodiaglocal = zerodiag; 3230 PetscFunctionReturn(PETSC_SUCCESS); 3231 } 3232 3233 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3234 { 3235 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3236 PetscScalar *array; 3237 3238 PetscFunctionBegin; 3239 if (!pcbddc->benign_sf) { 3240 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3241 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3242 } 3243 if (get) { 3244 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3245 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3246 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3247 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3248 } else { 3249 PetscCall(VecGetArray(v, &array)); 3250 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3251 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3252 PetscCall(VecRestoreArray(v, &array)); 3253 } 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3258 { 3259 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3260 3261 PetscFunctionBegin; 3262 /* TODO: add error checking 3263 - avoid nested pop (or push) calls. 3264 - cannot push before pop. 3265 - cannot call this if pcbddc->local_mat is NULL 3266 */ 3267 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3268 if (pop) { 3269 if (pcbddc->benign_change_explicit) { 3270 IS is_p0; 3271 MatReuse reuse; 3272 3273 /* extract B_0 */ 3274 reuse = MAT_INITIAL_MATRIX; 3275 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3276 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3277 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3278 /* remove rows and cols from local problem */ 3279 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3280 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3281 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3282 PetscCall(ISDestroy(&is_p0)); 3283 } else { 3284 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3285 PetscScalar *vals; 3286 PetscInt i, n, *idxs_ins; 3287 3288 PetscCall(VecGetLocalSize(matis->y, &n)); 3289 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3290 if (!pcbddc->benign_B0) { 3291 PetscInt *nnz; 3292 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3293 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3294 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3295 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3296 for (i = 0; i < pcbddc->benign_n; i++) { 3297 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3298 nnz[i] = n - nnz[i]; 3299 } 3300 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3301 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3302 PetscCall(PetscFree(nnz)); 3303 } 3304 3305 for (i = 0; i < pcbddc->benign_n; i++) { 3306 PetscScalar *array; 3307 PetscInt *idxs, j, nz, cum; 3308 3309 PetscCall(VecSet(matis->x, 0.)); 3310 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3311 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3312 for (j = 0; j < nz; j++) vals[j] = 1.; 3313 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3314 PetscCall(VecAssemblyBegin(matis->x)); 3315 PetscCall(VecAssemblyEnd(matis->x)); 3316 PetscCall(VecSet(matis->y, 0.)); 3317 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3318 PetscCall(VecGetArray(matis->y, &array)); 3319 cum = 0; 3320 for (j = 0; j < n; j++) { 3321 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3322 vals[cum] = array[j]; 3323 idxs_ins[cum] = j; 3324 cum++; 3325 } 3326 } 3327 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3328 PetscCall(VecRestoreArray(matis->y, &array)); 3329 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3330 } 3331 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3332 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3333 PetscCall(PetscFree2(idxs_ins, vals)); 3334 } 3335 } else { /* push */ 3336 3337 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3338 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3339 PetscScalar *B0_vals; 3340 PetscInt *B0_cols, B0_ncol; 3341 3342 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3343 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3344 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3345 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3346 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3347 } 3348 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3349 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3355 { 3356 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3357 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3358 PetscBLASInt B_neigs, B_ierr, B_lwork; 3359 PetscBLASInt *B_iwork, *B_ifail; 3360 PetscScalar *work, lwork; 3361 PetscScalar *St, *S, *eigv; 3362 PetscScalar *Sarray, *Starray; 3363 PetscReal *eigs, thresh, lthresh, uthresh; 3364 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3365 PetscBool allocated_S_St, upart; 3366 #if defined(PETSC_USE_COMPLEX) 3367 PetscReal *rwork; 3368 #endif 3369 3370 PetscFunctionBegin; 3371 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3372 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3373 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3374 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3375 sub_schurs->is_posdef); 3376 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3377 3378 if (pcbddc->dbg_flag) { 3379 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3380 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3381 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3382 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3383 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3384 } 3385 3386 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3387 3388 /* max size of subsets */ 3389 mss = 0; 3390 for (i = 0; i < sub_schurs->n_subs; i++) { 3391 PetscInt subset_size; 3392 3393 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3394 mss = PetscMax(mss, subset_size); 3395 } 3396 3397 /* min/max and threshold */ 3398 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3399 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3400 nmax = PetscMax(nmin, nmax); 3401 allocated_S_St = PETSC_FALSE; 3402 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3403 allocated_S_St = PETSC_TRUE; 3404 } 3405 3406 /* allocate lapack workspace */ 3407 cum = cum2 = 0; 3408 maxneigs = 0; 3409 for (i = 0; i < sub_schurs->n_subs; i++) { 3410 PetscInt n, subset_size; 3411 3412 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3413 n = PetscMin(subset_size, nmax); 3414 cum += subset_size; 3415 cum2 += subset_size * n; 3416 maxneigs = PetscMax(maxneigs, n); 3417 } 3418 lwork = 0; 3419 if (mss) { 3420 PetscScalar sdummy = 0.; 3421 PetscBLASInt B_itype = 1; 3422 PetscBLASInt B_N, idummy = 0; 3423 PetscReal rdummy = 0., zero = 0.0; 3424 PetscReal eps = 0.0; /* dlamch? */ 3425 3426 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3427 PetscCall(PetscBLASIntCast(mss, &B_N)); 3428 B_lwork = -1; 3429 /* some implementations may complain about NULL pointers, even if we are querying */ 3430 S = &sdummy; 3431 St = &sdummy; 3432 eigs = &rdummy; 3433 eigv = &sdummy; 3434 B_iwork = &idummy; 3435 B_ifail = &idummy; 3436 #if defined(PETSC_USE_COMPLEX) 3437 rwork = &rdummy; 3438 #endif 3439 thresh = 1.0; 3440 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3441 #if defined(PETSC_USE_COMPLEX) 3442 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3443 #else 3444 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3445 #endif 3446 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr); 3447 PetscCall(PetscFPTrapPop()); 3448 } 3449 3450 nv = 0; 3451 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3452 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3453 } 3454 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3455 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3456 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3457 #if defined(PETSC_USE_COMPLEX) 3458 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3459 #endif 3460 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3461 &pcbddc->adaptive_constraints_data)); 3462 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3463 3464 maxneigs = 0; 3465 cum = cumarray = 0; 3466 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3467 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3468 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3469 const PetscInt *idxs; 3470 3471 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3472 for (cum = 0; cum < nv; cum++) { 3473 pcbddc->adaptive_constraints_n[cum] = 1; 3474 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3475 pcbddc->adaptive_constraints_data[cum] = 1.0; 3476 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3477 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3478 } 3479 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3480 } 3481 3482 if (mss) { /* multilevel */ 3483 if (sub_schurs->gdsw) { 3484 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3485 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3486 } else { 3487 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3488 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3489 } 3490 } 3491 3492 lthresh = pcbddc->adaptive_threshold[0]; 3493 uthresh = pcbddc->adaptive_threshold[1]; 3494 upart = pcbddc->use_deluxe_scaling; 3495 for (i = 0; i < sub_schurs->n_subs; i++) { 3496 const PetscInt *idxs; 3497 PetscReal upper, lower; 3498 PetscInt j, subset_size, eigs_start = 0; 3499 PetscBLASInt B_N; 3500 PetscBool same_data = PETSC_FALSE; 3501 PetscBool scal = PETSC_FALSE; 3502 3503 if (upart) { 3504 upper = PETSC_MAX_REAL; 3505 lower = uthresh; 3506 } else { 3507 if (sub_schurs->gdsw) { 3508 upper = uthresh; 3509 lower = PETSC_MIN_REAL; 3510 } else { 3511 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3512 upper = 1. / uthresh; 3513 lower = 0.; 3514 } 3515 } 3516 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3517 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3518 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3519 /* this is experimental: we assume the dofs have been properly grouped to have 3520 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3521 if (!sub_schurs->is_posdef) { 3522 Mat T; 3523 3524 for (j = 0; j < subset_size; j++) { 3525 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3526 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3527 PetscCall(MatScale(T, -1.0)); 3528 PetscCall(MatDestroy(&T)); 3529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3530 PetscCall(MatScale(T, -1.0)); 3531 PetscCall(MatDestroy(&T)); 3532 if (sub_schurs->change_primal_sub) { 3533 PetscInt nz, k; 3534 const PetscInt *idxs; 3535 3536 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3537 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3538 for (k = 0; k < nz; k++) { 3539 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3540 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3541 } 3542 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3543 } 3544 scal = PETSC_TRUE; 3545 break; 3546 } 3547 } 3548 } 3549 3550 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3551 if (sub_schurs->is_symmetric) { 3552 PetscInt j, k; 3553 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3554 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3555 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3556 } 3557 for (j = 0; j < subset_size; j++) { 3558 for (k = j; k < subset_size; k++) { 3559 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3560 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3561 } 3562 } 3563 } else { 3564 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3565 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3566 } 3567 } else { 3568 S = Sarray + cumarray; 3569 St = Starray + cumarray; 3570 } 3571 /* see if we can save some work */ 3572 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3573 3574 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3575 B_neigs = 0; 3576 } else { 3577 PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0; 3578 PetscReal eps = -1.0; /* dlamch? */ 3579 PetscInt nmin_s; 3580 PetscBool compute_range; 3581 3582 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3583 B_neigs = 0; 3584 compute_range = (PetscBool)!same_data; 3585 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3586 3587 if (pcbddc->dbg_flag) { 3588 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3589 3590 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3591 PetscCall( 3592 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3593 } 3594 3595 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3596 if (compute_range) { 3597 /* ask for eigenvalues larger than thresh */ 3598 if (sub_schurs->is_posdef) { 3599 #if defined(PETSC_USE_COMPLEX) 3600 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3601 #else 3602 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3603 #endif 3604 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3605 } else { /* no theory so far, but it works nicely */ 3606 PetscInt recipe = 0, recipe_m = 1; 3607 PetscReal bb[2]; 3608 3609 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3610 switch (recipe) { 3611 case 0: 3612 if (scal) { 3613 bb[0] = PETSC_MIN_REAL; 3614 bb[1] = lthresh; 3615 } else { 3616 bb[0] = uthresh; 3617 bb[1] = PETSC_MAX_REAL; 3618 } 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3621 #else 3622 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3623 #endif 3624 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3625 break; 3626 case 1: 3627 bb[0] = PETSC_MIN_REAL; 3628 bb[1] = lthresh * lthresh; 3629 #if defined(PETSC_USE_COMPLEX) 3630 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3631 #else 3632 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3633 #endif 3634 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3635 if (!scal) { 3636 PetscBLASInt B_neigs2 = 0; 3637 3638 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3639 bb[1] = PETSC_MAX_REAL; 3640 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3641 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3642 #if defined(PETSC_USE_COMPLEX) 3643 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3644 #else 3645 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3646 #endif 3647 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3648 B_neigs += B_neigs2; 3649 } 3650 break; 3651 case 2: 3652 if (scal) { 3653 bb[0] = PETSC_MIN_REAL; 3654 bb[1] = 0; 3655 #if defined(PETSC_USE_COMPLEX) 3656 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3657 #else 3658 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3659 #endif 3660 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3661 } else { 3662 PetscBLASInt B_neigs2 = 0; 3663 PetscBool do_copy = PETSC_FALSE; 3664 3665 lthresh = PetscMax(lthresh, 0.0); 3666 if (lthresh > 0.0) { 3667 bb[0] = PETSC_MIN_REAL; 3668 bb[1] = lthresh * lthresh; 3669 3670 do_copy = PETSC_TRUE; 3671 #if defined(PETSC_USE_COMPLEX) 3672 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3673 #else 3674 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3675 #endif 3676 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3677 } 3678 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3679 bb[1] = PETSC_MAX_REAL; 3680 if (do_copy) { 3681 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3682 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3683 } 3684 #if defined(PETSC_USE_COMPLEX) 3685 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3686 #else 3687 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3688 #endif 3689 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3690 B_neigs += B_neigs2; 3691 } 3692 break; 3693 case 3: 3694 if (scal) { 3695 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3696 } else { 3697 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3698 } 3699 if (!scal) { 3700 bb[0] = uthresh; 3701 bb[1] = PETSC_MAX_REAL; 3702 #if defined(PETSC_USE_COMPLEX) 3703 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3704 #else 3705 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3706 #endif 3707 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3708 } 3709 if (recipe_m > 0 && B_N - B_neigs > 0) { 3710 PetscBLASInt B_neigs2 = 0; 3711 3712 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3713 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3714 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3715 #if defined(PETSC_USE_COMPLEX) 3716 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3717 #else 3718 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3719 #endif 3720 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3721 B_neigs += B_neigs2; 3722 } 3723 break; 3724 case 4: 3725 bb[0] = PETSC_MIN_REAL; 3726 bb[1] = lthresh; 3727 #if defined(PETSC_USE_COMPLEX) 3728 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3729 #else 3730 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3731 #endif 3732 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3733 { 3734 PetscBLASInt B_neigs2 = 0; 3735 3736 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3737 bb[1] = PETSC_MAX_REAL; 3738 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3739 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3740 #if defined(PETSC_USE_COMPLEX) 3741 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3742 #else 3743 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3744 #endif 3745 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3746 B_neigs += B_neigs2; 3747 } 3748 break; 3749 case 5: /* same as before: first compute all eigenvalues, then filter */ 3750 #if defined(PETSC_USE_COMPLEX) 3751 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3752 #else 3753 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3754 #endif 3755 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3756 { 3757 PetscInt e, k, ne; 3758 for (e = 0, ne = 0; e < B_neigs; e++) { 3759 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3760 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3761 eigs[ne] = eigs[e]; 3762 ne++; 3763 } 3764 } 3765 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3766 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3767 } 3768 break; 3769 default: 3770 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3771 } 3772 } 3773 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3774 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3775 #if defined(PETSC_USE_COMPLEX) 3776 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3777 #else 3778 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3779 #endif 3780 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3781 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3782 PetscInt k; 3783 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3784 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3785 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3786 nmin = nmax; 3787 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3788 for (k = 0; k < nmax; k++) { 3789 eigs[k] = 1. / PETSC_SMALL; 3790 eigv[k * (subset_size + 1)] = 1.0; 3791 } 3792 } 3793 PetscCall(PetscFPTrapPop()); 3794 if (B_ierr) { 3795 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3796 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3797 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3798 } 3799 3800 if (B_neigs > nmax) { 3801 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3802 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3803 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3804 } 3805 3806 nmin_s = PetscMin(nmin, B_N); 3807 if (B_neigs < nmin_s) { 3808 PetscBLASInt B_neigs2 = 0; 3809 3810 if (upart) { 3811 if (scal) { 3812 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3813 B_IL = B_neigs + 1; 3814 } else { 3815 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3816 B_IU = B_N - B_neigs; 3817 } 3818 } else { 3819 B_IL = B_neigs + 1; 3820 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3821 } 3822 if (pcbddc->dbg_flag) { 3823 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3824 } 3825 if (sub_schurs->is_symmetric) { 3826 PetscInt j, k; 3827 for (j = 0; j < subset_size; j++) { 3828 for (k = j; k < subset_size; k++) { 3829 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3830 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3831 } 3832 } 3833 } else { 3834 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3835 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3836 } 3837 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3838 #if defined(PETSC_USE_COMPLEX) 3839 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3840 #else 3841 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3842 #endif 3843 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3844 PetscCall(PetscFPTrapPop()); 3845 B_neigs += B_neigs2; 3846 } 3847 if (B_ierr) { 3848 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3849 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3850 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3851 } 3852 if (pcbddc->dbg_flag) { 3853 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3854 for (j = 0; j < B_neigs; j++) { 3855 if (!sub_schurs->gdsw) { 3856 if (eigs[j] == 0.0) { 3857 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3858 } else { 3859 if (upart) { 3860 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3861 } else { 3862 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start]))); 3863 } 3864 } 3865 } else { 3866 double pg = (double)eigs[j + eigs_start]; 3867 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3868 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3869 } 3870 } 3871 } 3872 } 3873 /* change the basis back to the original one */ 3874 if (sub_schurs->change) { 3875 Mat change, phi, phit; 3876 3877 if (pcbddc->dbg_flag > 2) { 3878 PetscInt ii; 3879 for (ii = 0; ii < B_neigs; ii++) { 3880 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3881 for (j = 0; j < B_N; j++) { 3882 #if defined(PETSC_USE_COMPLEX) 3883 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3884 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3885 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3886 #else 3887 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3888 #endif 3889 } 3890 } 3891 } 3892 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3893 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3894 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3895 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3896 PetscCall(MatDestroy(&phit)); 3897 PetscCall(MatDestroy(&phi)); 3898 } 3899 maxneigs = PetscMax(B_neigs, maxneigs); 3900 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3901 if (B_neigs) { 3902 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3903 3904 if (pcbddc->dbg_flag > 1) { 3905 PetscInt ii; 3906 for (ii = 0; ii < B_neigs; ii++) { 3907 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3908 for (j = 0; j < B_N; j++) { 3909 #if defined(PETSC_USE_COMPLEX) 3910 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3911 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3912 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3913 #else 3914 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3915 #endif 3916 } 3917 } 3918 } 3919 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3920 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3921 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3922 cum++; 3923 } 3924 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3925 /* shift for next computation */ 3926 cumarray += subset_size * subset_size; 3927 } 3928 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3929 3930 if (mss) { 3931 if (sub_schurs->gdsw) { 3932 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3933 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3934 } else { 3935 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3936 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3937 /* destroy matrices (junk) */ 3938 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3939 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3940 } 3941 } 3942 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3943 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3944 #if defined(PETSC_USE_COMPLEX) 3945 PetscCall(PetscFree(rwork)); 3946 #endif 3947 if (pcbddc->dbg_flag) { 3948 PetscInt maxneigs_r; 3949 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3950 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3951 } 3952 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3953 PetscFunctionReturn(PETSC_SUCCESS); 3954 } 3955 3956 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3957 { 3958 Mat coarse_submat; 3959 3960 PetscFunctionBegin; 3961 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3962 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3963 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3964 3965 /* Setup local neumann solver ksp_R */ 3966 /* PCBDDCSetUpLocalScatters should be called first! */ 3967 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3968 3969 /* 3970 Setup local correction and local part of coarse basis. 3971 Gives back the dense local part of the coarse matrix in column major ordering 3972 */ 3973 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3974 3975 /* Compute total number of coarse nodes and setup coarse solver */ 3976 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3977 PetscCall(MatDestroy(&coarse_submat)); 3978 PetscFunctionReturn(PETSC_SUCCESS); 3979 } 3980 3981 PetscErrorCode PCBDDCResetCustomization(PC pc) 3982 { 3983 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3984 3985 PetscFunctionBegin; 3986 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3987 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3988 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3989 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3990 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3991 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3992 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3993 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3994 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3995 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3996 PetscFunctionReturn(PETSC_SUCCESS); 3997 } 3998 3999 PetscErrorCode PCBDDCResetTopography(PC pc) 4000 { 4001 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4002 PetscInt i; 4003 4004 PetscFunctionBegin; 4005 PetscCall(MatDestroy(&pcbddc->nedcG)); 4006 PetscCall(ISDestroy(&pcbddc->nedclocal)); 4007 PetscCall(MatDestroy(&pcbddc->discretegradient)); 4008 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 4009 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 4010 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 4011 PetscCall(VecDestroy(&pcbddc->work_change)); 4012 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 4013 PetscCall(MatDestroy(&pcbddc->divudotp)); 4014 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 4015 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 4016 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 4017 pcbddc->n_local_subs = 0; 4018 PetscCall(PetscFree(pcbddc->local_subs)); 4019 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 4020 pcbddc->graphanalyzed = PETSC_FALSE; 4021 pcbddc->recompute_topography = PETSC_TRUE; 4022 pcbddc->corner_selected = PETSC_FALSE; 4023 PetscFunctionReturn(PETSC_SUCCESS); 4024 } 4025 4026 PetscErrorCode PCBDDCResetSolvers(PC pc) 4027 { 4028 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4029 4030 PetscFunctionBegin; 4031 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 4032 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4033 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4034 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4035 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4036 PetscCall(VecDestroy(&pcbddc->vec1_P)); 4037 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4038 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4039 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4040 PetscCall(VecDestroy(&pcbddc->vec1_R)); 4041 PetscCall(VecDestroy(&pcbddc->vec2_R)); 4042 PetscCall(ISDestroy(&pcbddc->is_R_local)); 4043 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 4044 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 4045 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 4046 PetscCall(KSPReset(pcbddc->ksp_D)); 4047 PetscCall(KSPReset(pcbddc->ksp_R)); 4048 PetscCall(KSPReset(pcbddc->coarse_ksp)); 4049 PetscCall(MatDestroy(&pcbddc->local_mat)); 4050 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 4051 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 4052 PetscCall(PetscFree(pcbddc->global_primal_indices)); 4053 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 4054 PetscCall(MatDestroy(&pcbddc->benign_change)); 4055 PetscCall(VecDestroy(&pcbddc->benign_vec)); 4056 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 4057 PetscCall(MatDestroy(&pcbddc->benign_B0)); 4058 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 4059 if (pcbddc->benign_zerodiag_subs) { 4060 PetscInt i; 4061 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 4062 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 4063 } 4064 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 4065 PetscFunctionReturn(PETSC_SUCCESS); 4066 } 4067 4068 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 4069 { 4070 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4071 PC_IS *pcis = (PC_IS *)pc->data; 4072 VecType impVecType; 4073 PetscInt n_constraints, n_R, old_size; 4074 4075 PetscFunctionBegin; 4076 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 4077 n_R = pcis->n - pcbddc->n_vertices; 4078 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 4079 /* local work vectors (try to avoid unneeded work)*/ 4080 /* R nodes */ 4081 old_size = -1; 4082 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 4083 if (n_R != old_size) { 4084 PetscCall(VecDestroy(&pcbddc->vec1_R)); 4085 PetscCall(VecDestroy(&pcbddc->vec2_R)); 4086 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 4087 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 4088 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 4089 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 4090 } 4091 /* local primal dofs */ 4092 old_size = -1; 4093 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 4094 if (pcbddc->local_primal_size != old_size) { 4095 PetscCall(VecDestroy(&pcbddc->vec1_P)); 4096 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 4097 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 4098 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 4099 } 4100 /* local explicit constraints */ 4101 old_size = -1; 4102 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 4103 if (n_constraints && n_constraints != old_size) { 4104 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4105 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 4106 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 4107 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 4108 } 4109 PetscFunctionReturn(PETSC_SUCCESS); 4110 } 4111 4112 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 4113 { 4114 PetscBool flg; 4115 const PetscScalar *a; 4116 4117 PetscFunctionBegin; 4118 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 4119 if (flg) { 4120 PetscCall(MatDenseGetArrayRead(S, &a)); 4121 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4122 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4123 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4124 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4125 } else { 4126 const PetscInt *ii, *jj; 4127 PetscInt n; 4128 PetscInt buf[8192], *bufc = NULL; 4129 PetscBool freeb = PETSC_FALSE; 4130 Mat Sm = S; 4131 4132 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4133 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4134 else PetscCall(PetscObjectReference((PetscObject)S)); 4135 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4136 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4137 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4138 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4139 bufc = buf; 4140 } else { 4141 PetscCall(PetscMalloc1(nc, &bufc)); 4142 freeb = PETSC_TRUE; 4143 } 4144 4145 for (PetscInt i = 0; i < n; i++) { 4146 const PetscInt nci = ii[i + 1] - ii[i]; 4147 4148 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4149 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4150 } 4151 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4152 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4153 PetscCall(MatDestroy(&Sm)); 4154 if (freeb) PetscCall(PetscFree(bufc)); 4155 } 4156 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4157 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4158 PetscFunctionReturn(PETSC_SUCCESS); 4159 } 4160 4161 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4162 { 4163 Mat_SeqAIJ *aij; 4164 PetscInt *ii, *jj; 4165 PetscScalar *aa; 4166 PetscInt nnz = 0, m, nc; 4167 const PetscScalar *a; 4168 const PetscScalar zero = 0.0; 4169 4170 PetscFunctionBegin; 4171 PetscCall(MatGetLocalSize(D, &m, &nc)); 4172 PetscCall(MatDenseGetArrayRead(D, &a)); 4173 PetscCall(PetscMalloc1(m + 1, &ii)); 4174 PetscCall(PetscMalloc1(m * nc, &jj)); 4175 PetscCall(PetscMalloc1(m * nc, &aa)); 4176 ii[0] = 0; 4177 for (PetscInt k = 0; k < m; k++) { 4178 for (PetscInt s = 0; s < nc; s++) { 4179 const PetscInt c = s + k * nc; 4180 const PetscScalar v = a[k + s * m]; 4181 4182 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4183 jj[nnz] = j[c]; 4184 aa[nnz] = a[k + s * m]; 4185 nnz++; 4186 } 4187 ii[k + 1] = nnz; 4188 } 4189 4190 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4191 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4192 4193 aij = (Mat_SeqAIJ *)(*mat)->data; 4194 aij->free_a = PETSC_TRUE; 4195 aij->free_ij = PETSC_TRUE; 4196 PetscFunctionReturn(PETSC_SUCCESS); 4197 } 4198 4199 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4200 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4201 { 4202 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4203 const PetscBool allowzeropivot = PETSC_FALSE; 4204 PetscBool zeropivotdetected = PETSC_FALSE; 4205 const PetscReal shift = 0.0; 4206 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4207 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4208 PetscLogDouble flops = 0.0; 4209 4210 PetscFunctionBegin; 4211 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4212 for (PetscInt i = 0; i < nblocks; i++) { 4213 ncnt += bsizes[i]; 4214 ncnt2 += PetscSqr(bsizes[i]); 4215 } 4216 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4217 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4218 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4219 4220 PetscCall(PetscMalloc1(n + 1, &ii)); 4221 PetscCall(PetscMalloc1(ncnt2, &jj)); 4222 PetscCall(PetscCalloc1(ncnt2, &aa)); 4223 4224 ncnt = 0; 4225 ii[0] = 0; 4226 indi = ii; 4227 indj = jj; 4228 diag = aa; 4229 for (PetscInt i = 0; i < nblocks; i++) { 4230 const PetscInt bs = bsizes[i]; 4231 4232 for (PetscInt k = 0; k < bs; k++) { 4233 indi[k + 1] = indi[k] + bs; 4234 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4235 } 4236 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4237 switch (bs) { 4238 case 1: 4239 *diag = 1.0 / (*diag); 4240 break; 4241 case 2: 4242 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4243 break; 4244 case 3: 4245 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4246 break; 4247 case 4: 4248 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4249 break; 4250 case 5: 4251 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4252 break; 4253 case 6: 4254 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4255 break; 4256 case 7: 4257 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4258 break; 4259 default: 4260 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4261 } 4262 ncnt += bs; 4263 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4264 diag += bs * bs; 4265 indj += bs * bs; 4266 indi += bs; 4267 } 4268 PetscCall(PetscLogFlops(flops)); 4269 PetscCall(PetscFree2(v_work, v_pivots)); 4270 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4271 { 4272 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4273 aij->free_a = PETSC_TRUE; 4274 aij->free_ij = PETSC_TRUE; 4275 } 4276 PetscFunctionReturn(PETSC_SUCCESS); 4277 } 4278 4279 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4280 { 4281 PC_IS *pcis = (PC_IS *)pc->data; 4282 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4283 PCBDDCGraph graph = pcbddc->mat_graph; 4284 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4285 /* submatrices of local problem */ 4286 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4287 /* submatrices of local coarse problem */ 4288 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4289 /* working matrices */ 4290 Mat C_CR; 4291 4292 /* additional working stuff */ 4293 PC pc_R; 4294 IS is_R, is_V, is_C; 4295 const PetscInt *idx_V, *idx_C; 4296 Mat F, Brhs = NULL; 4297 Vec dummy_vec; 4298 PetscBool isPreonly, isLU, isCHOL, need_benign_correction, sparserhs; 4299 PetscInt *idx_V_B; 4300 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4301 PetscInt n_eff_vertices, n_eff_constraints; 4302 PetscInt i, n_R, n_D, n_B; 4303 PetscScalar one = 1.0, m_one = -1.0; 4304 4305 /* Multi-element support */ 4306 PetscBool multi_element = graph->multi_element; 4307 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4308 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4309 IS is_C_perm = NULL; 4310 PetscInt n_C_bss = 0, *C_bss = NULL; 4311 Mat coarse_phi_multi; 4312 4313 PetscFunctionBegin; 4314 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4315 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4316 4317 /* Set Non-overlapping dimensions */ 4318 n_vertices = pcbddc->n_vertices; 4319 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4320 n_B = pcis->n_B; 4321 n_D = pcis->n - n_B; 4322 n_R = pcis->n - n_vertices; 4323 4324 /* vertices in boundary numbering */ 4325 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4326 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4327 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4328 4329 /* these two cases still need to be optimized */ 4330 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4331 4332 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4333 if (multi_element) { 4334 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4335 4336 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4337 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4338 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4339 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4340 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4341 4342 /* group vertices and constraints by subdomain id */ 4343 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4344 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4345 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4346 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4347 4348 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4349 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4350 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4351 for (PetscInt i = 0; i < n_vertices; i++) { 4352 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4353 4354 V_to_eff_V[i] = count_eff[s]; 4355 count_eff[s] += 1; 4356 } 4357 for (PetscInt i = 0; i < n_constraints; i++) { 4358 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4359 4360 C_to_eff_C[i] = count_eff[s]; 4361 count_eff[s] += 1; 4362 } 4363 4364 /* preallocation */ 4365 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4366 for (PetscInt i = 0; i < n_vertices; i++) { 4367 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4368 4369 nnz[i] = count_eff[s] + count_eff[s + 1]; 4370 } 4371 for (PetscInt i = 0; i < n_constraints; i++) { 4372 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4373 4374 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4375 } 4376 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4377 PetscCall(PetscFree(nnz)); 4378 4379 n_eff_vertices = 0; 4380 n_eff_constraints = 0; 4381 for (PetscInt i = 0; i < n_el; i++) { 4382 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4383 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4384 count_eff[2 * i] = 0; 4385 count_eff[2 * i + 1] = 0; 4386 } 4387 4388 const PetscInt *idx; 4389 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4390 4391 for (PetscInt i = 0; i < n_vertices; i++) { 4392 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4393 const PetscInt s = 2 * e; 4394 4395 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4396 count_eff[s] += 1; 4397 } 4398 for (PetscInt i = 0; i < n_constraints; i++) { 4399 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4400 const PetscInt s = 2 * e + 1; 4401 4402 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4403 count_eff[s] += 1; 4404 } 4405 4406 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4407 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4408 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4409 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4410 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4411 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4412 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4413 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4414 4415 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4416 for (PetscInt i = 0; i < n_R; i++) { 4417 const PetscInt e = graph->nodes[idx[i]].local_sub; 4418 const PetscInt s = 2 * e; 4419 PetscInt j; 4420 4421 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4422 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4423 } 4424 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4425 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4426 for (PetscInt i = 0; i < n_B; i++) { 4427 const PetscInt e = graph->nodes[idx[i]].local_sub; 4428 const PetscInt s = 2 * e; 4429 PetscInt j; 4430 4431 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4432 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4433 } 4434 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4435 4436 /* permutation and blocksizes for block invert of S_CC */ 4437 PetscInt *idxp; 4438 4439 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4440 PetscCall(PetscMalloc1(n_el, &C_bss)); 4441 n_C_bss = 0; 4442 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4443 const PetscInt nc = count_eff[2 * e + 1]; 4444 4445 if (nc) C_bss[n_C_bss++] = nc; 4446 for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; 4447 cnt += nc; 4448 } 4449 4450 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4451 4452 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4453 PetscCall(PetscFree(count_eff)); 4454 } else { 4455 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4456 n_eff_constraints = n_constraints; 4457 n_eff_vertices = n_vertices; 4458 } 4459 4460 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4461 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4462 PetscCall(PCSetUp(pc_R)); 4463 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->ksp_R, KSPPREONLY, &isPreonly)); 4464 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4465 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4466 lda_rhs = n_R; 4467 need_benign_correction = PETSC_FALSE; 4468 F = NULL; 4469 if (isPreonly && (isLU || isCHOL)) { 4470 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4471 } else if (sub_schurs && sub_schurs->reuse_solver) { 4472 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4473 MatFactorType type; 4474 4475 F = reuse_solver->F; 4476 PetscCall(MatGetFactorType(F, &type)); 4477 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4478 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4479 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4480 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4481 } 4482 4483 /* determine if we can use a sparse right-hand side */ 4484 sparserhs = PETSC_FALSE; 4485 if (F && !multi_element) { 4486 MatSolverType solver; 4487 4488 PetscCall(MatFactorGetSolverType(F, &solver)); 4489 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4490 } 4491 4492 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4493 dummy_vec = NULL; 4494 if (need_benign_correction && lda_rhs != n_R && F) { 4495 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4496 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4497 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4498 } 4499 4500 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4501 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4502 4503 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4504 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4505 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4506 PetscCall(ISGetIndices(is_V, &idx_V)); 4507 PetscCall(ISGetIndices(is_C, &idx_C)); 4508 4509 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4510 if (n_constraints) { 4511 Mat C_B; 4512 4513 /* Extract constraints on R nodes: C_{CR} */ 4514 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4515 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4516 4517 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4518 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4519 if (!sparserhs) { 4520 PetscScalar *marr; 4521 4522 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4523 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4524 for (i = 0; i < n_constraints; i++) { 4525 const PetscScalar *row_cmat_values; 4526 const PetscInt *row_cmat_indices; 4527 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4528 4529 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4530 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4531 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4532 } 4533 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4534 } else { 4535 Mat tC_CR; 4536 4537 PetscCall(MatScale(C_CR, -1.0)); 4538 if (lda_rhs != n_R) { 4539 PetscScalar *aa; 4540 PetscInt r, *ii, *jj; 4541 PetscBool done; 4542 4543 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4544 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4545 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4546 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4547 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4548 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4549 } else { 4550 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4551 tC_CR = C_CR; 4552 } 4553 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4554 PetscCall(MatDestroy(&tC_CR)); 4555 } 4556 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4557 if (F) { 4558 if (need_benign_correction) { 4559 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4560 4561 /* rhs is already zero on interior dofs, no need to change the rhs */ 4562 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4563 } 4564 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4565 if (need_benign_correction) { 4566 PetscScalar *marr; 4567 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4568 4569 /* XXX multi_element? */ 4570 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4571 if (lda_rhs != n_R) { 4572 for (i = 0; i < n_eff_constraints; i++) { 4573 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4574 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4575 PetscCall(VecResetArray(dummy_vec)); 4576 } 4577 } else { 4578 for (i = 0; i < n_eff_constraints; i++) { 4579 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4580 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4581 PetscCall(VecResetArray(pcbddc->vec1_R)); 4582 } 4583 } 4584 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4585 } 4586 } else { 4587 const PetscScalar *barr; 4588 PetscScalar *marr; 4589 4590 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4591 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4592 for (i = 0; i < n_eff_constraints; i++) { 4593 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4594 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4595 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4596 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4597 PetscCall(VecResetArray(pcbddc->vec1_R)); 4598 PetscCall(VecResetArray(pcbddc->vec2_R)); 4599 } 4600 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4601 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4602 } 4603 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4604 PetscCall(MatDestroy(&Brhs)); 4605 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4606 if (!pcbddc->switch_static) { 4607 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4608 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, local_auxmat2_R, pcbddc->local_auxmat2, INSERT_VALUES, SCATTER_FORWARD)); 4609 if (multi_element) { 4610 Mat T; 4611 4612 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4613 PetscCall(MatDestroy(&local_auxmat2_R)); 4614 local_auxmat2_R = T; 4615 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4616 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4617 pcbddc->local_auxmat2 = T; 4618 } 4619 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4620 } else { 4621 if (multi_element) { 4622 Mat T; 4623 4624 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4625 PetscCall(MatDestroy(&local_auxmat2_R)); 4626 local_auxmat2_R = T; 4627 } 4628 if (lda_rhs != n_R) { 4629 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4630 } else { 4631 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4632 pcbddc->local_auxmat2 = local_auxmat2_R; 4633 } 4634 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4635 } 4636 PetscCall(MatScale(S_CC, m_one)); 4637 if (multi_element) { 4638 Mat T, T2; 4639 IS isp, ispi; 4640 4641 isp = is_C_perm; 4642 4643 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4644 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4645 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4646 PetscCall(MatDestroy(&T)); 4647 PetscCall(MatDestroy(&S_CC)); 4648 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4649 PetscCall(MatDestroy(&T2)); 4650 PetscCall(ISDestroy(&ispi)); 4651 } else { 4652 if (isCHOL) { 4653 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4654 } else { 4655 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4656 } 4657 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4658 } 4659 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4660 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4661 PetscCall(MatDestroy(&C_B)); 4662 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4663 } 4664 4665 /* Get submatrices from subdomain matrix */ 4666 if (n_vertices) { 4667 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4668 PetscBool oldpin; 4669 #endif 4670 IS is_aux; 4671 4672 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4673 IS tis; 4674 4675 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4676 PetscCall(ISSort(tis)); 4677 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4678 PetscCall(ISDestroy(&tis)); 4679 } else { 4680 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4681 } 4682 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4683 oldpin = pcbddc->local_mat->boundtocpu; 4684 #endif 4685 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4686 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4687 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4688 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4689 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4690 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4691 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4692 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4693 #endif 4694 PetscCall(ISDestroy(&is_aux)); 4695 } 4696 PetscCall(ISDestroy(&is_C_perm)); 4697 PetscCall(PetscFree(C_bss)); 4698 4699 p0_lidx_I = NULL; 4700 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4701 const PetscInt *idxs; 4702 4703 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4704 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4705 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4706 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4707 } 4708 4709 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4710 4711 /* Matrices of coarse basis functions (local) */ 4712 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4713 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4714 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4715 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4716 if (!multi_element) { 4717 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4718 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4719 coarse_phi_multi = NULL; 4720 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4721 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4722 IS is_cols[2] = {is_V, is_C}; 4723 4724 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4725 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4726 PetscCall(ISDestroy(&is_rows[1])); 4727 } 4728 4729 /* vertices */ 4730 if (n_vertices) { 4731 PetscBool restoreavr = PETSC_FALSE; 4732 Mat A_RRmA_RV = NULL; 4733 4734 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4735 PetscCall(MatDestroy(&A_VV)); 4736 4737 if (n_R) { 4738 Mat A_RV_bcorr = NULL, S_VV; 4739 4740 PetscCall(MatScale(A_RV, m_one)); 4741 if (need_benign_correction) { 4742 ISLocalToGlobalMapping RtoN; 4743 IS is_p0; 4744 PetscInt *idxs_p0, n; 4745 4746 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4747 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4748 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4749 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4750 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4751 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4752 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4753 PetscCall(ISDestroy(&is_p0)); 4754 } 4755 4756 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4757 if (!sparserhs || need_benign_correction) { 4758 if (lda_rhs == n_R && !multi_element) { 4759 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4760 } else { 4761 Mat T; 4762 PetscScalar *av, *array; 4763 const PetscInt *xadj, *adjncy; 4764 PetscInt n; 4765 PetscBool flg_row; 4766 4767 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4768 PetscCall(MatDenseGetArrayWrite(T, &array)); 4769 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4770 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4771 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4772 for (i = 0; i < n; i++) { 4773 PetscInt j; 4774 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4775 } 4776 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4777 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4778 PetscCall(MatDestroy(&A_RV)); 4779 A_RV = T; 4780 } 4781 if (need_benign_correction) { 4782 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4783 PetscScalar *marr; 4784 4785 /* XXX multi_element */ 4786 PetscCall(MatDenseGetArray(A_RV, &marr)); 4787 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4788 4789 | 0 0 0 | (V) 4790 L = | 0 0 -1 | (P-p0) 4791 | 0 0 -1 | (p0) 4792 4793 */ 4794 for (i = 0; i < reuse_solver->benign_n; i++) { 4795 const PetscScalar *vals; 4796 const PetscInt *idxs, *idxs_zero; 4797 PetscInt n, j, nz; 4798 4799 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4800 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4801 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4802 for (j = 0; j < n; j++) { 4803 PetscScalar val = vals[j]; 4804 PetscInt k, col = idxs[j]; 4805 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4806 } 4807 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4808 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4809 } 4810 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4811 } 4812 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4813 Brhs = A_RV; 4814 } else { 4815 Mat tA_RVT, A_RVT; 4816 4817 if (!pcbddc->symmetric_primal) { 4818 /* A_RV already scaled by -1 */ 4819 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4820 } else { 4821 restoreavr = PETSC_TRUE; 4822 PetscCall(MatScale(A_VR, -1.0)); 4823 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4824 A_RVT = A_VR; 4825 } 4826 if (lda_rhs != n_R) { 4827 PetscScalar *aa; 4828 PetscInt r, *ii, *jj; 4829 PetscBool done; 4830 4831 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4832 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4833 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4834 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4835 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4836 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4837 } else { 4838 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4839 tA_RVT = A_RVT; 4840 } 4841 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4842 PetscCall(MatDestroy(&tA_RVT)); 4843 PetscCall(MatDestroy(&A_RVT)); 4844 } 4845 if (F) { 4846 /* need to correct the rhs */ 4847 if (need_benign_correction) { 4848 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4849 PetscScalar *marr; 4850 4851 PetscCall(MatDenseGetArray(Brhs, &marr)); 4852 if (lda_rhs != n_R) { 4853 for (i = 0; i < n_eff_vertices; i++) { 4854 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4855 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4856 PetscCall(VecResetArray(dummy_vec)); 4857 } 4858 } else { 4859 for (i = 0; i < n_eff_vertices; i++) { 4860 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4861 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4862 PetscCall(VecResetArray(pcbddc->vec1_R)); 4863 } 4864 } 4865 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4866 } 4867 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4868 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4869 /* need to correct the solution */ 4870 if (need_benign_correction) { 4871 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4872 PetscScalar *marr; 4873 4874 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4875 if (lda_rhs != n_R) { 4876 for (i = 0; i < n_eff_vertices; i++) { 4877 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4878 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4879 PetscCall(VecResetArray(dummy_vec)); 4880 } 4881 } else { 4882 for (i = 0; i < n_eff_vertices; i++) { 4883 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4884 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4885 PetscCall(VecResetArray(pcbddc->vec1_R)); 4886 } 4887 } 4888 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4889 } 4890 } else { 4891 const PetscScalar *barr; 4892 PetscScalar *marr; 4893 4894 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4895 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4896 for (i = 0; i < n_eff_vertices; i++) { 4897 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4898 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4899 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4900 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4901 PetscCall(VecResetArray(pcbddc->vec1_R)); 4902 PetscCall(VecResetArray(pcbddc->vec2_R)); 4903 } 4904 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4905 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4906 } 4907 PetscCall(MatDestroy(&A_RV)); 4908 PetscCall(MatDestroy(&Brhs)); 4909 /* S_VV and S_CV */ 4910 if (n_constraints) { 4911 Mat B; 4912 4913 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4914 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD)); 4915 4916 /* S_CV = pcbddc->local_auxmat1 * B */ 4917 if (multi_element) { 4918 Mat T; 4919 4920 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4921 PetscCall(MatDestroy(&B)); 4922 B = T; 4923 } 4924 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4925 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4926 PetscCall(MatProductSetFromOptions(S_CV)); 4927 PetscCall(MatProductSymbolic(S_CV)); 4928 PetscCall(MatProductNumeric(S_CV)); 4929 PetscCall(MatProductClear(S_CV)); 4930 PetscCall(MatDestroy(&B)); 4931 4932 /* B = local_auxmat2_R * S_CV */ 4933 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4934 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4935 PetscCall(MatProductSetFromOptions(B)); 4936 PetscCall(MatProductSymbolic(B)); 4937 PetscCall(MatProductNumeric(B)); 4938 4939 PetscCall(MatScale(S_CV, m_one)); 4940 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4941 4942 if (multi_element) { 4943 Mat T; 4944 4945 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4946 PetscCall(MatDestroy(&A_RRmA_RV)); 4947 A_RRmA_RV = T; 4948 } 4949 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4950 PetscCall(MatDestroy(&B)); 4951 } else if (multi_element) { 4952 Mat T; 4953 4954 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4955 PetscCall(MatDestroy(&A_RRmA_RV)); 4956 A_RRmA_RV = T; 4957 } 4958 4959 if (lda_rhs != n_R) { 4960 Mat T; 4961 4962 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4963 PetscCall(MatDestroy(&A_RRmA_RV)); 4964 A_RRmA_RV = T; 4965 } 4966 4967 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4968 if (need_benign_correction) { /* XXX SPARSE */ 4969 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4970 PetscScalar *sums; 4971 const PetscScalar *marr; 4972 4973 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4974 PetscCall(PetscMalloc1(n_vertices, &sums)); 4975 for (i = 0; i < reuse_solver->benign_n; i++) { 4976 const PetscScalar *vals; 4977 const PetscInt *idxs, *idxs_zero; 4978 PetscInt n, j, nz; 4979 4980 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4981 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4982 for (j = 0; j < n_vertices; j++) { 4983 sums[j] = 0.; 4984 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4985 } 4986 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4987 for (j = 0; j < n; j++) { 4988 PetscScalar val = vals[j]; 4989 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4990 } 4991 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4992 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4993 } 4994 PetscCall(PetscFree(sums)); 4995 PetscCall(MatDestroy(&A_RV_bcorr)); 4996 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4997 } 4998 4999 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 5000 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 5001 PetscCall(MatDestroy(&S_VV)); 5002 } 5003 5004 /* coarse basis functions */ 5005 if (coarse_phi_multi) { 5006 Mat Vid; 5007 5008 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 5009 PetscCall(MatShift_Basic(Vid, 1.0)); 5010 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 5011 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 5012 PetscCall(MatDestroy(&Vid)); 5013 } else { 5014 if (A_RRmA_RV) { 5015 Mat B; 5016 5017 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, 0, n_vertices, &B)); 5018 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD)); 5019 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B)); 5020 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5021 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, 0, n_vertices, &B)); 5022 PetscCall(MatDenseScatter_Private(pcbddc->R_to_D, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD)); 5023 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B)); 5024 if (pcbddc->benign_n) { 5025 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5026 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 5027 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 5028 } 5029 } 5030 } 5031 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 5032 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 5033 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 5034 } 5035 PetscCall(MatDestroy(&A_RRmA_RV)); 5036 } 5037 PetscCall(MatDestroy(&A_RV)); 5038 PetscCall(VecDestroy(&dummy_vec)); 5039 5040 if (n_constraints) { 5041 Mat B, B2; 5042 5043 PetscCall(MatScale(S_CC, m_one)); 5044 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 5045 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 5046 PetscCall(MatProductSetFromOptions(B)); 5047 PetscCall(MatProductSymbolic(B)); 5048 PetscCall(MatProductNumeric(B)); 5049 5050 if (n_vertices) { 5051 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 5052 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 5053 } else { 5054 if (lda_rhs != n_R) { 5055 Mat tB; 5056 5057 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 5058 PetscCall(MatDestroy(&B)); 5059 B = tB; 5060 } 5061 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 5062 } 5063 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 5064 } 5065 5066 /* coarse basis functions */ 5067 if (coarse_phi_multi) { 5068 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 5069 } else { 5070 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 5071 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, B, B2, INSERT_VALUES, SCATTER_FORWARD)); 5072 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 5073 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5074 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 5075 PetscCall(MatDenseScatter_Private(pcbddc->R_to_D, B, B2, INSERT_VALUES, SCATTER_FORWARD)); 5076 if (pcbddc->benign_n) { 5077 for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5078 } 5079 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 5080 } 5081 } 5082 PetscCall(MatDestroy(&B)); 5083 } 5084 5085 /* assemble sparse coarse basis functions */ 5086 if (coarse_phi_multi) { 5087 Mat T; 5088 5089 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 5090 PetscCall(MatDestroy(&coarse_phi_multi)); 5091 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 5092 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); 5093 PetscCall(MatDestroy(&T)); 5094 } 5095 PetscCall(MatDestroy(&local_auxmat2_R)); 5096 PetscCall(PetscFree(p0_lidx_I)); 5097 5098 /* coarse matrix entries relative to B_0 */ 5099 if (pcbddc->benign_n) { 5100 Mat B0_B, B0_BPHI; 5101 IS is_dummy; 5102 const PetscScalar *data; 5103 PetscInt j; 5104 5105 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5106 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5107 PetscCall(ISDestroy(&is_dummy)); 5108 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5109 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5110 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5111 for (j = 0; j < pcbddc->benign_n; j++) { 5112 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5113 for (i = 0; i < pcbddc->local_primal_size; i++) { 5114 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5115 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5116 } 5117 } 5118 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5119 PetscCall(MatDestroy(&B0_B)); 5120 PetscCall(MatDestroy(&B0_BPHI)); 5121 } 5122 5123 /* compute other basis functions for non-symmetric problems */ 5124 if (!pcbddc->symmetric_primal) { 5125 Mat B_V = NULL, B_C = NULL; 5126 PetscScalar *marray, *work; 5127 5128 /* TODO multi_element MatDenseScatter */ 5129 if (n_constraints) { 5130 Mat S_CCT, C_CRT; 5131 5132 PetscCall(MatScale(S_CC, m_one)); 5133 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5134 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5135 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5136 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5137 PetscCall(MatDestroy(&S_CCT)); 5138 if (n_vertices) { 5139 Mat S_VCT; 5140 5141 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5142 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5143 PetscCall(MatDestroy(&S_VCT)); 5144 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5145 } 5146 PetscCall(MatDestroy(&C_CRT)); 5147 } else { 5148 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5149 } 5150 if (n_vertices && n_R) { 5151 PetscScalar *av, *marray; 5152 const PetscInt *xadj, *adjncy; 5153 PetscInt n; 5154 PetscBool flg_row; 5155 5156 /* B_V = B_V - A_VR^T */ 5157 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5158 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5159 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5160 PetscCall(MatDenseGetArray(B_V, &marray)); 5161 for (i = 0; i < n; i++) { 5162 PetscInt j; 5163 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5164 } 5165 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5166 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5167 PetscCall(MatDestroy(&A_VR)); 5168 } 5169 5170 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5171 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5172 if (n_vertices) { 5173 PetscCall(MatDenseGetArray(B_V, &marray)); 5174 for (i = 0; i < n_vertices; i++) { 5175 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5176 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5177 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5178 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5179 PetscCall(VecResetArray(pcbddc->vec1_R)); 5180 PetscCall(VecResetArray(pcbddc->vec2_R)); 5181 } 5182 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5183 } 5184 if (B_C) { 5185 PetscCall(MatDenseGetArray(B_C, &marray)); 5186 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5187 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5188 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5189 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5190 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5191 PetscCall(VecResetArray(pcbddc->vec1_R)); 5192 PetscCall(VecResetArray(pcbddc->vec2_R)); 5193 } 5194 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5195 } 5196 /* coarse basis functions */ 5197 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5198 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5199 for (i = 0; i < pcbddc->local_primal_size; i++) { 5200 Vec v; 5201 5202 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5203 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5204 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5205 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5206 if (i < n_vertices) { 5207 PetscScalar one = 1.0; 5208 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5209 PetscCall(VecAssemblyBegin(v)); 5210 PetscCall(VecAssemblyEnd(v)); 5211 } 5212 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5213 5214 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5215 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5216 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5217 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5218 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5219 } 5220 PetscCall(VecResetArray(pcbddc->vec1_R)); 5221 } 5222 PetscCall(MatDestroy(&B_V)); 5223 PetscCall(MatDestroy(&B_C)); 5224 PetscCall(PetscFree(work)); 5225 } else { 5226 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5227 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5228 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5229 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5230 } 5231 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5232 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5233 5234 /* free memory */ 5235 PetscCall(PetscFree(V_to_eff_V)); 5236 PetscCall(PetscFree(C_to_eff_C)); 5237 PetscCall(PetscFree(R_eff_V_J)); 5238 PetscCall(PetscFree(R_eff_C_J)); 5239 PetscCall(PetscFree(B_eff_V_J)); 5240 PetscCall(PetscFree(B_eff_C_J)); 5241 PetscCall(ISDestroy(&is_R)); 5242 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5243 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5244 PetscCall(ISDestroy(&is_V)); 5245 PetscCall(ISDestroy(&is_C)); 5246 PetscCall(PetscFree(idx_V_B)); 5247 PetscCall(MatDestroy(&S_CV)); 5248 PetscCall(MatDestroy(&S_VC)); 5249 PetscCall(MatDestroy(&S_CC)); 5250 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5251 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5252 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5253 5254 /* Checking coarse_sub_mat and coarse basis functions */ 5255 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5256 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5257 if (pcbddc->dbg_flag) { 5258 Mat AUXMAT, TM1, TM2, TM3, TM4; 5259 Mat coarse_phi_D, coarse_phi_B; 5260 Mat coarse_psi_D, coarse_psi_B; 5261 Mat A_II, A_BB, A_IB, A_BI; 5262 Mat C_B, CPHI; 5263 IS is_dummy; 5264 Vec mones; 5265 MatType checkmattype = MATSEQAIJ; 5266 PetscReal real_value; 5267 5268 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5269 Mat A; 5270 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5271 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5272 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5273 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5274 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5275 PetscCall(MatDestroy(&A)); 5276 } else { 5277 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5278 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5279 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5280 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5281 } 5282 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5283 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5284 if (!pcbddc->symmetric_primal) { 5285 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5286 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5287 } 5288 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5289 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5290 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5291 if (!pcbddc->symmetric_primal) { 5292 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5293 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5294 PetscCall(MatDestroy(&AUXMAT)); 5295 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5296 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5297 PetscCall(MatDestroy(&AUXMAT)); 5298 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5299 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5300 PetscCall(MatDestroy(&AUXMAT)); 5301 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5302 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5303 PetscCall(MatDestroy(&AUXMAT)); 5304 } else { 5305 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5306 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5307 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5308 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5309 PetscCall(MatDestroy(&AUXMAT)); 5310 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5311 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5312 PetscCall(MatDestroy(&AUXMAT)); 5313 } 5314 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5315 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5316 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5317 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5318 if (pcbddc->benign_n) { 5319 Mat B0_B, B0_BPHI; 5320 const PetscScalar *data2; 5321 PetscScalar *data; 5322 PetscInt j; 5323 5324 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5325 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5326 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5327 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5328 PetscCall(MatDenseGetArray(TM1, &data)); 5329 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5330 for (j = 0; j < pcbddc->benign_n; j++) { 5331 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5332 for (i = 0; i < pcbddc->local_primal_size; i++) { 5333 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5334 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5335 } 5336 } 5337 PetscCall(MatDenseRestoreArray(TM1, &data)); 5338 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5339 PetscCall(MatDestroy(&B0_B)); 5340 PetscCall(ISDestroy(&is_dummy)); 5341 PetscCall(MatDestroy(&B0_BPHI)); 5342 } 5343 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5344 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5345 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5346 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5347 5348 /* check constraints */ 5349 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5350 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5351 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5352 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5353 } else { 5354 PetscScalar *data; 5355 Mat tmat; 5356 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5357 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5358 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5359 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5360 PetscCall(MatDestroy(&tmat)); 5361 } 5362 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5363 PetscCall(VecSet(mones, -1.0)); 5364 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5365 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5366 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5367 if (!pcbddc->symmetric_primal) { 5368 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5369 PetscCall(VecSet(mones, -1.0)); 5370 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5371 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5372 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5373 } 5374 PetscCall(MatDestroy(&C_B)); 5375 PetscCall(MatDestroy(&CPHI)); 5376 PetscCall(ISDestroy(&is_dummy)); 5377 PetscCall(VecDestroy(&mones)); 5378 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5379 PetscCall(MatDestroy(&A_II)); 5380 PetscCall(MatDestroy(&A_BB)); 5381 PetscCall(MatDestroy(&A_IB)); 5382 PetscCall(MatDestroy(&A_BI)); 5383 PetscCall(MatDestroy(&TM1)); 5384 PetscCall(MatDestroy(&TM2)); 5385 PetscCall(MatDestroy(&TM3)); 5386 PetscCall(MatDestroy(&TM4)); 5387 PetscCall(MatDestroy(&coarse_phi_D)); 5388 PetscCall(MatDestroy(&coarse_phi_B)); 5389 if (!pcbddc->symmetric_primal) { 5390 PetscCall(MatDestroy(&coarse_psi_D)); 5391 PetscCall(MatDestroy(&coarse_psi_B)); 5392 } 5393 } 5394 5395 #if 0 5396 { 5397 PetscViewer viewer; 5398 char filename[256]; 5399 5400 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5401 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5402 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5403 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5404 PetscCall(MatView(*coarse_submat,viewer)); 5405 if (pcbddc->coarse_phi_B) { 5406 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5407 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5408 } 5409 if (pcbddc->coarse_phi_D) { 5410 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5411 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5412 } 5413 if (pcbddc->coarse_psi_B) { 5414 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5415 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5416 } 5417 if (pcbddc->coarse_psi_D) { 5418 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5419 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5420 } 5421 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5422 PetscCall(MatView(pcbddc->local_mat,viewer)); 5423 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5424 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5425 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5426 PetscCall(ISView(pcis->is_I_local,viewer)); 5427 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5428 PetscCall(ISView(pcis->is_B_local,viewer)); 5429 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5430 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5431 PetscCall(PetscViewerDestroy(&viewer)); 5432 } 5433 #endif 5434 5435 /* device support */ 5436 { 5437 PetscBool iscuda, iship, iskokkos; 5438 MatType mtype = NULL; 5439 5440 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5441 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5442 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5443 if (iskokkos) { 5444 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5445 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5446 } 5447 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5448 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5449 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5450 if (mtype) { 5451 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5452 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5453 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5454 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5455 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5456 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5457 } 5458 } 5459 PetscFunctionReturn(PETSC_SUCCESS); 5460 } 5461 5462 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5463 { 5464 Mat *work_mat; 5465 IS isrow_s, iscol_s; 5466 PetscBool rsorted, csorted; 5467 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5468 5469 PetscFunctionBegin; 5470 PetscCall(ISSorted(isrow, &rsorted)); 5471 PetscCall(ISSorted(iscol, &csorted)); 5472 PetscCall(ISGetLocalSize(isrow, &rsize)); 5473 PetscCall(ISGetLocalSize(iscol, &csize)); 5474 5475 if (!rsorted) { 5476 const PetscInt *idxs; 5477 PetscInt *idxs_sorted, i; 5478 5479 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5480 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5481 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5482 PetscCall(ISGetIndices(isrow, &idxs)); 5483 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5484 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5485 PetscCall(ISRestoreIndices(isrow, &idxs)); 5486 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5487 } else { 5488 PetscCall(PetscObjectReference((PetscObject)isrow)); 5489 isrow_s = isrow; 5490 } 5491 5492 if (!csorted) { 5493 if (isrow == iscol) { 5494 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5495 iscol_s = isrow_s; 5496 } else { 5497 const PetscInt *idxs; 5498 PetscInt *idxs_sorted, i; 5499 5500 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5501 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5502 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5503 PetscCall(ISGetIndices(iscol, &idxs)); 5504 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5505 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5506 PetscCall(ISRestoreIndices(iscol, &idxs)); 5507 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5508 } 5509 } else { 5510 PetscCall(PetscObjectReference((PetscObject)iscol)); 5511 iscol_s = iscol; 5512 } 5513 5514 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5515 5516 if (!rsorted || !csorted) { 5517 Mat new_mat; 5518 IS is_perm_r, is_perm_c; 5519 5520 if (!rsorted) { 5521 PetscInt *idxs_r, i; 5522 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5523 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5524 PetscCall(PetscFree(idxs_perm_r)); 5525 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5526 } else { 5527 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5528 } 5529 PetscCall(ISSetPermutation(is_perm_r)); 5530 5531 if (!csorted) { 5532 if (isrow_s == iscol_s) { 5533 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5534 is_perm_c = is_perm_r; 5535 } else { 5536 PetscInt *idxs_c, i; 5537 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5538 PetscCall(PetscMalloc1(csize, &idxs_c)); 5539 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5540 PetscCall(PetscFree(idxs_perm_c)); 5541 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5542 } 5543 } else { 5544 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5545 } 5546 PetscCall(ISSetPermutation(is_perm_c)); 5547 5548 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5549 PetscCall(MatDestroy(&work_mat[0])); 5550 work_mat[0] = new_mat; 5551 PetscCall(ISDestroy(&is_perm_r)); 5552 PetscCall(ISDestroy(&is_perm_c)); 5553 } 5554 5555 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5556 *B = work_mat[0]; 5557 PetscCall(MatDestroyMatrices(1, &work_mat)); 5558 PetscCall(ISDestroy(&isrow_s)); 5559 PetscCall(ISDestroy(&iscol_s)); 5560 PetscFunctionReturn(PETSC_SUCCESS); 5561 } 5562 5563 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C) 5564 { 5565 PetscFunctionBegin; 5566 PetscCall(MatProductCreate(A, P, NULL, C)); 5567 PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP)); 5568 PetscCall(MatProductSetAlgorithm(*C, "default")); 5569 PetscCall(MatProductSetFill(*C, fill)); 5570 PetscCall(MatSetOptionsPrefix(*C, prefix)); 5571 PetscCall(MatProductSetFromOptions(*C)); 5572 PetscCall(MatProductSymbolic(*C)); 5573 PetscCall(MatProductNumeric(*C)); 5574 (*C)->symmetric = A->symmetric; 5575 (*C)->spd = A->spd; 5576 PetscFunctionReturn(PETSC_SUCCESS); 5577 } 5578 5579 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5580 { 5581 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5582 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5583 Mat new_mat, lA; 5584 IS is_local, is_global; 5585 PetscInt local_size; 5586 PetscBool isseqaij, issym, isset; 5587 char ptapprefix[256]; 5588 5589 PetscFunctionBegin; 5590 PetscCall(MatDestroy(&pcbddc->local_mat)); 5591 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5592 if (pcbddc->mat_graph->multi_element) { 5593 Mat *mats, *bdiags; 5594 IS *gsubs; 5595 PetscInt nsubs = pcbddc->n_local_subs; 5596 5597 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5598 #if 1 5599 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5600 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5601 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5602 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5603 PetscCall(PetscFree(gsubs)); 5604 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */ 5605 Mat *tmats; 5606 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5607 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5608 PetscCall(ISDestroy(&is_local)); 5609 PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE)); 5610 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats)); 5611 PetscCall(ISDestroy(&is_global)); 5612 PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags)); 5613 PetscCall(MatDestroySubMatrices(1, &tmats)); 5614 #endif 5615 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5616 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5617 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5618 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5619 PetscCall(PetscFree(mats)); 5620 } else { 5621 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5622 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5623 PetscCall(ISDestroy(&is_local)); 5624 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5625 PetscCall(ISDestroy(&is_global)); 5626 } 5627 if (pcbddc->dbg_flag) { 5628 Vec x, x_change; 5629 PetscReal error; 5630 5631 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5632 PetscCall(VecSetRandom(x, NULL)); 5633 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5634 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5635 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5636 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5637 if (!pcbddc->change_interior) { 5638 const PetscScalar *x, *y, *v; 5639 PetscReal lerror = 0.; 5640 PetscInt i; 5641 5642 PetscCall(VecGetArrayRead(matis->x, &x)); 5643 PetscCall(VecGetArrayRead(matis->y, &y)); 5644 PetscCall(VecGetArrayRead(matis->counter, &v)); 5645 for (i = 0; i < local_size; i++) 5646 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5647 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5648 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5649 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5650 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5651 if (error > PETSC_SMALL) { 5652 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5653 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5654 } else { 5655 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5656 } 5657 } 5658 } 5659 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5660 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5661 PetscCall(VecAXPY(x, -1.0, x_change)); 5662 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5663 if (error > PETSC_SMALL) { 5664 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5665 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5666 } else { 5667 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5668 } 5669 } 5670 PetscCall(VecDestroy(&x)); 5671 PetscCall(VecDestroy(&x_change)); 5672 } 5673 5674 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5675 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5676 5677 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5678 if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix)); 5679 else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_")); 5680 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5681 if (isseqaij) { 5682 PetscCall(MatDestroy(&pcbddc->local_mat)); 5683 PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5684 if (lA) { 5685 Mat work; 5686 PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5687 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5688 PetscCall(MatDestroy(&work)); 5689 } 5690 } else { 5691 Mat work_mat; 5692 5693 PetscCall(MatDestroy(&pcbddc->local_mat)); 5694 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5695 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5696 PetscCall(MatDestroy(&work_mat)); 5697 if (lA) { 5698 Mat work; 5699 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5700 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5701 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5702 PetscCall(MatDestroy(&work)); 5703 } 5704 } 5705 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5706 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5707 PetscCall(MatDestroy(&new_mat)); 5708 PetscFunctionReturn(PETSC_SUCCESS); 5709 } 5710 5711 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5712 { 5713 PC_IS *pcis = (PC_IS *)pc->data; 5714 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5715 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5716 PetscInt *idx_R_local = NULL; 5717 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5718 PetscInt vbs, bs; 5719 PetscBT bitmask = NULL; 5720 5721 PetscFunctionBegin; 5722 /* 5723 No need to setup local scatters if 5724 - primal space is unchanged 5725 AND 5726 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5727 AND 5728 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5729 */ 5730 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5731 /* destroy old objects */ 5732 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5733 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5734 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5735 /* Set Non-overlapping dimensions */ 5736 n_B = pcis->n_B; 5737 n_D = pcis->n - n_B; 5738 n_vertices = pcbddc->n_vertices; 5739 5740 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5741 5742 /* create auxiliary bitmask and allocate workspace */ 5743 if (!sub_schurs || !sub_schurs->reuse_solver) { 5744 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5745 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5746 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5747 5748 for (i = 0, n_R = 0; i < pcis->n; i++) { 5749 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5750 } 5751 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5752 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5753 5754 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5755 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5756 } 5757 5758 /* Block code */ 5759 vbs = 1; 5760 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5761 if (bs > 1 && !(n_vertices % bs)) { 5762 PetscBool is_blocked = PETSC_TRUE; 5763 PetscInt *vary; 5764 if (!sub_schurs || !sub_schurs->reuse_solver) { 5765 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5766 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5767 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5768 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5769 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5770 for (i = 0; i < pcis->n / bs; i++) { 5771 if (vary[i] != 0 && vary[i] != bs) { 5772 is_blocked = PETSC_FALSE; 5773 break; 5774 } 5775 } 5776 PetscCall(PetscFree(vary)); 5777 } else { 5778 /* Verify directly the R set */ 5779 for (i = 0; i < n_R / bs; i++) { 5780 PetscInt j, node = idx_R_local[bs * i]; 5781 for (j = 1; j < bs; j++) { 5782 if (node != idx_R_local[bs * i + j] - j) { 5783 is_blocked = PETSC_FALSE; 5784 break; 5785 } 5786 } 5787 } 5788 } 5789 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5790 vbs = bs; 5791 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5792 } 5793 } 5794 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5795 if (sub_schurs && sub_schurs->reuse_solver) { 5796 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5797 5798 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5799 PetscCall(ISDestroy(&reuse_solver->is_R)); 5800 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5801 reuse_solver->is_R = pcbddc->is_R_local; 5802 } else { 5803 PetscCall(PetscFree(idx_R_local)); 5804 } 5805 5806 /* print some info if requested */ 5807 if (pcbddc->dbg_flag) { 5808 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5809 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5810 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5811 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5812 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5813 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5814 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5815 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5816 } 5817 5818 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5819 if (!sub_schurs || !sub_schurs->reuse_solver) { 5820 IS is_aux1, is_aux2; 5821 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5822 5823 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5824 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5825 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5826 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5827 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5828 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5829 for (i = 0, j = 0; i < n_R; i++) { 5830 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5831 } 5832 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5833 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5834 for (i = 0, j = 0; i < n_B; i++) { 5835 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5836 } 5837 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5838 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5839 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5840 PetscCall(ISDestroy(&is_aux1)); 5841 PetscCall(ISDestroy(&is_aux2)); 5842 5843 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5844 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5845 for (i = 0, j = 0; i < n_R; i++) { 5846 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5847 } 5848 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5849 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5850 PetscCall(ISDestroy(&is_aux1)); 5851 } 5852 PetscCall(PetscBTDestroy(&bitmask)); 5853 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5854 } else { 5855 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5856 IS tis; 5857 PetscInt schur_size; 5858 5859 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5860 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5861 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5862 PetscCall(ISDestroy(&tis)); 5863 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5864 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5865 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5866 PetscCall(ISDestroy(&tis)); 5867 } 5868 } 5869 PetscFunctionReturn(PETSC_SUCCESS); 5870 } 5871 5872 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5873 { 5874 MatNullSpace NullSpace; 5875 Mat dmat; 5876 const Vec *nullvecs; 5877 Vec v, v2, *nullvecs2; 5878 VecScatter sct = NULL; 5879 PetscScalar *ddata; 5880 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5881 PetscBool nnsp_has_cnst; 5882 5883 PetscFunctionBegin; 5884 if (!is && !B) { /* MATIS */ 5885 Mat_IS *matis = (Mat_IS *)A->data; 5886 5887 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5888 sct = matis->cctx; 5889 PetscCall(PetscObjectReference((PetscObject)sct)); 5890 } else { 5891 PetscCall(MatGetNullSpace(B, &NullSpace)); 5892 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5893 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5894 } 5895 PetscCall(MatGetNullSpace(A, &NullSpace)); 5896 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5897 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5898 5899 PetscCall(MatCreateVecs(A, &v, NULL)); 5900 PetscCall(MatCreateVecs(B, &v2, NULL)); 5901 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5902 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs)); 5903 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5904 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5905 PetscCall(VecGetBlockSize(v2, &bs)); 5906 PetscCall(VecGetSize(v2, &N)); 5907 PetscCall(VecGetLocalSize(v2, &n)); 5908 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5909 for (k = 0; k < nnsp_size; k++) { 5910 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5911 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5912 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5913 } 5914 if (nnsp_has_cnst) { 5915 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5916 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5917 } 5918 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5919 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5920 5921 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5922 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault)); 5923 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5924 PetscCall(MatDestroy(&dmat)); 5925 5926 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5927 PetscCall(PetscFree(nullvecs2)); 5928 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5929 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5930 PetscCall(VecDestroy(&v)); 5931 PetscCall(VecDestroy(&v2)); 5932 PetscCall(VecScatterDestroy(&sct)); 5933 PetscFunctionReturn(PETSC_SUCCESS); 5934 } 5935 5936 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5937 { 5938 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5939 PC_IS *pcis = (PC_IS *)pc->data; 5940 PC pc_temp; 5941 Mat A_RR; 5942 MatNullSpace nnsp; 5943 MatReuse reuse; 5944 PetscScalar m_one = -1.0; 5945 PetscReal value; 5946 PetscInt n_D, n_R; 5947 PetscBool issbaij, opts, isset, issym; 5948 PetscBool f = PETSC_FALSE; 5949 char dir_prefix[256], neu_prefix[256], str_level[16]; 5950 size_t len; 5951 5952 PetscFunctionBegin; 5953 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5954 /* approximate solver, propagate NearNullSpace if needed */ 5955 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5956 MatNullSpace gnnsp1, gnnsp2; 5957 PetscBool lhas, ghas; 5958 5959 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5960 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5961 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5962 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5963 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5964 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5965 } 5966 5967 /* compute prefixes */ 5968 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5969 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5970 if (!pcbddc->current_level) { 5971 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5972 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5973 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5974 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5975 } else { 5976 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 5977 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5978 len -= 15; /* remove "pc_bddc_coarse_" */ 5979 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5980 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5981 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5982 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5983 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5984 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5985 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5986 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5987 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5988 } 5989 5990 /* DIRICHLET PROBLEM */ 5991 if (dirichlet) { 5992 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5993 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5994 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5995 if (pcbddc->dbg_flag) { 5996 Mat A_IIn; 5997 5998 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5999 PetscCall(MatDestroy(&pcis->A_II)); 6000 pcis->A_II = A_IIn; 6001 } 6002 } 6003 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6004 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 6005 6006 /* Matrix for Dirichlet problem is pcis->A_II */ 6007 n_D = pcis->n - pcis->n_B; 6008 opts = PETSC_FALSE; 6009 if (!pcbddc->ksp_D) { /* create object if not yet build */ 6010 opts = PETSC_TRUE; 6011 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 6012 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 6013 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 6014 /* default */ 6015 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 6016 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 6017 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 6018 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6019 if (issbaij) { 6020 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6021 } else { 6022 PetscCall(PCSetType(pc_temp, PCLU)); 6023 } 6024 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 6025 } 6026 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 6027 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 6028 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 6029 /* Allow user's customization */ 6030 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 6031 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 6032 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6033 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 6034 } 6035 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 6036 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6037 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6038 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6039 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6040 const PetscInt *idxs; 6041 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6042 6043 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 6044 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 6045 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6046 for (i = 0; i < nl; i++) { 6047 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6048 } 6049 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 6050 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6051 PetscCall(PetscFree(scoords)); 6052 } 6053 if (sub_schurs && sub_schurs->reuse_solver) { 6054 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6055 6056 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 6057 } 6058 6059 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6060 if (!n_D) { 6061 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 6062 PetscCall(PCSetType(pc_temp, PCNONE)); 6063 } 6064 PetscCall(KSPSetUp(pcbddc->ksp_D)); 6065 /* set ksp_D into pcis data */ 6066 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 6067 PetscCall(KSPDestroy(&pcis->ksp_D)); 6068 pcis->ksp_D = pcbddc->ksp_D; 6069 } 6070 6071 /* NEUMANN PROBLEM */ 6072 A_RR = NULL; 6073 if (neumann) { 6074 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6075 PetscInt ibs, mbs; 6076 PetscBool issbaij, reuse_neumann_solver, isset, issym; 6077 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6078 6079 reuse_neumann_solver = PETSC_FALSE; 6080 if (sub_schurs && sub_schurs->reuse_solver) { 6081 IS iP; 6082 6083 reuse_neumann_solver = PETSC_TRUE; 6084 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 6085 if (iP) reuse_neumann_solver = PETSC_FALSE; 6086 } 6087 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 6088 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 6089 if (pcbddc->ksp_R) { /* already created ksp */ 6090 PetscInt nn_R; 6091 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 6092 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6093 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 6094 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 6095 PetscCall(KSPReset(pcbddc->ksp_R)); 6096 PetscCall(MatDestroy(&A_RR)); 6097 reuse = MAT_INITIAL_MATRIX; 6098 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 6099 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 6100 PetscCall(MatDestroy(&A_RR)); 6101 reuse = MAT_INITIAL_MATRIX; 6102 } else { /* safe to reuse the matrix */ 6103 reuse = MAT_REUSE_MATRIX; 6104 } 6105 } 6106 /* last check */ 6107 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 6108 PetscCall(MatDestroy(&A_RR)); 6109 reuse = MAT_INITIAL_MATRIX; 6110 } 6111 } else { /* first time, so we need to create the matrix */ 6112 reuse = MAT_INITIAL_MATRIX; 6113 } 6114 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 6115 TODO: Get Rid of these conversions */ 6116 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 6117 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 6118 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 6119 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 6120 if (matis->A == pcbddc->local_mat) { 6121 PetscCall(MatDestroy(&pcbddc->local_mat)); 6122 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6123 } else { 6124 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6125 } 6126 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6127 if (matis->A == pcbddc->local_mat) { 6128 PetscCall(MatDestroy(&pcbddc->local_mat)); 6129 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6130 } else { 6131 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6132 } 6133 } 6134 /* extract A_RR */ 6135 if (reuse_neumann_solver) { 6136 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6137 6138 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6139 PetscCall(MatDestroy(&A_RR)); 6140 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6141 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6142 } else { 6143 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6144 } 6145 } else { 6146 PetscCall(MatDestroy(&A_RR)); 6147 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6148 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6149 } 6150 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6151 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6152 } 6153 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6154 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6155 opts = PETSC_FALSE; 6156 if (!pcbddc->ksp_R) { /* create object if not present */ 6157 opts = PETSC_TRUE; 6158 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6159 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6160 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6161 /* default */ 6162 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6163 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6164 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6165 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6166 if (issbaij) { 6167 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6168 } else { 6169 PetscCall(PCSetType(pc_temp, PCLU)); 6170 } 6171 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6172 } 6173 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6174 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6175 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6176 if (opts) { /* Allow user's customization once */ 6177 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6178 } 6179 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6180 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6181 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6182 } 6183 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6184 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6185 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6186 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6187 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6188 const PetscInt *idxs; 6189 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6190 6191 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6192 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6193 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6194 for (i = 0; i < nl; i++) { 6195 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6196 } 6197 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6198 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6199 PetscCall(PetscFree(scoords)); 6200 } 6201 6202 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6203 if (!n_R) { 6204 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6205 PetscCall(PCSetType(pc_temp, PCNONE)); 6206 } 6207 /* Reuse solver if it is present */ 6208 if (reuse_neumann_solver) { 6209 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6210 6211 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6212 } 6213 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6214 } 6215 6216 if (pcbddc->dbg_flag) { 6217 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6218 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6219 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6220 } 6221 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6222 6223 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6224 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6225 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6226 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6227 /* check Dirichlet and Neumann solvers */ 6228 if (pcbddc->dbg_flag) { 6229 if (dirichlet) { /* Dirichlet */ 6230 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6231 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6232 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6233 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6234 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6235 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6236 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6237 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6238 } 6239 if (neumann) { /* Neumann */ 6240 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6241 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6242 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6243 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6244 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6245 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6246 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6247 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6248 } 6249 } 6250 /* free Neumann problem's matrix */ 6251 PetscCall(MatDestroy(&A_RR)); 6252 PetscFunctionReturn(PETSC_SUCCESS); 6253 } 6254 6255 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6256 { 6257 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6258 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6259 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6260 6261 PetscFunctionBegin; 6262 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6263 if (!pcbddc->switch_static) { 6264 if (applytranspose && pcbddc->local_auxmat1) { 6265 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6266 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6267 } 6268 if (!reuse_solver) { 6269 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6270 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6271 } else { 6272 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6273 6274 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6275 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6276 } 6277 } else { 6278 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6279 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6280 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6281 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6282 if (applytranspose && pcbddc->local_auxmat1) { 6283 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6284 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6285 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6286 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6287 } 6288 } 6289 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6290 if (!reuse_solver || pcbddc->switch_static) { 6291 if (applytranspose) { 6292 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6293 } else { 6294 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6295 } 6296 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6297 } else { 6298 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6299 6300 if (applytranspose) { 6301 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6302 } else { 6303 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6304 } 6305 } 6306 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6307 PetscCall(VecSet(inout_B, 0.)); 6308 if (!pcbddc->switch_static) { 6309 if (!reuse_solver) { 6310 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6311 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6312 } else { 6313 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6314 6315 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6316 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6317 } 6318 if (!applytranspose && pcbddc->local_auxmat1) { 6319 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6320 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6321 } 6322 } else { 6323 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6324 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6325 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6326 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6327 if (!applytranspose && pcbddc->local_auxmat1) { 6328 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6329 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6330 } 6331 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6332 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6333 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6334 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6335 } 6336 PetscFunctionReturn(PETSC_SUCCESS); 6337 } 6338 6339 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6340 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6341 { 6342 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6343 PC_IS *pcis = (PC_IS *)pc->data; 6344 const PetscScalar zero = 0.0; 6345 6346 PetscFunctionBegin; 6347 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6348 if (!pcbddc->benign_apply_coarse_only) { 6349 if (applytranspose) { 6350 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6351 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6352 } else { 6353 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6354 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6355 } 6356 } else { 6357 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6358 } 6359 6360 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6361 if (pcbddc->benign_n) { 6362 PetscScalar *array; 6363 PetscInt j; 6364 6365 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6366 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6367 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6368 } 6369 6370 /* start communications from local primal nodes to rhs of coarse solver */ 6371 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6372 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6373 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6374 6375 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6376 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6377 if (pcbddc->coarse_ksp) { 6378 Mat coarse_mat; 6379 Vec rhs, sol; 6380 MatNullSpace nullsp; 6381 PetscBool isbddc = PETSC_FALSE; 6382 6383 if (pcbddc->benign_have_null) { 6384 PC coarse_pc; 6385 6386 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6387 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6388 /* we need to propagate to coarser levels the need for a possible benign correction */ 6389 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6390 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6391 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6392 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6393 } 6394 } 6395 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6396 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6397 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6398 if (applytranspose) { 6399 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6400 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6401 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6402 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6403 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6404 } else { 6405 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6406 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6407 PC coarse_pc; 6408 6409 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6410 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6411 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6412 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6413 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6414 } else { 6415 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6416 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6417 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6418 } 6419 } 6420 /* we don't need the benign correction at coarser levels anymore */ 6421 if (pcbddc->benign_have_null && isbddc) { 6422 PC coarse_pc; 6423 PC_BDDC *coarsepcbddc; 6424 6425 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6426 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6427 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6428 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6429 } 6430 } 6431 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6432 6433 /* Local solution on R nodes */ 6434 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6435 /* communications from coarse sol to local primal nodes */ 6436 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6437 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6438 6439 /* Sum contributions from the two levels */ 6440 if (!pcbddc->benign_apply_coarse_only) { 6441 if (applytranspose) { 6442 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6443 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6444 } else { 6445 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6446 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6447 } 6448 /* store p0 */ 6449 if (pcbddc->benign_n) { 6450 PetscScalar *array; 6451 PetscInt j; 6452 6453 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6454 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6455 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6456 } 6457 } else { /* expand the coarse solution */ 6458 if (applytranspose) { 6459 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6460 } else { 6461 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6462 } 6463 } 6464 PetscFunctionReturn(PETSC_SUCCESS); 6465 } 6466 6467 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6468 { 6469 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6470 Vec from, to; 6471 const PetscScalar *array; 6472 6473 PetscFunctionBegin; 6474 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6475 from = pcbddc->coarse_vec; 6476 to = pcbddc->vec1_P; 6477 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6478 Vec tvec; 6479 6480 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6481 PetscCall(VecResetArray(tvec)); 6482 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6483 PetscCall(VecGetArrayRead(tvec, &array)); 6484 PetscCall(VecPlaceArray(from, array)); 6485 PetscCall(VecRestoreArrayRead(tvec, &array)); 6486 } 6487 } else { /* from local to global -> put data in coarse right-hand side */ 6488 from = pcbddc->vec1_P; 6489 to = pcbddc->coarse_vec; 6490 } 6491 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6492 PetscFunctionReturn(PETSC_SUCCESS); 6493 } 6494 6495 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6496 { 6497 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6498 Vec from, to; 6499 const PetscScalar *array; 6500 6501 PetscFunctionBegin; 6502 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6503 from = pcbddc->coarse_vec; 6504 to = pcbddc->vec1_P; 6505 } else { /* from local to global -> put data in coarse right-hand side */ 6506 from = pcbddc->vec1_P; 6507 to = pcbddc->coarse_vec; 6508 } 6509 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6510 if (smode == SCATTER_FORWARD) { 6511 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6512 Vec tvec; 6513 6514 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6515 PetscCall(VecGetArrayRead(to, &array)); 6516 PetscCall(VecPlaceArray(tvec, array)); 6517 PetscCall(VecRestoreArrayRead(to, &array)); 6518 } 6519 } else { 6520 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6521 PetscCall(VecResetArray(from)); 6522 } 6523 } 6524 PetscFunctionReturn(PETSC_SUCCESS); 6525 } 6526 6527 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6528 { 6529 PC_IS *pcis = (PC_IS *)pc->data; 6530 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6531 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6532 /* one and zero */ 6533 PetscScalar one = 1.0, zero = 0.0; 6534 /* space to store constraints and their local indices */ 6535 PetscScalar *constraints_data; 6536 PetscInt *constraints_idxs, *constraints_idxs_B; 6537 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6538 PetscInt *constraints_n; 6539 /* iterators */ 6540 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6541 /* BLAS integers */ 6542 PetscBLASInt lwork, lierr; 6543 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6544 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6545 /* reuse */ 6546 PetscInt olocal_primal_size, olocal_primal_size_cc; 6547 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6548 /* change of basis */ 6549 PetscBool qr_needed; 6550 PetscBT change_basis, qr_needed_idx; 6551 /* auxiliary stuff */ 6552 PetscInt *nnz, *is_indices; 6553 PetscInt ncc; 6554 /* some quantities */ 6555 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6556 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6557 PetscReal tol; /* tolerance for retaining eigenmodes */ 6558 6559 PetscFunctionBegin; 6560 tol = PetscSqrtReal(PETSC_SMALL); 6561 /* Destroy Mat objects computed previously */ 6562 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6563 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6564 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6565 /* save info on constraints from previous setup (if any) */ 6566 olocal_primal_size = pcbddc->local_primal_size; 6567 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6568 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6569 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6570 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6571 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6572 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6573 6574 if (!pcbddc->adaptive_selection) { 6575 IS ISForVertices, *ISForFaces, *ISForEdges; 6576 MatNullSpace nearnullsp; 6577 const Vec *nearnullvecs; 6578 Vec *localnearnullsp; 6579 PetscScalar *array; 6580 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6581 PetscBool nnsp_has_cnst; 6582 /* LAPACK working arrays for SVD or POD */ 6583 PetscBool skip_lapack, boolforchange; 6584 PetscScalar *work; 6585 PetscReal *singular_vals; 6586 #if defined(PETSC_USE_COMPLEX) 6587 PetscReal *rwork; 6588 #endif 6589 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6590 PetscBLASInt dummy_int = 1; 6591 PetscScalar dummy_scalar = 1.; 6592 PetscBool use_pod = PETSC_FALSE; 6593 6594 /* MKL SVD with same input gives different results on different processes! */ 6595 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6596 use_pod = PETSC_TRUE; 6597 #endif 6598 /* Get index sets for faces, edges and vertices from graph */ 6599 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6600 o_nf = n_ISForFaces; 6601 o_ne = n_ISForEdges; 6602 n_vertices = 0; 6603 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6604 /* print some info */ 6605 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6606 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6607 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6608 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6609 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6610 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6611 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6612 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6613 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6614 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6615 } 6616 6617 if (!pcbddc->use_vertices) n_vertices = 0; 6618 if (!pcbddc->use_edges) n_ISForEdges = 0; 6619 if (!pcbddc->use_faces) n_ISForFaces = 0; 6620 6621 /* check if near null space is attached to global mat */ 6622 if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6623 else nearnullsp = NULL; 6624 6625 if (nearnullsp) { 6626 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6627 /* remove any stored info */ 6628 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6629 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6630 /* store information for BDDC solver reuse */ 6631 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6632 pcbddc->onearnullspace = nearnullsp; 6633 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6634 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6635 } else { /* if near null space is not provided BDDC uses constants by default */ 6636 nnsp_size = 0; 6637 nnsp_has_cnst = PETSC_TRUE; 6638 } 6639 /* get max number of constraints on a single cc */ 6640 max_constraints = nnsp_size; 6641 if (nnsp_has_cnst) max_constraints++; 6642 6643 /* 6644 Evaluate maximum storage size needed by the procedure 6645 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6646 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6647 There can be multiple constraints per connected component 6648 */ 6649 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6650 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6651 6652 total_counts = n_ISForFaces + n_ISForEdges; 6653 total_counts *= max_constraints; 6654 total_counts += n_vertices; 6655 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6656 6657 total_counts = 0; 6658 max_size_of_constraint = 0; 6659 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6660 IS used_is; 6661 if (i < n_ISForEdges) { 6662 used_is = ISForEdges[i]; 6663 } else { 6664 used_is = ISForFaces[i - n_ISForEdges]; 6665 } 6666 PetscCall(ISGetSize(used_is, &j)); 6667 total_counts += j; 6668 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6669 } 6670 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6671 6672 /* get local part of global near null space vectors */ 6673 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6674 for (k = 0; k < nnsp_size; k++) { 6675 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6676 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6677 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6678 } 6679 6680 /* whether or not to skip lapack calls */ 6681 skip_lapack = PETSC_TRUE; 6682 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6683 6684 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6685 if (!skip_lapack) { 6686 PetscScalar temp_work; 6687 6688 if (use_pod) { 6689 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6690 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6691 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6692 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6693 #if defined(PETSC_USE_COMPLEX) 6694 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6695 #endif 6696 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6697 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6698 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6699 lwork = -1; 6700 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6701 #if !defined(PETSC_USE_COMPLEX) 6702 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6703 #else 6704 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6705 #endif 6706 PetscCall(PetscFPTrapPop()); 6707 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6708 } else { 6709 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6710 /* SVD */ 6711 PetscInt max_n, min_n; 6712 max_n = max_size_of_constraint; 6713 min_n = max_constraints; 6714 if (max_size_of_constraint < max_constraints) { 6715 min_n = max_size_of_constraint; 6716 max_n = max_constraints; 6717 } 6718 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6719 #if defined(PETSC_USE_COMPLEX) 6720 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6721 #endif 6722 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6723 lwork = -1; 6724 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6725 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6726 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6727 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6728 #if !defined(PETSC_USE_COMPLEX) 6729 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6730 #else 6731 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6732 #endif 6733 PetscCall(PetscFPTrapPop()); 6734 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6735 #else 6736 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6737 #endif /* on missing GESVD */ 6738 } 6739 /* Allocate optimal workspace */ 6740 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6741 PetscCall(PetscMalloc1(lwork, &work)); 6742 } 6743 /* Now we can loop on constraining sets */ 6744 total_counts = 0; 6745 constraints_idxs_ptr[0] = 0; 6746 constraints_data_ptr[0] = 0; 6747 /* vertices */ 6748 if (n_vertices) { 6749 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6750 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6751 for (i = 0; i < n_vertices; i++) { 6752 constraints_n[total_counts] = 1; 6753 constraints_data[total_counts] = 1.0; 6754 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6755 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6756 total_counts++; 6757 } 6758 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6759 } 6760 6761 /* edges and faces */ 6762 total_counts_cc = total_counts; 6763 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6764 IS used_is; 6765 PetscBool idxs_copied = PETSC_FALSE; 6766 6767 if (ncc < n_ISForEdges) { 6768 used_is = ISForEdges[ncc]; 6769 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6770 } else { 6771 used_is = ISForFaces[ncc - n_ISForEdges]; 6772 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6773 } 6774 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6775 6776 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6777 if (!size_of_constraint) continue; 6778 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6779 if (nnsp_has_cnst) { 6780 PetscScalar quad_value; 6781 6782 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6783 idxs_copied = PETSC_TRUE; 6784 6785 if (!pcbddc->use_nnsp_true) { 6786 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6787 } else { 6788 quad_value = 1.0; 6789 } 6790 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6791 temp_constraints++; 6792 total_counts++; 6793 } 6794 for (k = 0; k < nnsp_size; k++) { 6795 PetscReal real_value; 6796 PetscScalar *ptr_to_data; 6797 6798 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6799 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6800 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6801 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6802 /* check if array is null on the connected component */ 6803 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6804 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6805 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6806 temp_constraints++; 6807 total_counts++; 6808 if (!idxs_copied) { 6809 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6810 idxs_copied = PETSC_TRUE; 6811 } 6812 } 6813 } 6814 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6815 valid_constraints = temp_constraints; 6816 if (!pcbddc->use_nnsp_true && temp_constraints) { 6817 if (temp_constraints == 1) { /* just normalize the constraint */ 6818 PetscScalar norm, *ptr_to_data; 6819 6820 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6821 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6822 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6823 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6824 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6825 } else { /* perform SVD */ 6826 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6827 6828 if (use_pod) { 6829 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6830 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6831 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6832 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6833 from that computed using LAPACKgesvd 6834 -> This is due to a different computation of eigenvectors in LAPACKheev 6835 -> The quality of the POD-computed basis will be the same */ 6836 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6837 /* Store upper triangular part of correlation matrix */ 6838 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6839 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6840 for (j = 0; j < temp_constraints; j++) { 6841 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6842 } 6843 /* compute eigenvalues and eigenvectors of correlation matrix */ 6844 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6845 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6846 #if !defined(PETSC_USE_COMPLEX) 6847 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6848 #else 6849 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6850 #endif 6851 PetscCall(PetscFPTrapPop()); 6852 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6853 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6854 j = 0; 6855 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6856 total_counts = total_counts - j; 6857 valid_constraints = temp_constraints - j; 6858 /* scale and copy POD basis into used quadrature memory */ 6859 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6860 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6861 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6862 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6863 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6864 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6865 if (j < temp_constraints) { 6866 PetscInt ii; 6867 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6868 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6869 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6870 PetscCall(PetscFPTrapPop()); 6871 for (k = 0; k < temp_constraints - j; k++) { 6872 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6873 } 6874 } 6875 } else { 6876 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6877 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6878 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6879 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6880 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6881 #if !defined(PETSC_USE_COMPLEX) 6882 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6883 #else 6884 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6885 #endif 6886 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6887 PetscCall(PetscFPTrapPop()); 6888 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6889 k = temp_constraints; 6890 if (k > size_of_constraint) k = size_of_constraint; 6891 j = 0; 6892 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6893 valid_constraints = k - j; 6894 total_counts = total_counts - temp_constraints + valid_constraints; 6895 #else 6896 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6897 #endif /* on missing GESVD */ 6898 } 6899 } 6900 } 6901 /* update pointers information */ 6902 if (valid_constraints) { 6903 constraints_n[total_counts_cc] = valid_constraints; 6904 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6905 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6906 /* set change_of_basis flag */ 6907 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6908 total_counts_cc++; 6909 } 6910 } 6911 /* free workspace */ 6912 if (!skip_lapack) { 6913 PetscCall(PetscFree(work)); 6914 #if defined(PETSC_USE_COMPLEX) 6915 PetscCall(PetscFree(rwork)); 6916 #endif 6917 PetscCall(PetscFree(singular_vals)); 6918 PetscCall(PetscFree(correlation_mat)); 6919 PetscCall(PetscFree(temp_basis)); 6920 } 6921 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6922 PetscCall(PetscFree(localnearnullsp)); 6923 /* free index sets of faces, edges and vertices */ 6924 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6925 } else { 6926 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6927 6928 total_counts = 0; 6929 n_vertices = 0; 6930 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6931 max_constraints = 0; 6932 total_counts_cc = 0; 6933 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6934 total_counts += pcbddc->adaptive_constraints_n[i]; 6935 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6936 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6937 } 6938 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6939 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6940 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6941 constraints_data = pcbddc->adaptive_constraints_data; 6942 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6943 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6944 total_counts_cc = 0; 6945 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6946 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6947 } 6948 6949 max_size_of_constraint = 0; 6950 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6951 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6952 /* Change of basis */ 6953 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6954 if (pcbddc->use_change_of_basis) { 6955 for (i = 0; i < sub_schurs->n_subs; i++) { 6956 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6957 } 6958 } 6959 } 6960 pcbddc->local_primal_size = total_counts; 6961 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6962 6963 /* map constraints_idxs in boundary numbering */ 6964 if (pcbddc->use_change_of_basis) { 6965 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6966 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6967 } 6968 6969 /* Create constraint matrix */ 6970 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6971 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6972 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6973 6974 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6975 /* determine if a QR strategy is needed for change of basis */ 6976 qr_needed = pcbddc->use_qr_single; 6977 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6978 total_primal_vertices = 0; 6979 pcbddc->local_primal_size_cc = 0; 6980 for (i = 0; i < total_counts_cc; i++) { 6981 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6982 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6983 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6984 pcbddc->local_primal_size_cc += 1; 6985 } else if (PetscBTLookup(change_basis, i)) { 6986 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6987 pcbddc->local_primal_size_cc += constraints_n[i]; 6988 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6989 PetscCall(PetscBTSet(qr_needed_idx, i)); 6990 qr_needed = PETSC_TRUE; 6991 } 6992 } else { 6993 pcbddc->local_primal_size_cc += 1; 6994 } 6995 } 6996 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6997 pcbddc->n_vertices = total_primal_vertices; 6998 /* permute indices in order to have a sorted set of vertices */ 6999 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 7000 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 7001 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 7002 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 7003 7004 /* nonzero structure of constraint matrix */ 7005 /* and get reference dof for local constraints */ 7006 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 7007 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 7008 7009 j = total_primal_vertices; 7010 total_counts = total_primal_vertices; 7011 cum = total_primal_vertices; 7012 for (i = n_vertices; i < total_counts_cc; i++) { 7013 if (!PetscBTLookup(change_basis, i)) { 7014 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 7015 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 7016 cum++; 7017 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7018 for (k = 0; k < constraints_n[i]; k++) { 7019 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 7020 nnz[j + k] = size_of_constraint; 7021 } 7022 j += constraints_n[i]; 7023 } 7024 } 7025 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 7026 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7027 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 7028 PetscCall(PetscFree(nnz)); 7029 7030 /* set values in constraint matrix */ 7031 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 7032 total_counts = total_primal_vertices; 7033 for (i = n_vertices; i < total_counts_cc; i++) { 7034 if (!PetscBTLookup(change_basis, i)) { 7035 PetscInt *cols; 7036 7037 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7038 cols = constraints_idxs + constraints_idxs_ptr[i]; 7039 for (k = 0; k < constraints_n[i]; k++) { 7040 PetscInt row = total_counts + k; 7041 PetscScalar *vals; 7042 7043 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 7044 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 7045 } 7046 total_counts += constraints_n[i]; 7047 } 7048 } 7049 /* assembling */ 7050 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 7051 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 7052 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 7053 7054 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 7055 if (pcbddc->use_change_of_basis) { 7056 /* dual and primal dofs on a single cc */ 7057 PetscInt dual_dofs, primal_dofs; 7058 /* working stuff for GEQRF */ 7059 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 7060 PetscBLASInt lqr_work; 7061 /* working stuff for UNGQR */ 7062 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 7063 PetscBLASInt lgqr_work; 7064 /* working stuff for TRTRS */ 7065 PetscScalar *trs_rhs = NULL; 7066 PetscBLASInt Blas_NRHS; 7067 /* pointers for values insertion into change of basis matrix */ 7068 PetscInt *start_rows, *start_cols; 7069 PetscScalar *start_vals; 7070 /* working stuff for values insertion */ 7071 PetscBT is_primal; 7072 PetscInt *aux_primal_numbering_B; 7073 /* matrix sizes */ 7074 PetscInt global_size, local_size; 7075 /* temporary change of basis */ 7076 Mat localChangeOfBasisMatrix; 7077 /* extra space for debugging */ 7078 PetscScalar *dbg_work = NULL; 7079 7080 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 7081 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 7082 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 7083 /* nonzeros for local mat */ 7084 PetscCall(PetscMalloc1(pcis->n, &nnz)); 7085 if (!pcbddc->benign_change || pcbddc->fake_change) { 7086 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 7087 } else { 7088 const PetscInt *ii; 7089 PetscInt n; 7090 PetscBool flg_row; 7091 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7092 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 7093 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7094 } 7095 for (i = n_vertices; i < total_counts_cc; i++) { 7096 if (PetscBTLookup(change_basis, i)) { 7097 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7098 if (PetscBTLookup(qr_needed_idx, i)) { 7099 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 7100 } else { 7101 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 7102 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 7103 } 7104 } 7105 } 7106 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 7107 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7108 PetscCall(PetscFree(nnz)); 7109 /* Set interior change in the matrix */ 7110 if (!pcbddc->benign_change || pcbddc->fake_change) { 7111 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 7112 } else { 7113 const PetscInt *ii, *jj; 7114 PetscScalar *aa; 7115 PetscInt n; 7116 PetscBool flg_row; 7117 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7118 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 7119 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 7120 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 7121 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7122 } 7123 7124 if (pcbddc->dbg_flag) { 7125 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7126 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7127 } 7128 7129 /* Now we loop on the constraints which need a change of basis */ 7130 /* 7131 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7132 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7133 7134 Basic blocks of change of basis matrix T computed: 7135 7136 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7137 7138 | 1 0 ... 0 s_1/S | 7139 | 0 1 ... 0 s_2/S | 7140 | ... | 7141 | 0 ... 1 s_{n-1}/S | 7142 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7143 7144 with S = \sum_{i=1}^n s_i^2 7145 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7146 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7147 7148 - QR decomposition of constraints otherwise 7149 */ 7150 if (qr_needed && max_size_of_constraint) { 7151 /* space to store Q */ 7152 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7153 /* array to store scaling factors for reflectors */ 7154 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7155 /* first we issue queries for optimal work */ 7156 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7157 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7158 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7159 lqr_work = -1; 7160 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7161 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7162 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7163 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7164 lgqr_work = -1; 7165 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7166 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7167 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7168 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7169 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7170 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7171 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7172 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7173 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7174 /* array to store rhs and solution of triangular solver */ 7175 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7176 /* allocating workspace for check */ 7177 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7178 } 7179 /* array to store whether a node is primal or not */ 7180 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7181 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7182 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7183 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7184 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7185 PetscCall(PetscFree(aux_primal_numbering_B)); 7186 7187 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7188 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7189 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7190 if (PetscBTLookup(change_basis, total_counts)) { 7191 /* get constraint info */ 7192 primal_dofs = constraints_n[total_counts]; 7193 dual_dofs = size_of_constraint - primal_dofs; 7194 7195 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7196 7197 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7198 7199 /* copy quadrature constraints for change of basis check */ 7200 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7201 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7202 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7203 7204 /* compute QR decomposition of constraints */ 7205 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7206 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7207 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7208 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7209 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7210 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7211 PetscCall(PetscFPTrapPop()); 7212 7213 /* explicitly compute R^-T */ 7214 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7215 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7216 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7217 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7218 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7219 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7220 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7221 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7222 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr); 7223 PetscCall(PetscFPTrapPop()); 7224 7225 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7226 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7227 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7228 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7229 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7230 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7231 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7232 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7233 PetscCall(PetscFPTrapPop()); 7234 7235 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7236 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7237 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7238 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7239 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7240 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7241 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7242 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7243 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7244 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7245 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7246 PetscCall(PetscFPTrapPop()); 7247 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7248 7249 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7250 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7251 /* insert cols for primal dofs */ 7252 for (j = 0; j < primal_dofs; j++) { 7253 start_vals = &qr_basis[j * size_of_constraint]; 7254 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7255 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7256 } 7257 /* insert cols for dual dofs */ 7258 for (j = 0, k = 0; j < dual_dofs; k++) { 7259 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7260 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7261 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7262 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7263 j++; 7264 } 7265 } 7266 7267 /* check change of basis */ 7268 if (pcbddc->dbg_flag) { 7269 PetscInt ii, jj; 7270 PetscBool valid_qr = PETSC_TRUE; 7271 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7272 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7273 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7274 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7275 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7276 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7277 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7278 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7279 PetscCall(PetscFPTrapPop()); 7280 for (jj = 0; jj < size_of_constraint; jj++) { 7281 for (ii = 0; ii < primal_dofs; ii++) { 7282 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7283 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7284 } 7285 } 7286 if (!valid_qr) { 7287 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7288 for (jj = 0; jj < size_of_constraint; jj++) { 7289 for (ii = 0; ii < primal_dofs; ii++) { 7290 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7291 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7292 } 7293 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7294 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7295 } 7296 } 7297 } 7298 } else { 7299 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7300 } 7301 } 7302 } else { /* simple transformation block */ 7303 PetscInt row, col; 7304 PetscScalar val, norm; 7305 7306 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7307 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7308 for (j = 0; j < size_of_constraint; j++) { 7309 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7310 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7311 if (!PetscBTLookup(is_primal, row_B)) { 7312 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7313 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7314 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7315 } else { 7316 for (k = 0; k < size_of_constraint; k++) { 7317 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7318 if (row != col) { 7319 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7320 } else { 7321 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7322 } 7323 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7324 } 7325 } 7326 } 7327 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7328 } 7329 } else { 7330 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7331 } 7332 } 7333 7334 /* free workspace */ 7335 if (qr_needed) { 7336 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7337 PetscCall(PetscFree(trs_rhs)); 7338 PetscCall(PetscFree(qr_tau)); 7339 PetscCall(PetscFree(qr_work)); 7340 PetscCall(PetscFree(gqr_work)); 7341 PetscCall(PetscFree(qr_basis)); 7342 } 7343 PetscCall(PetscBTDestroy(&is_primal)); 7344 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7345 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7346 7347 /* assembling of global change of variable */ 7348 if (!pcbddc->fake_change) { 7349 Mat tmat; 7350 7351 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7352 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7353 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7354 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7355 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7356 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7357 PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7358 PetscCall(MatDestroy(&tmat)); 7359 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7360 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7361 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7362 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7363 PetscCall(VecReciprocal(pcis->vec1_global)); 7364 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7365 7366 /* check */ 7367 if (pcbddc->dbg_flag) { 7368 PetscReal error; 7369 Vec x, x_change; 7370 7371 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7372 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7373 PetscCall(VecSetRandom(x, NULL)); 7374 PetscCall(VecCopy(x, pcis->vec1_global)); 7375 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7376 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7377 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7378 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7379 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7380 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7381 PetscCall(VecAXPY(x, -1.0, x_change)); 7382 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7383 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7384 PetscCall(VecDestroy(&x)); 7385 PetscCall(VecDestroy(&x_change)); 7386 } 7387 /* adapt sub_schurs computed (if any) */ 7388 if (pcbddc->use_deluxe_scaling) { 7389 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7390 7391 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7392 if (sub_schurs && sub_schurs->S_Ej_all) { 7393 Mat S_new, tmat; 7394 IS is_all_N, is_V_Sall = NULL; 7395 7396 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7397 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7398 if (pcbddc->deluxe_zerorows) { 7399 ISLocalToGlobalMapping NtoSall; 7400 IS is_V; 7401 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7402 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7403 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7404 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7405 PetscCall(ISDestroy(&is_V)); 7406 } 7407 PetscCall(ISDestroy(&is_all_N)); 7408 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7409 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7410 PetscCall(PetscObjectReference((PetscObject)S_new)); 7411 if (pcbddc->deluxe_zerorows) { 7412 const PetscScalar *array; 7413 const PetscInt *idxs_V, *idxs_all; 7414 PetscInt i, n_V; 7415 7416 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7417 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7418 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7419 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7420 PetscCall(VecGetArrayRead(pcis->D, &array)); 7421 for (i = 0; i < n_V; i++) { 7422 PetscScalar val; 7423 PetscInt idx; 7424 7425 idx = idxs_V[i]; 7426 val = array[idxs_all[idxs_V[i]]]; 7427 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7428 } 7429 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7430 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7431 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7432 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7433 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7434 } 7435 sub_schurs->S_Ej_all = S_new; 7436 PetscCall(MatDestroy(&S_new)); 7437 if (sub_schurs->sum_S_Ej_all) { 7438 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7439 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7440 PetscCall(PetscObjectReference((PetscObject)S_new)); 7441 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7442 sub_schurs->sum_S_Ej_all = S_new; 7443 PetscCall(MatDestroy(&S_new)); 7444 } 7445 PetscCall(ISDestroy(&is_V_Sall)); 7446 PetscCall(MatDestroy(&tmat)); 7447 } 7448 /* destroy any change of basis context in sub_schurs */ 7449 if (sub_schurs && sub_schurs->change) { 7450 PetscInt i; 7451 7452 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7453 PetscCall(PetscFree(sub_schurs->change)); 7454 } 7455 } 7456 if (pcbddc->switch_static) { /* need to save the local change */ 7457 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7458 } else { 7459 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7460 } 7461 /* determine if any process has changed the pressures locally */ 7462 pcbddc->change_interior = pcbddc->benign_have_null; 7463 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7464 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7465 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7466 pcbddc->use_qr_single = qr_needed; 7467 } 7468 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7469 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7470 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7471 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7472 } else { 7473 Mat benign_global = NULL; 7474 if (pcbddc->benign_have_null) { 7475 Mat M; 7476 7477 pcbddc->change_interior = PETSC_TRUE; 7478 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7479 PetscCall(VecReciprocal(pcis->vec1_N)); 7480 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7481 if (pcbddc->benign_change) { 7482 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7483 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7484 } else { 7485 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7486 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7487 } 7488 PetscCall(MatISSetLocalMat(benign_global, M)); 7489 PetscCall(MatDestroy(&M)); 7490 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7491 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7492 } 7493 if (pcbddc->user_ChangeOfBasisMatrix) { 7494 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7495 PetscCall(MatDestroy(&benign_global)); 7496 } else if (pcbddc->benign_have_null) { 7497 pcbddc->ChangeOfBasisMatrix = benign_global; 7498 } 7499 } 7500 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7501 IS is_global; 7502 const PetscInt *gidxs; 7503 7504 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7505 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7506 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7507 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7508 PetscCall(ISDestroy(&is_global)); 7509 } 7510 } 7511 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7512 7513 if (!pcbddc->fake_change) { 7514 /* add pressure dofs to set of primal nodes for numbering purposes */ 7515 for (i = 0; i < pcbddc->benign_n; i++) { 7516 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7517 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7518 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7519 pcbddc->local_primal_size_cc++; 7520 pcbddc->local_primal_size++; 7521 } 7522 7523 /* check if a new primal space has been introduced (also take into account benign trick) */ 7524 pcbddc->new_primal_space_local = PETSC_TRUE; 7525 if (olocal_primal_size == pcbddc->local_primal_size) { 7526 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7527 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7528 if (!pcbddc->new_primal_space_local) { 7529 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7530 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7531 } 7532 } 7533 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7534 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7535 } 7536 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7537 7538 /* flush dbg viewer */ 7539 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7540 7541 /* free workspace */ 7542 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7543 PetscCall(PetscBTDestroy(&change_basis)); 7544 if (!pcbddc->adaptive_selection) { 7545 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7546 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7547 } else { 7548 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7549 PetscCall(PetscFree(constraints_n)); 7550 PetscCall(PetscFree(constraints_idxs_B)); 7551 } 7552 PetscFunctionReturn(PETSC_SUCCESS); 7553 } 7554 7555 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7556 { 7557 ISLocalToGlobalMapping map; 7558 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7559 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7560 PetscInt i, N; 7561 PetscBool rcsr = PETSC_FALSE; 7562 7563 PetscFunctionBegin; 7564 if (pcbddc->recompute_topography) { 7565 pcbddc->graphanalyzed = PETSC_FALSE; 7566 /* Reset previously computed graph */ 7567 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7568 /* Init local Graph struct */ 7569 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7570 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7571 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7572 7573 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7574 /* Check validity of the csr graph passed in by the user */ 7575 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7576 pcbddc->mat_graph->nvtxs); 7577 7578 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7579 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7580 PetscInt *xadj, *adjncy; 7581 PetscInt nvtxs; 7582 PetscBool flg_row; 7583 Mat A; 7584 7585 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7586 A = matis->A; 7587 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7588 Mat AtA; 7589 7590 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7591 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7592 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7593 PetscCall(MatProductSetFromOptions(AtA)); 7594 PetscCall(MatProductSymbolic(AtA)); 7595 PetscCall(MatProductClear(AtA)); 7596 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7597 AtA->assembled = PETSC_TRUE; 7598 PetscCall(MatDestroy(&A)); 7599 A = AtA; 7600 } 7601 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7602 if (flg_row) { 7603 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7604 pcbddc->computed_rowadj = PETSC_TRUE; 7605 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7606 rcsr = PETSC_TRUE; 7607 } 7608 PetscCall(MatDestroy(&A)); 7609 } 7610 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7611 7612 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7613 PetscReal *lcoords; 7614 PetscInt n; 7615 MPI_Datatype dimrealtype; 7616 PetscMPIInt cdimi; 7617 7618 /* TODO: support for blocked */ 7619 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7620 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7621 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7622 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi)); 7623 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype)); 7624 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7625 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7626 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7627 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7628 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7629 7630 pcbddc->mat_graph->coords = lcoords; 7631 pcbddc->mat_graph->cloc = PETSC_TRUE; 7632 pcbddc->mat_graph->cnloc = n; 7633 } 7634 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7635 pcbddc->mat_graph->nvtxs); 7636 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7637 7638 /* attach info on disconnected subdomains if present */ 7639 if (pcbddc->n_local_subs) { 7640 PetscInt *local_subs, n, totn; 7641 7642 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7643 PetscCall(PetscMalloc1(n, &local_subs)); 7644 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7645 for (i = 0; i < pcbddc->n_local_subs; i++) { 7646 const PetscInt *idxs; 7647 PetscInt nl, j; 7648 7649 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7650 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7651 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7652 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7653 } 7654 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7655 pcbddc->mat_graph->n_local_subs = totn + 1; 7656 pcbddc->mat_graph->local_subs = local_subs; 7657 } 7658 7659 /* Setup of Graph */ 7660 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7661 } 7662 7663 if (!pcbddc->graphanalyzed) { 7664 /* Graph's connected components analysis */ 7665 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7666 pcbddc->graphanalyzed = PETSC_TRUE; 7667 pcbddc->corner_selected = pcbddc->corner_selection; 7668 } 7669 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7670 PetscFunctionReturn(PETSC_SUCCESS); 7671 } 7672 7673 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7674 { 7675 PetscInt i, j, n; 7676 PetscScalar *alphas; 7677 PetscReal norm, *onorms; 7678 7679 PetscFunctionBegin; 7680 n = *nio; 7681 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7682 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7683 PetscCall(VecNormalize(vecs[0], &norm)); 7684 if (norm < PETSC_SMALL) { 7685 onorms[0] = 0.0; 7686 PetscCall(VecSet(vecs[0], 0.0)); 7687 } else { 7688 onorms[0] = norm; 7689 } 7690 7691 for (i = 1; i < n; i++) { 7692 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7693 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7694 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7695 PetscCall(VecNormalize(vecs[i], &norm)); 7696 if (norm < PETSC_SMALL) { 7697 onorms[i] = 0.0; 7698 PetscCall(VecSet(vecs[i], 0.0)); 7699 } else { 7700 onorms[i] = norm; 7701 } 7702 } 7703 /* push nonzero vectors at the beginning */ 7704 for (i = 0; i < n; i++) { 7705 if (onorms[i] == 0.0) { 7706 for (j = i + 1; j < n; j++) { 7707 if (onorms[j] != 0.0) { 7708 PetscCall(VecCopy(vecs[j], vecs[i])); 7709 onorms[i] = onorms[j]; 7710 onorms[j] = 0.0; 7711 break; 7712 } 7713 } 7714 } 7715 } 7716 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7717 PetscCall(PetscFree2(alphas, onorms)); 7718 PetscFunctionReturn(PETSC_SUCCESS); 7719 } 7720 7721 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7722 { 7723 ISLocalToGlobalMapping mapping; 7724 Mat A; 7725 PetscInt n_neighs, *neighs, *n_shared, **shared; 7726 PetscMPIInt size, rank, color; 7727 PetscInt *xadj, *adjncy; 7728 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7729 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7730 PetscInt void_procs, *procs_candidates = NULL; 7731 PetscInt xadj_count, *count; 7732 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7733 PetscSubcomm psubcomm; 7734 MPI_Comm subcomm; 7735 7736 PetscFunctionBegin; 7737 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7738 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7739 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7740 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7741 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7742 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7743 7744 if (have_void) *have_void = PETSC_FALSE; 7745 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7746 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7747 PetscCall(MatISGetLocalMat(mat, &A)); 7748 PetscCall(MatGetLocalSize(A, &n, NULL)); 7749 im_active = !!n; 7750 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7751 void_procs = size - active_procs; 7752 /* get ranks of non-active processes in mat communicator */ 7753 if (void_procs) { 7754 PetscInt ncand; 7755 7756 if (have_void) *have_void = PETSC_TRUE; 7757 PetscCall(PetscMalloc1(size, &procs_candidates)); 7758 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7759 for (i = 0, ncand = 0; i < size; i++) { 7760 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7761 } 7762 /* force n_subdomains to be not greater that the number of non-active processes */ 7763 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7764 } 7765 7766 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7767 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7768 PetscCall(MatGetSize(mat, &N, NULL)); 7769 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7770 PetscInt issize, isidx, dest; 7771 PetscBool default_sub; 7772 7773 if (*n_subdomains == 1) dest = 0; 7774 else dest = rank; 7775 if (im_active) { 7776 issize = 1; 7777 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7778 isidx = procs_candidates[dest]; 7779 } else { 7780 isidx = dest; 7781 } 7782 } else { 7783 issize = 0; 7784 isidx = rank; 7785 } 7786 if (*n_subdomains != 1) *n_subdomains = active_procs; 7787 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7788 default_sub = (PetscBool)(isidx == rank); 7789 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 7790 if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling")); 7791 PetscCall(PetscFree(procs_candidates)); 7792 PetscFunctionReturn(PETSC_SUCCESS); 7793 } 7794 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7795 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7796 threshold = PetscMax(threshold, 2); 7797 7798 /* Get info on mapping */ 7799 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7800 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7801 7802 /* build local CSR graph of subdomains' connectivity */ 7803 PetscCall(PetscMalloc1(2, &xadj)); 7804 xadj[0] = 0; 7805 xadj[1] = PetscMax(n_neighs - 1, 0); 7806 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7807 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7808 PetscCall(PetscCalloc1(n, &count)); 7809 for (i = 1; i < n_neighs; i++) 7810 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7811 7812 xadj_count = 0; 7813 for (i = 1; i < n_neighs; i++) { 7814 for (j = 0; j < n_shared[i]; j++) { 7815 if (count[shared[i][j]] < threshold) { 7816 adjncy[xadj_count] = neighs[i]; 7817 adjncy_wgt[xadj_count] = n_shared[i]; 7818 xadj_count++; 7819 break; 7820 } 7821 } 7822 } 7823 xadj[1] = xadj_count; 7824 PetscCall(PetscFree(count)); 7825 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7826 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7827 7828 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7829 7830 /* Restrict work on active processes only */ 7831 PetscCall(PetscMPIIntCast(im_active, &color)); 7832 if (void_procs) { 7833 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7834 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7835 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7836 subcomm = PetscSubcommChild(psubcomm); 7837 } else { 7838 psubcomm = NULL; 7839 subcomm = PetscObjectComm((PetscObject)mat); 7840 } 7841 7842 v_wgt = NULL; 7843 if (!color) { 7844 PetscCall(PetscFree(xadj)); 7845 PetscCall(PetscFree(adjncy)); 7846 PetscCall(PetscFree(adjncy_wgt)); 7847 } else { 7848 Mat subdomain_adj; 7849 IS new_ranks, new_ranks_contig; 7850 MatPartitioning partitioner; 7851 PetscInt rstart, rend; 7852 PetscMPIInt irstart = 0, irend = 0; 7853 PetscInt *is_indices, *oldranks; 7854 PetscMPIInt size; 7855 PetscBool aggregate; 7856 7857 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7858 if (void_procs) { 7859 PetscInt prank = rank; 7860 PetscCall(PetscMalloc1(size, &oldranks)); 7861 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7862 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7863 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7864 } else { 7865 oldranks = NULL; 7866 } 7867 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7868 if (aggregate) { /* TODO: all this part could be made more efficient */ 7869 PetscInt lrows, row, ncols, *cols; 7870 PetscMPIInt nrank; 7871 PetscScalar *vals; 7872 7873 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7874 lrows = 0; 7875 if (nrank < redprocs) { 7876 lrows = size / redprocs; 7877 if (nrank < size % redprocs) lrows++; 7878 } 7879 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7880 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7881 PetscCall(PetscMPIIntCast(rstart, &irstart)); 7882 PetscCall(PetscMPIIntCast(rend, &irend)); 7883 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7884 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7885 row = nrank; 7886 ncols = xadj[1] - xadj[0]; 7887 cols = adjncy; 7888 PetscCall(PetscMalloc1(ncols, &vals)); 7889 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7890 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7891 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7892 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7893 PetscCall(PetscFree(xadj)); 7894 PetscCall(PetscFree(adjncy)); 7895 PetscCall(PetscFree(adjncy_wgt)); 7896 PetscCall(PetscFree(vals)); 7897 if (use_vwgt) { 7898 Vec v; 7899 const PetscScalar *array; 7900 PetscInt nl; 7901 7902 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7903 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7904 PetscCall(VecAssemblyBegin(v)); 7905 PetscCall(VecAssemblyEnd(v)); 7906 PetscCall(VecGetLocalSize(v, &nl)); 7907 PetscCall(VecGetArrayRead(v, &array)); 7908 PetscCall(PetscMalloc1(nl, &v_wgt)); 7909 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7910 PetscCall(VecRestoreArrayRead(v, &array)); 7911 PetscCall(VecDestroy(&v)); 7912 } 7913 } else { 7914 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7915 if (use_vwgt) { 7916 PetscCall(PetscMalloc1(1, &v_wgt)); 7917 v_wgt[0] = n; 7918 } 7919 } 7920 /* PetscCall(MatView(subdomain_adj,0)); */ 7921 7922 /* Partition */ 7923 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7924 #if defined(PETSC_HAVE_PTSCOTCH) 7925 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7926 #elif defined(PETSC_HAVE_PARMETIS) 7927 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7928 #else 7929 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7930 #endif 7931 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7932 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7933 *n_subdomains = PetscMin(size, *n_subdomains); 7934 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7935 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7936 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7937 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7938 7939 /* renumber new_ranks to avoid "holes" in new set of processors */ 7940 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7941 PetscCall(ISDestroy(&new_ranks)); 7942 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7943 if (!aggregate) { 7944 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7945 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7946 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7947 } else if (oldranks) { 7948 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7949 } else { 7950 ranks_send_to_idx[0] = is_indices[0]; 7951 } 7952 } else { 7953 PetscInt idx = 0; 7954 PetscMPIInt tag; 7955 MPI_Request *reqs; 7956 7957 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7958 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7959 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7960 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7961 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7962 PetscCall(PetscFree(reqs)); 7963 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7964 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7965 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7966 } else if (oldranks) { 7967 ranks_send_to_idx[0] = oldranks[idx]; 7968 } else { 7969 ranks_send_to_idx[0] = idx; 7970 } 7971 } 7972 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7973 /* clean up */ 7974 PetscCall(PetscFree(oldranks)); 7975 PetscCall(ISDestroy(&new_ranks_contig)); 7976 PetscCall(MatDestroy(&subdomain_adj)); 7977 PetscCall(MatPartitioningDestroy(&partitioner)); 7978 } 7979 PetscCall(PetscSubcommDestroy(&psubcomm)); 7980 PetscCall(PetscFree(procs_candidates)); 7981 7982 /* assemble parallel IS for sends */ 7983 i = 1; 7984 if (!color) i = 0; 7985 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7986 PetscFunctionReturn(PETSC_SUCCESS); 7987 } 7988 7989 typedef enum { 7990 MATDENSE_PRIVATE = 0, 7991 MATAIJ_PRIVATE, 7992 MATBAIJ_PRIVATE, 7993 MATSBAIJ_PRIVATE 7994 } MatTypePrivate; 7995 7996 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7997 { 7998 Mat local_mat; 7999 IS is_sends_internal; 8000 PetscInt rows, cols, new_local_rows; 8001 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 8002 PetscBool ismatis, isdense, newisdense, destroy_mat; 8003 ISLocalToGlobalMapping l2gmap; 8004 PetscInt *l2gmap_indices; 8005 const PetscInt *is_indices; 8006 MatType new_local_type; 8007 /* buffers */ 8008 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 8009 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 8010 PetscInt *recv_buffer_idxs_local; 8011 PetscScalar *ptr_vals, *recv_buffer_vals; 8012 const PetscScalar *send_buffer_vals; 8013 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 8014 /* MPI */ 8015 MPI_Comm comm, comm_n; 8016 PetscSubcomm subcomm; 8017 PetscMPIInt n_sends, n_recvs, size; 8018 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 8019 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 8020 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 8021 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 8022 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 8023 8024 PetscFunctionBegin; 8025 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 8026 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 8027 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 8028 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 8029 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 8030 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 8031 PetscValidLogicalCollectiveBool(mat, reuse, 6); 8032 PetscValidLogicalCollectiveInt(mat, nis, 8); 8033 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 8034 if (nvecs) { 8035 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 8036 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 8037 } 8038 /* further checks */ 8039 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8040 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 8041 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 8042 8043 PetscCall(MatGetSize(local_mat, &rows, &cols)); 8044 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 8045 if (reuse && *mat_n) { 8046 PetscInt mrows, mcols, mnrows, mncols; 8047 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 8048 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 8049 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 8050 PetscCall(MatGetSize(mat, &mrows, &mcols)); 8051 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 8052 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 8053 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 8054 } 8055 PetscCall(MatGetBlockSize(local_mat, &bs)); 8056 PetscValidLogicalCollectiveInt(mat, bs, 1); 8057 8058 /* prepare IS for sending if not provided */ 8059 if (!is_sends) { 8060 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 8061 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 8062 } else { 8063 PetscCall(PetscObjectReference((PetscObject)is_sends)); 8064 is_sends_internal = is_sends; 8065 } 8066 8067 /* get comm */ 8068 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 8069 8070 /* compute number of sends */ 8071 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 8072 PetscCall(PetscMPIIntCast(i, &n_sends)); 8073 8074 /* compute number of receives */ 8075 PetscCallMPI(MPI_Comm_size(comm, &size)); 8076 PetscCall(PetscMalloc1(size, &iflags)); 8077 PetscCall(PetscArrayzero(iflags, size)); 8078 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 8079 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 8080 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 8081 PetscCall(PetscFree(iflags)); 8082 8083 /* restrict comm if requested */ 8084 subcomm = NULL; 8085 destroy_mat = PETSC_FALSE; 8086 if (restrict_comm) { 8087 PetscMPIInt color, subcommsize; 8088 8089 color = 0; 8090 if (restrict_full) { 8091 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 8092 } else { 8093 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 8094 } 8095 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 8096 subcommsize = size - subcommsize; 8097 /* check if reuse has been requested */ 8098 if (reuse) { 8099 if (*mat_n) { 8100 PetscMPIInt subcommsize2; 8101 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 8102 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 8103 comm_n = PetscObjectComm((PetscObject)*mat_n); 8104 } else { 8105 comm_n = PETSC_COMM_SELF; 8106 } 8107 } else { /* MAT_INITIAL_MATRIX */ 8108 PetscMPIInt rank; 8109 8110 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 8111 PetscCall(PetscSubcommCreate(comm, &subcomm)); 8112 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 8113 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 8114 comm_n = PetscSubcommChild(subcomm); 8115 } 8116 /* flag to destroy *mat_n if not significative */ 8117 if (color) destroy_mat = PETSC_TRUE; 8118 } else { 8119 comm_n = comm; 8120 } 8121 8122 /* prepare send/receive buffers */ 8123 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 8124 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8125 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8126 PetscCall(PetscArrayzero(ilengths_vals, size)); 8127 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8128 8129 /* Get data from local matrices */ 8130 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8131 /* TODO: See below some guidelines on how to prepare the local buffers */ 8132 /* 8133 send_buffer_vals should contain the raw values of the local matrix 8134 send_buffer_idxs should contain: 8135 - MatType_PRIVATE type 8136 - PetscInt size_of_l2gmap 8137 - PetscInt global_row_indices[size_of_l2gmap] 8138 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8139 */ 8140 { 8141 ISLocalToGlobalMapping mapping; 8142 8143 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8144 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8145 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8146 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8147 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8148 send_buffer_idxs[1] = i; 8149 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8150 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8151 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8152 PetscCall(PetscMPIIntCast(i, &len)); 8153 for (i = 0; i < n_sends; i++) { 8154 ilengths_vals[is_indices[i]] = len * len; 8155 ilengths_idxs[is_indices[i]] = len + 2; 8156 } 8157 } 8158 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8159 /* additional is (if any) */ 8160 if (nis) { 8161 PetscMPIInt psum; 8162 PetscInt j; 8163 for (j = 0, psum = 0; j < nis; j++) { 8164 PetscInt plen; 8165 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8166 PetscCall(PetscMPIIntCast(plen, &len)); 8167 psum += len + 1; /* indices + length */ 8168 } 8169 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8170 for (j = 0, psum = 0; j < nis; j++) { 8171 PetscInt plen; 8172 const PetscInt *is_array_idxs; 8173 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8174 send_buffer_idxs_is[psum] = plen; 8175 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8176 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8177 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8178 psum += plen + 1; /* indices + length */ 8179 } 8180 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8181 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8182 } 8183 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8184 8185 buf_size_idxs = 0; 8186 buf_size_vals = 0; 8187 buf_size_idxs_is = 0; 8188 buf_size_vecs = 0; 8189 for (i = 0; i < n_recvs; i++) { 8190 buf_size_idxs += olengths_idxs[i]; 8191 buf_size_vals += olengths_vals[i]; 8192 if (nis) buf_size_idxs_is += olengths_idxs_is[i]; 8193 if (nvecs) buf_size_vecs += olengths_idxs[i]; 8194 } 8195 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8196 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8197 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8198 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8199 8200 /* get new tags for clean communications */ 8201 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8202 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8203 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8204 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8205 8206 /* allocate for requests */ 8207 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8208 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8209 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8210 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8211 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8212 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8213 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8214 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8215 8216 /* communications */ 8217 ptr_idxs = recv_buffer_idxs; 8218 ptr_vals = recv_buffer_vals; 8219 ptr_idxs_is = recv_buffer_idxs_is; 8220 ptr_vecs = recv_buffer_vecs; 8221 for (i = 0; i < n_recvs; i++) { 8222 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i])); 8223 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i])); 8224 ptr_idxs += olengths_idxs[i]; 8225 ptr_vals += olengths_vals[i]; 8226 if (nis) { 8227 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i])); 8228 ptr_idxs_is += olengths_idxs_is[i]; 8229 } 8230 if (nvecs) { 8231 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i])); 8232 ptr_vecs += olengths_idxs[i] - 2; 8233 } 8234 } 8235 for (i = 0; i < n_sends; i++) { 8236 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8237 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8238 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8239 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8240 if (nvecs) { 8241 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8242 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8243 } 8244 } 8245 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8246 PetscCall(ISDestroy(&is_sends_internal)); 8247 8248 /* assemble new l2g map */ 8249 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8250 ptr_idxs = recv_buffer_idxs; 8251 new_local_rows = 0; 8252 for (i = 0; i < n_recvs; i++) { 8253 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8254 ptr_idxs += olengths_idxs[i]; 8255 } 8256 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8257 ptr_idxs = recv_buffer_idxs; 8258 new_local_rows = 0; 8259 for (i = 0; i < n_recvs; i++) { 8260 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8261 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8262 ptr_idxs += olengths_idxs[i]; 8263 } 8264 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8265 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8266 PetscCall(PetscFree(l2gmap_indices)); 8267 8268 /* infer new local matrix type from received local matrices type */ 8269 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8270 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8271 if (n_recvs) { 8272 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8273 ptr_idxs = recv_buffer_idxs; 8274 for (i = 0; i < n_recvs; i++) { 8275 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8276 new_local_type_private = MATAIJ_PRIVATE; 8277 break; 8278 } 8279 ptr_idxs += olengths_idxs[i]; 8280 } 8281 switch (new_local_type_private) { 8282 case MATDENSE_PRIVATE: 8283 new_local_type = MATSEQAIJ; 8284 bs = 1; 8285 break; 8286 case MATAIJ_PRIVATE: 8287 new_local_type = MATSEQAIJ; 8288 bs = 1; 8289 break; 8290 case MATBAIJ_PRIVATE: 8291 new_local_type = MATSEQBAIJ; 8292 break; 8293 case MATSBAIJ_PRIVATE: 8294 new_local_type = MATSEQSBAIJ; 8295 break; 8296 default: 8297 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8298 } 8299 } else { /* by default, new_local_type is seqaij */ 8300 new_local_type = MATSEQAIJ; 8301 bs = 1; 8302 } 8303 8304 /* create MATIS object if needed */ 8305 if (!reuse) { 8306 PetscCall(MatGetSize(mat, &rows, &cols)); 8307 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8308 } else { 8309 /* it also destroys the local matrices */ 8310 if (*mat_n) { 8311 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8312 } else { /* this is a fake object */ 8313 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8314 } 8315 } 8316 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8317 PetscCall(MatSetType(local_mat, new_local_type)); 8318 8319 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8320 8321 /* Global to local map of received indices */ 8322 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8323 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8324 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8325 8326 /* restore attributes -> type of incoming data and its size */ 8327 buf_size_idxs = 0; 8328 for (i = 0; i < n_recvs; i++) { 8329 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8330 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8331 buf_size_idxs += olengths_idxs[i]; 8332 } 8333 PetscCall(PetscFree(recv_buffer_idxs)); 8334 8335 /* set preallocation */ 8336 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8337 if (!newisdense) { 8338 PetscInt *new_local_nnz = NULL; 8339 8340 ptr_idxs = recv_buffer_idxs_local; 8341 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8342 for (i = 0; i < n_recvs; i++) { 8343 PetscInt j; 8344 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8345 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8346 } else { 8347 /* TODO */ 8348 } 8349 ptr_idxs += olengths_idxs[i]; 8350 } 8351 if (new_local_nnz) { 8352 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8353 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8354 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8355 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8356 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8357 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8358 } else { 8359 PetscCall(MatSetUp(local_mat)); 8360 } 8361 PetscCall(PetscFree(new_local_nnz)); 8362 } else { 8363 PetscCall(MatSetUp(local_mat)); 8364 } 8365 8366 /* set values */ 8367 ptr_vals = recv_buffer_vals; 8368 ptr_idxs = recv_buffer_idxs_local; 8369 for (i = 0; i < n_recvs; i++) { 8370 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8371 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8372 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8373 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8374 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8375 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8376 } else { 8377 /* TODO */ 8378 } 8379 ptr_idxs += olengths_idxs[i]; 8380 ptr_vals += olengths_vals[i]; 8381 } 8382 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8383 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8384 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8385 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8386 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8387 PetscCall(PetscFree(recv_buffer_vals)); 8388 8389 #if 0 8390 if (!restrict_comm) { /* check */ 8391 Vec lvec,rvec; 8392 PetscReal infty_error; 8393 8394 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8395 PetscCall(VecSetRandom(rvec,NULL)); 8396 PetscCall(MatMult(mat,rvec,lvec)); 8397 PetscCall(VecScale(lvec,-1.0)); 8398 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8399 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8400 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8401 PetscCall(VecDestroy(&rvec)); 8402 PetscCall(VecDestroy(&lvec)); 8403 } 8404 #endif 8405 8406 /* assemble new additional is (if any) */ 8407 if (nis) { 8408 PetscInt **temp_idxs, *count_is, j, psum; 8409 8410 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8411 PetscCall(PetscCalloc1(nis, &count_is)); 8412 ptr_idxs = recv_buffer_idxs_is; 8413 psum = 0; 8414 for (i = 0; i < n_recvs; i++) { 8415 for (j = 0; j < nis; j++) { 8416 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8417 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8418 psum += plen; 8419 ptr_idxs += plen + 1; /* shift pointer to received data */ 8420 } 8421 } 8422 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8423 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8424 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8425 PetscCall(PetscArrayzero(count_is, nis)); 8426 ptr_idxs = recv_buffer_idxs_is; 8427 for (i = 0; i < n_recvs; i++) { 8428 for (j = 0; j < nis; j++) { 8429 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8430 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8431 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8432 ptr_idxs += plen + 1; /* shift pointer to received data */ 8433 } 8434 } 8435 for (i = 0; i < nis; i++) { 8436 PetscCall(ISDestroy(&isarray[i])); 8437 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8438 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8439 } 8440 PetscCall(PetscFree(count_is)); 8441 PetscCall(PetscFree(temp_idxs[0])); 8442 PetscCall(PetscFree(temp_idxs)); 8443 } 8444 /* free workspace */ 8445 PetscCall(PetscFree(recv_buffer_idxs_is)); 8446 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8447 PetscCall(PetscFree(send_buffer_idxs)); 8448 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8449 if (isdense) { 8450 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8451 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8452 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8453 } else { 8454 /* PetscCall(PetscFree(send_buffer_vals)); */ 8455 } 8456 if (nis) { 8457 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8458 PetscCall(PetscFree(send_buffer_idxs_is)); 8459 } 8460 8461 if (nvecs) { 8462 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8463 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8464 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8465 PetscCall(VecDestroy(&nnsp_vec[0])); 8466 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8467 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8468 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8469 /* set values */ 8470 ptr_vals = recv_buffer_vecs; 8471 ptr_idxs = recv_buffer_idxs_local; 8472 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8473 for (i = 0; i < n_recvs; i++) { 8474 PetscInt j; 8475 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8476 ptr_idxs += olengths_idxs[i]; 8477 ptr_vals += olengths_idxs[i] - 2; 8478 } 8479 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8480 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8481 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8482 } 8483 8484 PetscCall(PetscFree(recv_buffer_vecs)); 8485 PetscCall(PetscFree(recv_buffer_idxs_local)); 8486 PetscCall(PetscFree(recv_req_idxs)); 8487 PetscCall(PetscFree(recv_req_vals)); 8488 PetscCall(PetscFree(recv_req_vecs)); 8489 PetscCall(PetscFree(recv_req_idxs_is)); 8490 PetscCall(PetscFree(send_req_idxs)); 8491 PetscCall(PetscFree(send_req_vals)); 8492 PetscCall(PetscFree(send_req_vecs)); 8493 PetscCall(PetscFree(send_req_idxs_is)); 8494 PetscCall(PetscFree(ilengths_vals)); 8495 PetscCall(PetscFree(ilengths_idxs)); 8496 PetscCall(PetscFree(olengths_vals)); 8497 PetscCall(PetscFree(olengths_idxs)); 8498 PetscCall(PetscFree(onodes)); 8499 if (nis) { 8500 PetscCall(PetscFree(ilengths_idxs_is)); 8501 PetscCall(PetscFree(olengths_idxs_is)); 8502 PetscCall(PetscFree(onodes_is)); 8503 } 8504 PetscCall(PetscSubcommDestroy(&subcomm)); 8505 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8506 PetscCall(MatDestroy(mat_n)); 8507 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8508 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8509 PetscCall(VecDestroy(&nnsp_vec[0])); 8510 } 8511 *mat_n = NULL; 8512 } 8513 PetscFunctionReturn(PETSC_SUCCESS); 8514 } 8515 8516 /* temporary hack into ksp private data structure */ 8517 #include <petsc/private/kspimpl.h> 8518 8519 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8520 { 8521 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8522 PC_IS *pcis = (PC_IS *)pc->data; 8523 PCBDDCGraph graph = pcbddc->mat_graph; 8524 Mat coarse_mat, coarse_mat_is; 8525 Mat coarsedivudotp = NULL; 8526 Mat coarseG, t_coarse_mat_is; 8527 MatNullSpace CoarseNullSpace = NULL; 8528 ISLocalToGlobalMapping coarse_islg; 8529 IS coarse_is, *isarray, corners; 8530 PetscInt i, im_active = -1, active_procs = -1; 8531 PetscInt nis, nisdofs, nisneu, nisvert; 8532 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8533 PC pc_temp; 8534 PCType coarse_pc_type; 8535 KSPType coarse_ksp_type; 8536 PetscBool multilevel_requested, multilevel_allowed; 8537 PetscBool coarse_reuse, multi_element = graph->multi_element; 8538 PetscInt ncoarse, nedcfield; 8539 PetscBool compute_vecs = PETSC_FALSE; 8540 PetscScalar *array; 8541 MatReuse coarse_mat_reuse; 8542 PetscBool restr, full_restr, have_void; 8543 PetscMPIInt size; 8544 8545 PetscFunctionBegin; 8546 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8547 /* Assign global numbering to coarse dofs */ 8548 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8549 PetscInt ocoarse_size; 8550 compute_vecs = PETSC_TRUE; 8551 8552 pcbddc->new_primal_space = PETSC_TRUE; 8553 ocoarse_size = pcbddc->coarse_size; 8554 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8555 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8556 /* see if we can avoid some work */ 8557 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8558 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8559 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8560 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8561 coarse_reuse = PETSC_FALSE; 8562 } else { /* we can safely reuse already computed coarse matrix */ 8563 coarse_reuse = PETSC_TRUE; 8564 } 8565 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8566 coarse_reuse = PETSC_FALSE; 8567 } 8568 /* reset any subassembling information */ 8569 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8570 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8571 coarse_reuse = PETSC_TRUE; 8572 } 8573 if (coarse_reuse && pcbddc->coarse_ksp) { 8574 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8575 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8576 coarse_mat_reuse = MAT_REUSE_MATRIX; 8577 } else { 8578 coarse_mat = NULL; 8579 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8580 } 8581 8582 /* creates temporary l2gmap and IS for coarse indexes */ 8583 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8584 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8585 8586 /* creates temporary MATIS object for coarse matrix */ 8587 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8588 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8589 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8590 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element)); 8591 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8592 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8593 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8594 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8595 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8596 8597 /* count "active" (i.e. with positive local size) and "void" processes */ 8598 im_active = !!pcis->n; 8599 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8600 8601 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8602 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8603 /* full_restr : just use the receivers from the subassembling pattern */ 8604 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8605 coarse_mat_is = NULL; 8606 multilevel_allowed = PETSC_FALSE; 8607 multilevel_requested = PETSC_FALSE; 8608 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8609 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8610 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8611 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8612 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8613 if (multilevel_requested) { 8614 ncoarse = active_procs / coarsening_ratio; 8615 restr = PETSC_FALSE; 8616 full_restr = PETSC_FALSE; 8617 } else { 8618 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8619 restr = PETSC_TRUE; 8620 full_restr = PETSC_TRUE; 8621 } 8622 if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8623 ncoarse = PetscMax(1, ncoarse); 8624 if (!pcbddc->coarse_subassembling) { 8625 if (coarsening_ratio > 1) { 8626 if (multilevel_requested) { 8627 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8628 } else { 8629 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8630 } 8631 } else { 8632 PetscMPIInt rank; 8633 8634 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8635 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE; 8636 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8637 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling")); 8638 } 8639 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8640 PetscInt psum; 8641 if (pcbddc->coarse_ksp) psum = 1; 8642 else psum = 0; 8643 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8644 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8645 } 8646 /* determine if we can go multilevel */ 8647 if (multilevel_requested) { 8648 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8649 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8650 } 8651 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8652 8653 /* dump subassembling pattern */ 8654 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8655 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8656 nedcfield = -1; 8657 corners = NULL; 8658 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8659 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8660 const PetscInt *idxs; 8661 ISLocalToGlobalMapping tmap; 8662 8663 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8664 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8665 /* allocate space for temporary storage */ 8666 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8667 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8668 /* allocate for IS array */ 8669 nisdofs = pcbddc->n_ISForDofsLocal; 8670 if (pcbddc->nedclocal) { 8671 if (pcbddc->nedfield > -1) { 8672 nedcfield = pcbddc->nedfield; 8673 } else { 8674 nedcfield = 0; 8675 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8676 nisdofs = 1; 8677 } 8678 } 8679 nisneu = !!pcbddc->NeumannBoundariesLocal; 8680 nisvert = 0; /* nisvert is not used */ 8681 nis = nisdofs + nisneu + nisvert; 8682 PetscCall(PetscMalloc1(nis, &isarray)); 8683 /* dofs splitting */ 8684 for (i = 0; i < nisdofs; i++) { 8685 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8686 if (nedcfield != i) { 8687 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8688 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8689 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8690 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8691 } else { 8692 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8693 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8694 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8695 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8696 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8697 } 8698 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8699 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8700 /* PetscCall(ISView(isarray[i],0)); */ 8701 } 8702 /* neumann boundaries */ 8703 if (pcbddc->NeumannBoundariesLocal) { 8704 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8705 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8706 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8707 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8708 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8709 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8710 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8711 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8712 } 8713 /* coordinates */ 8714 if (pcbddc->corner_selected) { 8715 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8716 PetscCall(ISGetLocalSize(corners, &tsize)); 8717 PetscCall(ISGetIndices(corners, &idxs)); 8718 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8719 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8720 PetscCall(ISRestoreIndices(corners, &idxs)); 8721 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8722 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8723 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8724 } 8725 PetscCall(PetscFree(tidxs)); 8726 PetscCall(PetscFree(tidxs2)); 8727 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8728 } else { 8729 nis = 0; 8730 nisdofs = 0; 8731 nisneu = 0; 8732 nisvert = 0; 8733 isarray = NULL; 8734 } 8735 /* destroy no longer needed map */ 8736 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8737 8738 /* subassemble */ 8739 if (multilevel_allowed) { 8740 Vec vp[1]; 8741 PetscInt nvecs = 0; 8742 PetscBool reuse; 8743 8744 vp[0] = NULL; 8745 /* XXX HDIV also */ 8746 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8747 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8748 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8749 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8750 nvecs = 1; 8751 8752 if (pcbddc->divudotp) { 8753 Mat B, loc_divudotp; 8754 Vec v, p; 8755 IS dummy; 8756 PetscInt np; 8757 8758 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8759 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8760 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8761 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8762 PetscCall(MatCreateVecs(B, &v, &p)); 8763 PetscCall(VecSet(p, 1.)); 8764 PetscCall(MatMultTranspose(B, p, v)); 8765 PetscCall(VecDestroy(&p)); 8766 PetscCall(MatDestroy(&B)); 8767 PetscCall(VecGetArray(vp[0], &array)); 8768 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8769 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8770 PetscCall(VecResetArray(pcbddc->vec1_P)); 8771 PetscCall(VecRestoreArray(vp[0], &array)); 8772 PetscCall(ISDestroy(&dummy)); 8773 PetscCall(VecDestroy(&v)); 8774 } 8775 } 8776 if (coarse_mat) reuse = PETSC_TRUE; 8777 else reuse = PETSC_FALSE; 8778 if (multi_element) { 8779 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8780 coarse_mat_is = t_coarse_mat_is; 8781 } else { 8782 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8783 if (reuse) { 8784 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8785 } else { 8786 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8787 } 8788 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8789 PetscScalar *arraym; 8790 const PetscScalar *arrayv; 8791 PetscInt nl; 8792 PetscCall(VecGetLocalSize(vp[0], &nl)); 8793 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8794 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8795 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8796 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8797 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8798 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8799 PetscCall(VecDestroy(&vp[0])); 8800 } else { 8801 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8802 } 8803 } 8804 } else { 8805 PetscBool default_sub; 8806 8807 PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub)); 8808 if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8809 else { 8810 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8811 coarse_mat_is = t_coarse_mat_is; 8812 } 8813 } 8814 if (coarse_mat_is || coarse_mat) { 8815 if (!multilevel_allowed) { 8816 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8817 } else { 8818 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8819 if (coarse_mat_is) { 8820 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8821 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8822 coarse_mat = coarse_mat_is; 8823 } 8824 } 8825 } 8826 PetscCall(MatDestroy(&t_coarse_mat_is)); 8827 PetscCall(MatDestroy(&coarse_mat_is)); 8828 8829 /* create local to global scatters for coarse problem */ 8830 if (compute_vecs) { 8831 PetscInt lrows; 8832 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8833 if (coarse_mat) { 8834 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8835 } else { 8836 lrows = 0; 8837 } 8838 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8839 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8840 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8841 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8842 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8843 } 8844 PetscCall(ISDestroy(&coarse_is)); 8845 8846 /* set defaults for coarse KSP and PC */ 8847 if (multilevel_allowed) { 8848 coarse_ksp_type = KSPRICHARDSON; 8849 coarse_pc_type = PCBDDC; 8850 } else { 8851 coarse_ksp_type = KSPPREONLY; 8852 coarse_pc_type = PCREDUNDANT; 8853 } 8854 8855 /* print some info if requested */ 8856 if (pcbddc->dbg_flag) { 8857 if (!multilevel_allowed) { 8858 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8859 if (multilevel_requested) { 8860 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8861 } else if (pcbddc->max_levels) { 8862 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8863 } 8864 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8865 } 8866 } 8867 8868 /* communicate coarse discrete gradient */ 8869 coarseG = NULL; 8870 if (pcbddc->nedcG && multilevel_allowed) { 8871 MPI_Comm ccomm; 8872 if (coarse_mat) { 8873 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8874 } else { 8875 ccomm = MPI_COMM_NULL; 8876 } 8877 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8878 } 8879 8880 /* create the coarse KSP object only once with defaults */ 8881 if (coarse_mat) { 8882 PetscBool isredundant, isbddc, force, valid; 8883 PetscViewer dbg_viewer = NULL; 8884 PetscBool isset, issym, isher, isspd; 8885 8886 if (pcbddc->dbg_flag) { 8887 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8888 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8889 } 8890 if (!pcbddc->coarse_ksp) { 8891 char prefix[256], str_level[16]; 8892 size_t len; 8893 8894 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8895 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8896 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8897 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8898 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8899 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8900 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8901 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8902 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8903 /* TODO is this logic correct? should check for coarse_mat type */ 8904 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8905 /* prefix */ 8906 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8907 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8908 if (!pcbddc->current_level) { 8909 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8910 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8911 } else { 8912 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8913 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8914 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8915 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8916 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8917 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 8918 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8919 } 8920 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8921 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8922 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8923 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8924 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8925 /* allow user customization */ 8926 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8927 /* get some info after set from options */ 8928 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8929 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8930 force = PETSC_FALSE; 8931 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8932 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8933 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8934 if (multilevel_allowed && !force && !valid) { 8935 isbddc = PETSC_TRUE; 8936 PetscCall(PCSetType(pc_temp, PCBDDC)); 8937 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8938 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8939 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8940 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8941 PetscObjectOptionsBegin((PetscObject)pc_temp); 8942 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8943 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8944 PetscOptionsEnd(); 8945 pc_temp->setfromoptionscalled++; 8946 } 8947 } 8948 } 8949 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8950 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8951 if (nisdofs) { 8952 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8953 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8954 } 8955 if (nisneu) { 8956 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8957 PetscCall(ISDestroy(&isarray[nisdofs])); 8958 } 8959 if (nisvert) { 8960 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8961 PetscCall(ISDestroy(&isarray[nis - 1])); 8962 } 8963 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8964 8965 /* get some info after set from options */ 8966 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8967 8968 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8969 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8970 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8971 force = PETSC_FALSE; 8972 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8973 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8974 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8975 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8976 if (isredundant) { 8977 KSP inner_ksp; 8978 PC inner_pc; 8979 8980 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8981 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8982 } 8983 8984 /* parameters which miss an API */ 8985 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8986 if (isbddc) { 8987 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8988 8989 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8990 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8991 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8992 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8993 if (pcbddc_coarse->benign_saddle_point) { 8994 Mat coarsedivudotp_is; 8995 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8996 IS row, col; 8997 const PetscInt *gidxs; 8998 PetscInt n, st, M, N; 8999 9000 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 9001 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 9002 st = st - n; 9003 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 9004 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 9005 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 9006 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 9007 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 9008 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 9009 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 9010 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 9011 PetscCall(ISGetSize(row, &M)); 9012 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 9013 PetscCall(ISDestroy(&row)); 9014 PetscCall(ISDestroy(&col)); 9015 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 9016 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 9017 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 9018 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 9019 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 9020 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 9021 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 9022 PetscCall(MatDestroy(&coarsedivudotp)); 9023 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 9024 PetscCall(MatDestroy(&coarsedivudotp_is)); 9025 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 9026 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 9027 } 9028 } 9029 9030 /* propagate symmetry info of coarse matrix */ 9031 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 9032 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 9033 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 9034 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 9035 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 9036 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 9037 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 9038 9039 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 9040 /* set operators */ 9041 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 9042 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 9043 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 9044 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 9045 } 9046 PetscCall(MatDestroy(&coarseG)); 9047 PetscCall(PetscFree(isarray)); 9048 #if 0 9049 { 9050 PetscViewer viewer; 9051 char filename[256]; 9052 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 9053 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 9054 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 9055 PetscCall(MatView(coarse_mat,viewer)); 9056 PetscCall(PetscViewerPopFormat(viewer)); 9057 PetscCall(PetscViewerDestroy(&viewer)); 9058 } 9059 #endif 9060 9061 if (corners) { 9062 Vec gv; 9063 IS is; 9064 const PetscInt *idxs; 9065 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 9066 PetscScalar *coords; 9067 9068 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 9069 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 9070 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 9071 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 9072 PetscCall(VecSetBlockSize(gv, cdim)); 9073 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 9074 PetscCall(VecSetType(gv, VECSTANDARD)); 9075 PetscCall(VecSetFromOptions(gv)); 9076 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 9077 9078 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9079 PetscCall(ISGetLocalSize(is, &n)); 9080 PetscCall(ISGetIndices(is, &idxs)); 9081 PetscCall(PetscMalloc1(n * cdim, &coords)); 9082 for (i = 0; i < n; i++) { 9083 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 9084 } 9085 PetscCall(ISRestoreIndices(is, &idxs)); 9086 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9087 9088 PetscCall(ISGetLocalSize(corners, &n)); 9089 PetscCall(ISGetIndices(corners, &idxs)); 9090 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 9091 PetscCall(ISRestoreIndices(corners, &idxs)); 9092 PetscCall(PetscFree(coords)); 9093 PetscCall(VecAssemblyBegin(gv)); 9094 PetscCall(VecAssemblyEnd(gv)); 9095 PetscCall(VecGetArray(gv, &coords)); 9096 if (pcbddc->coarse_ksp) { 9097 PC coarse_pc; 9098 PetscBool isbddc; 9099 9100 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 9101 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 9102 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 9103 PetscReal *realcoords; 9104 9105 PetscCall(VecGetLocalSize(gv, &n)); 9106 #if defined(PETSC_USE_COMPLEX) 9107 PetscCall(PetscMalloc1(n, &realcoords)); 9108 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 9109 #else 9110 realcoords = coords; 9111 #endif 9112 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 9113 #if defined(PETSC_USE_COMPLEX) 9114 PetscCall(PetscFree(realcoords)); 9115 #endif 9116 } 9117 } 9118 PetscCall(VecRestoreArray(gv, &coords)); 9119 PetscCall(VecDestroy(&gv)); 9120 } 9121 PetscCall(ISDestroy(&corners)); 9122 9123 if (pcbddc->coarse_ksp) { 9124 Vec crhs, csol; 9125 9126 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9127 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9128 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9129 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9130 } 9131 PetscCall(MatDestroy(&coarsedivudotp)); 9132 9133 /* compute null space for coarse solver if the benign trick has been requested */ 9134 if (pcbddc->benign_null) { 9135 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9136 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9137 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9138 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9139 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9140 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9141 if (coarse_mat) { 9142 Vec nullv; 9143 PetscScalar *array, *array2; 9144 PetscInt nl; 9145 9146 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9147 PetscCall(VecGetLocalSize(nullv, &nl)); 9148 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9149 PetscCall(VecGetArray(nullv, &array2)); 9150 PetscCall(PetscArraycpy(array2, array, nl)); 9151 PetscCall(VecRestoreArray(nullv, &array2)); 9152 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9153 PetscCall(VecNormalize(nullv, NULL)); 9154 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9155 PetscCall(VecDestroy(&nullv)); 9156 } 9157 } 9158 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9159 9160 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9161 if (pcbddc->coarse_ksp) { 9162 PetscBool ispreonly; 9163 9164 if (CoarseNullSpace) { 9165 PetscBool isnull; 9166 9167 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9168 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9169 /* TODO: add local nullspaces (if any) */ 9170 } 9171 /* setup coarse ksp */ 9172 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9173 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9174 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9175 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9176 KSP check_ksp; 9177 KSPType check_ksp_type; 9178 PC check_pc; 9179 Vec check_vec, coarse_vec; 9180 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9181 PetscInt its; 9182 PetscBool compute_eigs; 9183 PetscReal *eigs_r, *eigs_c; 9184 PetscInt neigs; 9185 const char *prefix; 9186 9187 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9188 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9189 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9190 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9191 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9192 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9193 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9194 /* prevent from setup unneeded object */ 9195 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9196 PetscCall(PCSetType(check_pc, PCNONE)); 9197 if (ispreonly) { 9198 check_ksp_type = KSPPREONLY; 9199 compute_eigs = PETSC_FALSE; 9200 } else { 9201 check_ksp_type = KSPGMRES; 9202 compute_eigs = PETSC_TRUE; 9203 } 9204 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9205 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9206 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9207 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9208 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9209 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9210 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9211 PetscCall(KSPSetFromOptions(check_ksp)); 9212 PetscCall(KSPSetUp(check_ksp)); 9213 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9214 PetscCall(KSPSetPC(check_ksp, check_pc)); 9215 /* create random vec */ 9216 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9217 PetscCall(VecSetRandom(check_vec, NULL)); 9218 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9219 /* solve coarse problem */ 9220 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9221 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9222 /* set eigenvalue estimation if preonly has not been requested */ 9223 if (compute_eigs) { 9224 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9225 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9226 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9227 if (neigs) { 9228 lambda_max = eigs_r[neigs - 1]; 9229 lambda_min = eigs_r[0]; 9230 if (pcbddc->use_coarse_estimates) { 9231 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9232 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9233 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9234 } 9235 } 9236 } 9237 } 9238 9239 /* check coarse problem residual error */ 9240 if (pcbddc->dbg_flag) { 9241 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9242 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9243 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9244 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9245 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9246 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9247 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9248 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9249 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer)); 9250 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9251 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9252 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9253 if (compute_eigs) { 9254 PetscReal lambda_max_s, lambda_min_s; 9255 KSPConvergedReason reason; 9256 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9257 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9258 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9259 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9260 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9261 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9262 } 9263 PetscCall(PetscViewerFlush(dbg_viewer)); 9264 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9265 } 9266 PetscCall(VecDestroy(&check_vec)); 9267 PetscCall(VecDestroy(&coarse_vec)); 9268 PetscCall(KSPDestroy(&check_ksp)); 9269 if (compute_eigs) { 9270 PetscCall(PetscFree(eigs_r)); 9271 PetscCall(PetscFree(eigs_c)); 9272 } 9273 } 9274 } 9275 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9276 /* print additional info */ 9277 if (pcbddc->dbg_flag) { 9278 /* waits until all processes reaches this point */ 9279 PetscCall(PetscBarrier((PetscObject)pc)); 9280 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9281 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9282 } 9283 9284 /* free memory */ 9285 PetscCall(MatDestroy(&coarse_mat)); 9286 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9287 PetscFunctionReturn(PETSC_SUCCESS); 9288 } 9289 9290 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9291 { 9292 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9293 PC_IS *pcis = (PC_IS *)pc->data; 9294 IS subset, subset_mult, subset_n; 9295 PetscInt local_size, coarse_size = 0; 9296 PetscInt *local_primal_indices = NULL; 9297 const PetscInt *t_local_primal_indices; 9298 9299 PetscFunctionBegin; 9300 /* Compute global number of coarse dofs */ 9301 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9302 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9303 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9304 PetscCall(ISDestroy(&subset_n)); 9305 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9306 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9307 PetscCall(ISDestroy(&subset)); 9308 PetscCall(ISDestroy(&subset_mult)); 9309 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9310 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9311 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9312 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9313 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9314 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9315 PetscCall(ISDestroy(&subset_n)); 9316 9317 if (pcbddc->dbg_flag) { 9318 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9319 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9320 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9321 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9322 } 9323 9324 /* get back data */ 9325 *coarse_size_n = coarse_size; 9326 *local_primal_indices_n = local_primal_indices; 9327 PetscFunctionReturn(PETSC_SUCCESS); 9328 } 9329 9330 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9331 { 9332 IS localis_t; 9333 PetscInt i, lsize, *idxs, n; 9334 PetscScalar *vals; 9335 9336 PetscFunctionBegin; 9337 /* get indices in local ordering exploiting local to global map */ 9338 PetscCall(ISGetLocalSize(globalis, &lsize)); 9339 PetscCall(PetscMalloc1(lsize, &vals)); 9340 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9341 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9342 PetscCall(VecSet(gwork, 0.0)); 9343 PetscCall(VecSet(lwork, 0.0)); 9344 if (idxs) { /* multilevel guard */ 9345 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9346 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9347 } 9348 PetscCall(VecAssemblyBegin(gwork)); 9349 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9350 PetscCall(PetscFree(vals)); 9351 PetscCall(VecAssemblyEnd(gwork)); 9352 /* now compute set in local ordering */ 9353 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9354 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9355 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9356 PetscCall(VecGetSize(lwork, &n)); 9357 for (i = 0, lsize = 0; i < n; i++) { 9358 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9359 } 9360 PetscCall(PetscMalloc1(lsize, &idxs)); 9361 for (i = 0, lsize = 0; i < n; i++) { 9362 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9363 } 9364 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9365 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9366 *localis = localis_t; 9367 PetscFunctionReturn(PETSC_SUCCESS); 9368 } 9369 9370 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9371 { 9372 PC_IS *pcis = (PC_IS *)pc->data; 9373 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9374 PC_IS *pcisf; 9375 PC_BDDC *pcbddcf; 9376 PC pcf; 9377 9378 PetscFunctionBegin; 9379 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9380 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9381 PetscCall(PCSetType(pcf, PCBDDC)); 9382 9383 pcisf = (PC_IS *)pcf->data; 9384 pcbddcf = (PC_BDDC *)pcf->data; 9385 9386 pcisf->is_B_local = pcis->is_B_local; 9387 pcisf->vec1_N = pcis->vec1_N; 9388 pcisf->BtoNmap = pcis->BtoNmap; 9389 pcisf->n = pcis->n; 9390 pcisf->n_B = pcis->n_B; 9391 9392 PetscCall(PetscFree(pcbddcf->mat_graph)); 9393 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9394 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9395 pcbddcf->sub_schurs = schurs; 9396 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9397 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9398 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9399 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9400 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9401 pcbddcf->use_faces = PETSC_TRUE; 9402 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9403 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9404 pcbddcf->use_qr_single = (PetscBool)!constraints; 9405 pcbddcf->fake_change = PETSC_TRUE; 9406 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9407 9408 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9409 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9410 9411 *change = pcbddcf->ConstraintMatrix; 9412 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9413 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9414 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9415 9416 if (schurs) pcbddcf->sub_schurs = NULL; 9417 pcbddcf->ConstraintMatrix = NULL; 9418 pcbddcf->mat_graph = NULL; 9419 pcisf->is_B_local = NULL; 9420 pcisf->vec1_N = NULL; 9421 pcisf->BtoNmap = NULL; 9422 PetscCall(PCDestroy(&pcf)); 9423 PetscFunctionReturn(PETSC_SUCCESS); 9424 } 9425 9426 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9427 { 9428 PC_IS *pcis = (PC_IS *)pc->data; 9429 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9430 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9431 Mat S_j; 9432 PetscInt *used_xadj, *used_adjncy; 9433 PetscBool free_used_adj; 9434 9435 PetscFunctionBegin; 9436 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9437 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9438 free_used_adj = PETSC_FALSE; 9439 if (pcbddc->sub_schurs_layers == -1) { 9440 used_xadj = NULL; 9441 used_adjncy = NULL; 9442 } else { 9443 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9444 used_xadj = pcbddc->mat_graph->xadj; 9445 used_adjncy = pcbddc->mat_graph->adjncy; 9446 } else if (pcbddc->computed_rowadj) { 9447 used_xadj = pcbddc->mat_graph->xadj; 9448 used_adjncy = pcbddc->mat_graph->adjncy; 9449 } else { 9450 PetscBool flg_row = PETSC_FALSE; 9451 const PetscInt *xadj, *adjncy; 9452 PetscInt nvtxs; 9453 9454 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9455 if (flg_row) { 9456 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9457 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9458 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9459 free_used_adj = PETSC_TRUE; 9460 } else { 9461 pcbddc->sub_schurs_layers = -1; 9462 used_xadj = NULL; 9463 used_adjncy = NULL; 9464 } 9465 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9466 } 9467 } 9468 9469 /* setup sub_schurs data */ 9470 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9471 if (!sub_schurs->schur_explicit) { 9472 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9473 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9474 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9475 } else { 9476 Mat change = NULL; 9477 Vec scaling = NULL; 9478 IS change_primal = NULL, iP; 9479 PetscInt benign_n; 9480 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9481 PetscBool need_change = PETSC_FALSE; 9482 PetscBool discrete_harmonic = PETSC_FALSE; 9483 9484 if (!pcbddc->use_vertices && reuse_solvers) { 9485 PetscInt n_vertices; 9486 9487 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9488 reuse_solvers = (PetscBool)!n_vertices; 9489 } 9490 if (!pcbddc->benign_change_explicit) { 9491 benign_n = pcbddc->benign_n; 9492 } else { 9493 benign_n = 0; 9494 } 9495 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9496 We need a global reduction to avoid possible deadlocks. 9497 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9498 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9499 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9500 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9501 need_change = (PetscBool)(!need_change); 9502 } 9503 /* If the user defines additional constraints, we import them here */ 9504 if (need_change) { 9505 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9506 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9507 } 9508 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9509 9510 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9511 if (iP) { 9512 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9513 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9514 PetscOptionsEnd(); 9515 } 9516 if (discrete_harmonic) { 9517 Mat A; 9518 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9519 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9520 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9521 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9522 pcbddc->benign_zerodiag_subs, change, change_primal)); 9523 PetscCall(MatDestroy(&A)); 9524 } else { 9525 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9526 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9527 } 9528 PetscCall(MatDestroy(&change)); 9529 PetscCall(ISDestroy(&change_primal)); 9530 } 9531 PetscCall(MatDestroy(&S_j)); 9532 9533 /* free adjacency */ 9534 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9535 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9536 PetscFunctionReturn(PETSC_SUCCESS); 9537 } 9538 9539 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9540 { 9541 PC_IS *pcis = (PC_IS *)pc->data; 9542 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9543 PCBDDCGraph graph; 9544 9545 PetscFunctionBegin; 9546 /* attach interface graph for determining subsets */ 9547 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9548 IS verticesIS, verticescomm; 9549 PetscInt vsize, *idxs; 9550 9551 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9552 PetscCall(ISGetSize(verticesIS, &vsize)); 9553 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9554 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9555 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9556 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9557 PetscCall(PCBDDCGraphCreate(&graph)); 9558 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9559 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9560 PetscCall(ISDestroy(&verticescomm)); 9561 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9562 } else { 9563 graph = pcbddc->mat_graph; 9564 } 9565 /* print some info */ 9566 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9567 IS vertices; 9568 PetscInt nv, nedges, nfaces; 9569 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9570 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9571 PetscCall(ISGetSize(vertices, &nv)); 9572 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9573 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9574 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9575 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9576 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9577 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9578 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9579 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9580 } 9581 9582 /* sub_schurs init */ 9583 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9584 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9585 9586 /* free graph struct */ 9587 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9588 PetscFunctionReturn(PETSC_SUCCESS); 9589 } 9590 9591 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9592 { 9593 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9594 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9595 const PetscInt *idxs; 9596 IS gis; 9597 9598 PetscFunctionBegin; 9599 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9600 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9601 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9602 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9603 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9604 PetscCall(ISGetLocalSize(is, &ni)); 9605 PetscCall(ISGetIndices(is, &idxs)); 9606 for (PetscInt i = 0; i < ni; i++) { 9607 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9608 matis->sf_leafdata[idxs[i]] = 1; 9609 } 9610 PetscCall(ISRestoreIndices(is, &idxs)); 9611 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9612 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9613 ln = 0; 9614 for (PetscInt i = 0; i < n; i++) { 9615 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9616 } 9617 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9618 PetscCall(ISView(gis, viewer)); 9619 PetscCall(ISDestroy(&gis)); 9620 PetscFunctionReturn(PETSC_SUCCESS); 9621 } 9622 9623 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9624 { 9625 PetscInt header[11]; 9626 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9627 PetscViewer viewer; 9628 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9629 9630 PetscFunctionBegin; 9631 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9632 if (load) { 9633 IS is; 9634 Mat A; 9635 9636 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9637 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9638 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9639 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9640 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9641 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9642 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9643 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9644 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9645 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9646 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9647 if (header[0]) { 9648 PetscCall(ISCreate(comm, &is)); 9649 PetscCall(ISLoad(is, viewer)); 9650 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9651 PetscCall(ISDestroy(&is)); 9652 } 9653 if (header[1]) { 9654 PetscCall(ISCreate(comm, &is)); 9655 PetscCall(ISLoad(is, viewer)); 9656 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9657 PetscCall(ISDestroy(&is)); 9658 } 9659 if (header[2]) { 9660 IS *isarray; 9661 9662 PetscCall(PetscMalloc1(header[2], &isarray)); 9663 for (PetscInt i = 0; i < header[2]; i++) { 9664 PetscCall(ISCreate(comm, &isarray[i])); 9665 PetscCall(ISLoad(isarray[i], viewer)); 9666 } 9667 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9668 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9669 PetscCall(PetscFree(isarray)); 9670 } 9671 if (header[3]) { 9672 PetscCall(ISCreate(comm, &is)); 9673 PetscCall(ISLoad(is, viewer)); 9674 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9675 PetscCall(ISDestroy(&is)); 9676 } 9677 if (header[4]) { 9678 PetscCall(MatCreate(comm, &A)); 9679 PetscCall(MatSetType(A, MATAIJ)); 9680 PetscCall(MatLoad(A, viewer)); 9681 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9682 PetscCall(MatDestroy(&A)); 9683 } 9684 if (header[9]) { 9685 PetscCall(MatCreate(comm, &A)); 9686 PetscCall(MatSetType(A, MATIS)); 9687 PetscCall(MatLoad(A, viewer)); 9688 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9689 PetscCall(MatDestroy(&A)); 9690 } 9691 } else { 9692 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal; 9693 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal; 9694 header[2] = pcbddc->n_ISForDofsLocal; 9695 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local; 9696 header[4] = (PetscInt)!!pcbddc->discretegradient; 9697 header[5] = pcbddc->nedorder; 9698 header[6] = pcbddc->nedfield; 9699 header[7] = (PetscInt)pcbddc->nedglobal; 9700 header[8] = (PetscInt)pcbddc->conforming; 9701 header[9] = (PetscInt)!!pcbddc->divudotp; 9702 header[10] = (PetscInt)pcbddc->divudotp_trans; 9703 if (header[4]) header[3] = 0; 9704 9705 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9706 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9707 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9708 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9709 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9710 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9711 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9712 } 9713 PetscCall(PetscViewerDestroy(&viewer)); 9714 PetscFunctionReturn(PETSC_SUCCESS); 9715 } 9716 9717 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9718 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9719 { 9720 Mat At; 9721 IS rows; 9722 PetscInt rst, ren; 9723 PetscLayout rmap; 9724 9725 PetscFunctionBegin; 9726 rst = ren = 0; 9727 if (ccomm != MPI_COMM_NULL) { 9728 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9729 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9730 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9731 PetscCall(PetscLayoutSetUp(rmap)); 9732 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9733 } 9734 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9735 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9736 PetscCall(ISDestroy(&rows)); 9737 9738 if (ccomm != MPI_COMM_NULL) { 9739 Mat_MPIAIJ *a, *b; 9740 IS from, to; 9741 Vec gvec; 9742 PetscInt lsize; 9743 9744 PetscCall(MatCreate(ccomm, B)); 9745 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9746 PetscCall(MatSetType(*B, MATAIJ)); 9747 PetscCall(PetscLayoutDestroy(&(*B)->rmap)); 9748 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9749 a = (Mat_MPIAIJ *)At->data; 9750 b = (Mat_MPIAIJ *)(*B)->data; 9751 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9752 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9753 PetscCall(PetscObjectReference((PetscObject)a->A)); 9754 PetscCall(PetscObjectReference((PetscObject)a->B)); 9755 b->A = a->A; 9756 b->B = a->B; 9757 9758 b->donotstash = a->donotstash; 9759 b->roworiented = a->roworiented; 9760 b->rowindices = NULL; 9761 b->rowvalues = NULL; 9762 b->getrowactive = PETSC_FALSE; 9763 9764 (*B)->rmap = rmap; 9765 (*B)->factortype = A->factortype; 9766 (*B)->assembled = PETSC_TRUE; 9767 (*B)->insertmode = NOT_SET_VALUES; 9768 (*B)->preallocated = PETSC_TRUE; 9769 9770 if (a->colmap) { 9771 #if defined(PETSC_USE_CTABLE) 9772 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9773 #else 9774 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9775 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9776 #endif 9777 } else b->colmap = NULL; 9778 if (a->garray) { 9779 PetscInt len; 9780 len = a->B->cmap->n; 9781 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9782 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9783 } else b->garray = NULL; 9784 9785 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9786 b->lvec = a->lvec; 9787 9788 /* cannot use VecScatterCopy */ 9789 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9790 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9791 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9792 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9793 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9794 PetscCall(ISDestroy(&from)); 9795 PetscCall(ISDestroy(&to)); 9796 PetscCall(VecDestroy(&gvec)); 9797 } 9798 PetscCall(MatDestroy(&At)); 9799 PetscFunctionReturn(PETSC_SUCCESS); 9800 } 9801 9802 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9803 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9804 { 9805 PetscBool isaij; 9806 MPI_Comm comm; 9807 9808 PetscFunctionBegin; 9809 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9810 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9811 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9812 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9813 if (isaij) { /* SeqAIJ supports repeated rows */ 9814 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9815 } else { 9816 Mat A_loc; 9817 Mat_SeqAIJ *da; 9818 PetscSF sf; 9819 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9820 PetscScalar *daa; 9821 const PetscInt *idxs; 9822 const PetscSFNode *iremotes; 9823 PetscSFNode *remotes; 9824 9825 /* SF for incoming rows */ 9826 PetscCall(PetscSFCreate(comm, &sf)); 9827 PetscCall(ISGetLocalSize(rows, &ni)); 9828 PetscCall(ISGetIndices(rows, &idxs)); 9829 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9830 PetscCall(ISRestoreIndices(rows, &idxs)); 9831 9832 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9833 da = (Mat_SeqAIJ *)A_loc->data; 9834 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9835 for (PetscInt i = 0; i < m; i++) { 9836 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9837 rdata[2 * i + 1] = da->i[i]; 9838 } 9839 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9840 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9841 PetscCall(PetscMalloc1(ni + 1, &di)); 9842 di[0] = 0; 9843 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9844 PetscCall(PetscMalloc1(di[ni], &dj)); 9845 PetscCall(PetscMalloc1(di[ni], &daa)); 9846 PetscCall(PetscMalloc1(di[ni], &remotes)); 9847 9848 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9849 9850 /* SF graph for nonzeros */ 9851 c = 0; 9852 for (PetscInt i = 0; i < ni; i++) { 9853 const PetscInt rank = iremotes[i].rank; 9854 const PetscInt rsize = ldata[2 * i]; 9855 for (PetscInt j = 0; j < rsize; j++) { 9856 remotes[c].rank = rank; 9857 remotes[c].index = ldata[2 * i + 1] + j; 9858 c++; 9859 } 9860 } 9861 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9862 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9863 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9864 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9865 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9866 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9867 9868 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9869 PetscCall(MatDestroy(&A_loc)); 9870 PetscCall(PetscSFDestroy(&sf)); 9871 PetscCall(PetscFree(di)); 9872 PetscCall(PetscFree(dj)); 9873 PetscCall(PetscFree(daa)); 9874 PetscCall(PetscFree(remotes)); 9875 PetscCall(PetscFree2(ldata, rdata)); 9876 } 9877 PetscFunctionReturn(PETSC_SUCCESS); 9878 } 9879