1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) { 16 PetscScalar *uwork, *data, *U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM, bN, lwork, lierr, di = 1; 19 PetscInt ulw, i, nr, nc, n; 20 #if defined(PETSC_USE_COMPLEX) 21 PetscReal *rwork2; 22 #endif 23 24 PetscFunctionBegin; 25 PetscCall(MatGetSize(A, &nr, &nc)); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 31 PetscCall(PetscMalloc1(ulw, &uwork)); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr, nc); 37 if (!rwork) { 38 PetscCall(PetscMalloc1(n, &sing)); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 PetscCall(PetscMalloc1(nr * nr, &U)); 45 PetscCall(PetscBLASIntCast(nr, &bM)); 46 PetscCall(PetscBLASIntCast(nc, &bN)); 47 PetscCall(PetscBLASIntCast(ulw, &lwork)); 48 PetscCall(MatDenseGetArray(A, &data)); 49 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 50 #if !defined(PETSC_USE_COMPLEX) 51 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 52 #else 53 PetscCall(PetscMalloc1(5 * n, &rwork2)); 54 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 55 PetscCall(PetscFree(rwork2)); 56 #endif 57 PetscCall(PetscFPTrapPop()); 58 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 59 PetscCall(MatDenseRestoreArray(A, &data)); 60 for (i = 0; i < n; i++) 61 if (sing[i] < PETSC_SMALL) break; 62 if (!rwork) { PetscCall(PetscFree(sing)); } 63 if (!work) { PetscCall(PetscFree(uwork)); } 64 /* create B */ 65 if (!range) { 66 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 67 PetscCall(MatDenseGetArray(*B, &data)); 68 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 69 } else { 70 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 71 PetscCall(MatDenseGetArray(*B, &data)); 72 PetscCall(PetscArraycpy(data, U, i * nr)); 73 } 74 PetscCall(MatDenseRestoreArray(*B, &data)); 75 PetscCall(PetscFree(U)); 76 PetscFunctionReturn(0); 77 } 78 79 /* TODO REMOVE */ 80 #if defined(PRINT_GDET) 81 static int inc = 0; 82 static int lev = 0; 83 #endif 84 85 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) { 86 Mat GE, GEd; 87 PetscInt rsize, csize, esize; 88 PetscScalar *ptr; 89 90 PetscFunctionBegin; 91 PetscCall(ISGetSize(edge, &esize)); 92 if (!esize) PetscFunctionReturn(0); 93 PetscCall(ISGetSize(extrow, &rsize)); 94 PetscCall(ISGetSize(extcol, &csize)); 95 96 /* gradients */ 97 ptr = work + 5 * esize; 98 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 99 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 100 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 101 PetscCall(MatDestroy(&GE)); 102 103 /* constants */ 104 ptr += rsize * csize; 105 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 106 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 107 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 108 PetscCall(MatDestroy(&GE)); 109 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 110 PetscCall(MatDestroy(&GEd)); 111 112 if (corners) { 113 Mat GEc; 114 const PetscScalar *vals; 115 PetscScalar v; 116 117 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 118 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 119 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 120 /* v = PetscAbsScalar(vals[0]) */; 121 v = 1.; 122 cvals[0] = vals[0] / v; 123 cvals[1] = vals[1] / v; 124 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 125 PetscCall(MatScale(*GKins, 1. / v)); 126 #if defined(PRINT_GDET) 127 { 128 PetscViewer viewer; 129 char filename[256]; 130 sprintf(filename, "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++); 131 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 132 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 133 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 134 PetscCall(MatView(GEc, viewer)); 135 PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK")); 136 PetscCall(MatView(*GKins, viewer)); 137 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 138 PetscCall(MatView(GEd, viewer)); 139 PetscCall(PetscViewerDestroy(&viewer)); 140 } 141 #endif 142 PetscCall(MatDestroy(&GEd)); 143 PetscCall(MatDestroy(&GEc)); 144 } 145 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode PCBDDCNedelecSupport(PC pc) { 150 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 151 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 152 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 153 Vec tvec; 154 PetscSF sfv; 155 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 156 MPI_Comm comm; 157 IS lned, primals, allprimals, nedfieldlocal; 158 IS *eedges, *extrows, *extcols, *alleedges; 159 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 160 PetscScalar *vals, *work; 161 PetscReal *rwork; 162 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 163 PetscInt ne, nv, Lv, order, n, field; 164 PetscInt n_neigh, *neigh, *n_shared, **shared; 165 PetscInt i, j, extmem, cum, maxsize, nee; 166 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 167 PetscInt *sfvleaves, *sfvroots; 168 PetscInt *corners, *cedges; 169 PetscInt *ecount, **eneighs, *vcount, **vneighs; 170 PetscInt *emarks; 171 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 172 173 PetscFunctionBegin; 174 /* If the discrete gradient is defined for a subset of dofs and global is true, 175 it assumes G is given in global ordering for all the dofs. 176 Otherwise, the ordering is global for the Nedelec field */ 177 order = pcbddc->nedorder; 178 conforming = pcbddc->conforming; 179 field = pcbddc->nedfield; 180 global = pcbddc->nedglobal; 181 setprimal = PETSC_FALSE; 182 print = PETSC_FALSE; 183 singular = PETSC_FALSE; 184 185 /* Command line customization */ 186 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 187 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 188 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 189 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 190 /* print debug info TODO: to be removed */ 191 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 192 PetscOptionsEnd(); 193 194 /* Return if there are no edges in the decomposition and the problem is not singular */ 195 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 196 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 197 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 198 if (!singular) { 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(0); 210 } 211 212 /* Get Nedelec field */ 213 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 214 if (pcbddc->n_ISForDofsLocal && field >= 0) { 215 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 216 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 217 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 218 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 219 ne = n; 220 nedfieldlocal = NULL; 221 global = PETSC_TRUE; 222 } else if (field == PETSC_DECIDE) { 223 PetscInt rst, ren, *idx; 224 225 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 226 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 227 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 228 for (i = rst; i < ren; i++) { 229 PetscInt nc; 230 231 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 232 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 233 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 234 } 235 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 237 PetscCall(PetscMalloc1(n, &idx)); 238 for (i = 0, ne = 0; i < n; i++) 239 if (matis->sf_leafdata[i]) idx[ne++] = i; 240 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 241 } else { 242 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 243 } 244 245 /* Sanity checks */ 246 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 247 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 248 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 249 250 /* Just set primal dofs and return */ 251 if (setprimal) { 252 IS enedfieldlocal; 253 PetscInt *eidxs; 254 255 PetscCall(PetscMalloc1(ne, &eidxs)); 256 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 257 if (nedfieldlocal) { 258 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 259 for (i = 0, cum = 0; i < ne; i++) { 260 if (PetscRealPart(vals[idxs[i]]) > 2.) { eidxs[cum++] = idxs[i]; } 261 } 262 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 263 } else { 264 for (i = 0, cum = 0; i < ne; i++) { 265 if (PetscRealPart(vals[i]) > 2.) { eidxs[cum++] = i; } 266 } 267 } 268 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 269 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 270 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 271 PetscCall(PetscFree(eidxs)); 272 PetscCall(ISDestroy(&nedfieldlocal)); 273 PetscCall(ISDestroy(&enedfieldlocal)); 274 PetscFunctionReturn(0); 275 } 276 277 /* Compute some l2g maps */ 278 if (nedfieldlocal) { 279 IS is; 280 281 /* need to map from the local Nedelec field to local numbering */ 282 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 283 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 284 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 285 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 286 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 287 if (global) { 288 PetscCall(PetscObjectReference((PetscObject)al2g)); 289 el2g = al2g; 290 } else { 291 IS gis; 292 293 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 294 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 295 PetscCall(ISDestroy(&gis)); 296 } 297 PetscCall(ISDestroy(&is)); 298 } else { 299 /* restore default */ 300 pcbddc->nedfield = -1; 301 /* one ref for the destruction of al2g, one for el2g */ 302 PetscCall(PetscObjectReference((PetscObject)al2g)); 303 PetscCall(PetscObjectReference((PetscObject)al2g)); 304 el2g = al2g; 305 fl2g = NULL; 306 } 307 308 /* Start communication to drop connections for interior edges (for cc analysis only) */ 309 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 310 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 311 if (nedfieldlocal) { 312 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 313 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 314 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 315 } else { 316 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 317 } 318 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 319 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 320 321 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 322 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 323 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 324 if (global) { 325 PetscInt rst; 326 327 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 328 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 329 if (matis->sf_rootdata[i] < 2) { matis->sf_rootdata[cum++] = i + rst; } 330 } 331 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 332 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 333 } else { 334 PetscInt *tbz; 335 336 PetscCall(PetscMalloc1(ne, &tbz)); 337 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 338 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 339 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 340 for (i = 0, cum = 0; i < ne; i++) 341 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 342 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 343 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 344 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 345 PetscCall(PetscFree(tbz)); 346 } 347 } else { /* we need the entire G to infer the nullspace */ 348 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 349 G = pcbddc->discretegradient; 350 } 351 352 /* Extract subdomain relevant rows of G */ 353 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 354 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 355 PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); 356 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 357 PetscCall(ISDestroy(&lned)); 358 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 359 PetscCall(MatDestroy(&lGall)); 360 PetscCall(MatISGetLocalMat(lGis, &lG)); 361 362 /* SF for nodal dofs communications */ 363 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 364 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 365 PetscCall(PetscObjectReference((PetscObject)vl2g)); 366 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 367 PetscCall(PetscSFCreate(comm, &sfv)); 368 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 369 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 370 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 371 i = singular ? 2 : 1; 372 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 373 374 /* Destroy temporary G created in MATIS format and modified G */ 375 PetscCall(PetscObjectReference((PetscObject)lG)); 376 PetscCall(MatDestroy(&lGis)); 377 PetscCall(MatDestroy(&G)); 378 379 if (print) { 380 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 381 PetscCall(MatView(lG, NULL)); 382 } 383 384 /* Save lG for values insertion in change of basis */ 385 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 386 387 /* Analyze the edge-nodes connections (duplicate lG) */ 388 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 389 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 390 PetscCall(PetscBTCreate(nv, &btv)); 391 PetscCall(PetscBTCreate(ne, &bte)); 392 PetscCall(PetscBTCreate(ne, &btb)); 393 PetscCall(PetscBTCreate(ne, &btbd)); 394 PetscCall(PetscBTCreate(nv, &btvcand)); 395 /* need to import the boundary specification to ensure the 396 proper detection of coarse edges' endpoints */ 397 if (pcbddc->DirichletBoundariesLocal) { 398 IS is; 399 400 if (fl2g) { 401 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 402 } else { 403 is = pcbddc->DirichletBoundariesLocal; 404 } 405 PetscCall(ISGetLocalSize(is, &cum)); 406 PetscCall(ISGetIndices(is, &idxs)); 407 for (i = 0; i < cum; i++) { 408 if (idxs[i] >= 0) { 409 PetscCall(PetscBTSet(btb, idxs[i])); 410 PetscCall(PetscBTSet(btbd, idxs[i])); 411 } 412 } 413 PetscCall(ISRestoreIndices(is, &idxs)); 414 if (fl2g) { PetscCall(ISDestroy(&is)); } 415 } 416 if (pcbddc->NeumannBoundariesLocal) { 417 IS is; 418 419 if (fl2g) { 420 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 421 } else { 422 is = pcbddc->NeumannBoundariesLocal; 423 } 424 PetscCall(ISGetLocalSize(is, &cum)); 425 PetscCall(ISGetIndices(is, &idxs)); 426 for (i = 0; i < cum; i++) { 427 if (idxs[i] >= 0) { PetscCall(PetscBTSet(btb, idxs[i])); } 428 } 429 PetscCall(ISRestoreIndices(is, &idxs)); 430 if (fl2g) { PetscCall(ISDestroy(&is)); } 431 } 432 433 /* Count neighs per dof */ 434 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs)); 435 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs)); 436 437 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 438 for proper detection of coarse edges' endpoints */ 439 PetscCall(PetscBTCreate(ne, &btee)); 440 for (i = 0; i < ne; i++) { 441 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) { PetscCall(PetscBTSet(btee, i)); } 442 } 443 PetscCall(PetscMalloc1(ne, &marks)); 444 if (!conforming) { 445 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 446 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 447 } 448 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 449 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 450 cum = 0; 451 for (i = 0; i < ne; i++) { 452 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 453 if (!PetscBTLookup(btee, i)) { 454 marks[cum++] = i; 455 continue; 456 } 457 /* set badly connected edge dofs as primal */ 458 if (!conforming) { 459 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 460 marks[cum++] = i; 461 PetscCall(PetscBTSet(bte, i)); 462 for (j = ii[i]; j < ii[i + 1]; j++) { PetscCall(PetscBTSet(btv, jj[j])); } 463 } else { 464 /* every edge dofs should be connected trough a certain number of nodal dofs 465 to other edge dofs belonging to coarse edges 466 - at most 2 endpoints 467 - order-1 interior nodal dofs 468 - no undefined nodal dofs (nconn < order) 469 */ 470 PetscInt ends = 0, ints = 0, undef = 0; 471 for (j = ii[i]; j < ii[i + 1]; j++) { 472 PetscInt v = jj[j], k; 473 PetscInt nconn = iit[v + 1] - iit[v]; 474 for (k = iit[v]; k < iit[v + 1]; k++) 475 if (!PetscBTLookup(btee, jjt[k])) nconn--; 476 if (nconn > order) ends++; 477 else if (nconn == order) ints++; 478 else undef++; 479 } 480 if (undef || ends > 2 || ints != order - 1) { 481 marks[cum++] = i; 482 PetscCall(PetscBTSet(bte, i)); 483 for (j = ii[i]; j < ii[i + 1]; j++) { PetscCall(PetscBTSet(btv, jj[j])); } 484 } 485 } 486 } 487 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 488 if (!order && ii[i + 1] != ii[i]) { 489 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 490 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 491 } 492 } 493 PetscCall(PetscBTDestroy(&btee)); 494 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 495 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 496 if (!conforming) { 497 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 498 PetscCall(MatDestroy(&lGt)); 499 } 500 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 501 502 /* identify splitpoints and corner candidates */ 503 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 504 if (print) { 505 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 506 PetscCall(MatView(lGe, NULL)); 507 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 508 PetscCall(MatView(lGt, NULL)); 509 } 510 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 511 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 512 for (i = 0; i < nv; i++) { 513 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 514 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 515 if (!order) { /* variable order */ 516 PetscReal vorder = 0.; 517 518 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 519 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 520 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 521 ord = 1; 522 } 523 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 524 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 525 if (PetscBTLookup(btbd, jj[j])) { 526 bdir = PETSC_TRUE; 527 break; 528 } 529 if (vc != ecount[jj[j]]) { 530 sneighs = PETSC_FALSE; 531 } else { 532 PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]]; 533 for (k = 0; k < vc; k++) { 534 if (vn[k] != en[k]) { 535 sneighs = PETSC_FALSE; 536 break; 537 } 538 } 539 } 540 } 541 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 542 if (print) PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]); 543 PetscCall(PetscBTSet(btv, i)); 544 } else if (test == ord) { 545 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 546 if (print) PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i); 547 PetscCall(PetscBTSet(btv, i)); 548 } else { 549 if (print) PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i); 550 PetscCall(PetscBTSet(btvcand, i)); 551 } 552 } 553 } 554 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 555 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 556 PetscCall(PetscBTDestroy(&btbd)); 557 558 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 559 if (order != 1) { 560 if (print) PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"); 561 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 562 for (i = 0; i < nv; i++) { 563 if (PetscBTLookup(btvcand, i)) { 564 PetscBool found = PETSC_FALSE; 565 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 566 PetscInt k, e = jj[j]; 567 if (PetscBTLookup(bte, e)) continue; 568 for (k = iit[e]; k < iit[e + 1]; k++) { 569 PetscInt v = jjt[k]; 570 if (v != i && PetscBTLookup(btvcand, v)) { 571 found = PETSC_TRUE; 572 break; 573 } 574 } 575 } 576 if (!found) { 577 if (print) PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i); 578 PetscCall(PetscBTClear(btvcand, i)); 579 } else { 580 if (print) PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i); 581 } 582 } 583 } 584 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 585 } 586 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 587 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 588 PetscCall(MatDestroy(&lGe)); 589 590 /* Get the local G^T explicitly */ 591 PetscCall(MatDestroy(&lGt)); 592 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 593 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 594 595 /* Mark interior nodal dofs */ 596 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 597 PetscCall(PetscBTCreate(nv, &btvi)); 598 for (i = 1; i < n_neigh; i++) { 599 for (j = 0; j < n_shared[i]; j++) { PetscCall(PetscBTSet(btvi, shared[i][j])); } 600 } 601 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 602 603 /* communicate corners and splitpoints */ 604 PetscCall(PetscMalloc1(nv, &vmarks)); 605 PetscCall(PetscArrayzero(sfvleaves, nv)); 606 PetscCall(PetscArrayzero(sfvroots, Lv)); 607 for (i = 0; i < nv; i++) 608 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 609 610 if (print) { 611 IS tbz; 612 613 cum = 0; 614 for (i = 0; i < nv; i++) 615 if (sfvleaves[i]) vmarks[cum++] = i; 616 617 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 618 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 619 PetscCall(ISView(tbz, NULL)); 620 PetscCall(ISDestroy(&tbz)); 621 } 622 623 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 624 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 625 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 626 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 627 628 /* Zero rows of lGt corresponding to identified corners 629 and interior nodal dofs */ 630 cum = 0; 631 for (i = 0; i < nv; i++) { 632 if (sfvleaves[i]) { 633 vmarks[cum++] = i; 634 PetscCall(PetscBTSet(btv, i)); 635 } 636 if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 637 } 638 PetscCall(PetscBTDestroy(&btvi)); 639 if (print) { 640 IS tbz; 641 642 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 643 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 644 PetscCall(ISView(tbz, NULL)); 645 PetscCall(ISDestroy(&tbz)); 646 } 647 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 648 PetscCall(PetscFree(vmarks)); 649 PetscCall(PetscSFDestroy(&sfv)); 650 PetscCall(PetscFree2(sfvleaves, sfvroots)); 651 652 /* Recompute G */ 653 PetscCall(MatDestroy(&lG)); 654 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 655 if (print) { 656 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 657 PetscCall(MatView(lG, NULL)); 658 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 659 PetscCall(MatView(lGt, NULL)); 660 } 661 662 /* Get primal dofs (if any) */ 663 cum = 0; 664 for (i = 0; i < ne; i++) { 665 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 666 } 667 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 668 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 669 if (print) { 670 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 671 PetscCall(ISView(primals, NULL)); 672 } 673 PetscCall(PetscBTDestroy(&bte)); 674 /* TODO: what if the user passed in some of them ? */ 675 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 676 PetscCall(ISDestroy(&primals)); 677 678 /* Compute edge connectivity */ 679 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 680 681 /* Symbolic conn = lG*lGt */ 682 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 683 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 684 PetscCall(MatProductSetAlgorithm(conn, "default")); 685 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 686 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 687 PetscCall(MatProductSetFromOptions(conn)); 688 PetscCall(MatProductSymbolic(conn)); 689 690 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 691 if (fl2g) { 692 PetscBT btf; 693 PetscInt *iia, *jja, *iiu, *jju; 694 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 695 696 /* create CSR for all local dofs */ 697 PetscCall(PetscMalloc1(n + 1, &iia)); 698 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 699 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 700 iiu = pcbddc->mat_graph->xadj; 701 jju = pcbddc->mat_graph->adjncy; 702 } else if (pcbddc->use_local_adj) { 703 rest = PETSC_TRUE; 704 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 705 } else { 706 free = PETSC_TRUE; 707 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 708 iiu[0] = 0; 709 for (i = 0; i < n; i++) { 710 iiu[i + 1] = i + 1; 711 jju[i] = -1; 712 } 713 } 714 715 /* import sizes of CSR */ 716 iia[0] = 0; 717 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 718 719 /* overwrite entries corresponding to the Nedelec field */ 720 PetscCall(PetscBTCreate(n, &btf)); 721 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 722 for (i = 0; i < ne; i++) { 723 PetscCall(PetscBTSet(btf, idxs[i])); 724 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 725 } 726 727 /* iia in CSR */ 728 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 729 730 /* jja in CSR */ 731 PetscCall(PetscMalloc1(iia[n], &jja)); 732 for (i = 0; i < n; i++) 733 if (!PetscBTLookup(btf, i)) 734 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 735 736 /* map edge dofs connectivity */ 737 if (jj) { 738 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 739 for (i = 0; i < ne; i++) { 740 PetscInt e = idxs[i]; 741 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 742 } 743 } 744 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 745 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 746 if (rest) { PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); } 747 if (free) PetscCall(PetscFree2(iiu, jju)); 748 PetscCall(PetscBTDestroy(&btf)); 749 } else { 750 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 751 } 752 753 /* Analyze interface for edge dofs */ 754 PetscCall(PCBDDCAnalyzeInterface(pc)); 755 pcbddc->mat_graph->twodim = PETSC_FALSE; 756 757 /* Get coarse edges in the edge space */ 758 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 759 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 760 761 if (fl2g) { 762 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 763 PetscCall(PetscMalloc1(nee, &eedges)); 764 for (i = 0; i < nee; i++) { PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); } 765 } else { 766 eedges = alleedges; 767 primals = allprimals; 768 } 769 770 /* Mark fine edge dofs with their coarse edge id */ 771 PetscCall(PetscArrayzero(marks, ne)); 772 PetscCall(ISGetLocalSize(primals, &cum)); 773 PetscCall(ISGetIndices(primals, &idxs)); 774 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 775 PetscCall(ISRestoreIndices(primals, &idxs)); 776 if (print) { 777 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 778 PetscCall(ISView(primals, NULL)); 779 } 780 781 maxsize = 0; 782 for (i = 0; i < nee; i++) { 783 PetscInt size, mark = i + 1; 784 785 PetscCall(ISGetLocalSize(eedges[i], &size)); 786 PetscCall(ISGetIndices(eedges[i], &idxs)); 787 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 788 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 789 maxsize = PetscMax(maxsize, size); 790 } 791 792 /* Find coarse edge endpoints */ 793 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 794 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 795 for (i = 0; i < nee; i++) { 796 PetscInt mark = i + 1, size; 797 798 PetscCall(ISGetLocalSize(eedges[i], &size)); 799 if (!size && nedfieldlocal) continue; 800 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 801 PetscCall(ISGetIndices(eedges[i], &idxs)); 802 if (print) { 803 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 804 PetscCall(ISView(eedges[i], NULL)); 805 } 806 for (j = 0; j < size; j++) { 807 PetscInt k, ee = idxs[j]; 808 if (print) PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee); 809 for (k = ii[ee]; k < ii[ee + 1]; k++) { 810 if (print) PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k]); 811 if (PetscBTLookup(btv, jj[k])) { 812 if (print) PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k]); 813 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 814 PetscInt k2; 815 PetscBool corner = PETSC_FALSE; 816 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 817 if (print) PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])); 818 /* it's a corner if either is connected with an edge dof belonging to a different cc or 819 if the edge dof lie on the natural part of the boundary */ 820 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 821 corner = PETSC_TRUE; 822 break; 823 } 824 } 825 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 826 if (print) PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k]); 827 PetscCall(PetscBTSet(btv, jj[k])); 828 } else { 829 if (print) PetscPrintf(PETSC_COMM_SELF, " no corners found\n"); 830 } 831 } 832 } 833 } 834 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 835 } 836 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 837 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 838 PetscCall(PetscBTDestroy(&btb)); 839 840 /* Reset marked primal dofs */ 841 PetscCall(ISGetLocalSize(primals, &cum)); 842 PetscCall(ISGetIndices(primals, &idxs)); 843 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 844 PetscCall(ISRestoreIndices(primals, &idxs)); 845 846 /* Now use the initial lG */ 847 PetscCall(MatDestroy(&lG)); 848 PetscCall(MatDestroy(&lGt)); 849 lG = lGinit; 850 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 851 852 /* Compute extended cols indices */ 853 PetscCall(PetscBTCreate(nv, &btvc)); 854 PetscCall(PetscBTCreate(nee, &bter)); 855 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 856 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 857 i *= maxsize; 858 PetscCall(PetscCalloc1(nee, &extcols)); 859 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 860 eerr = PETSC_FALSE; 861 for (i = 0; i < nee; i++) { 862 PetscInt size, found = 0; 863 864 cum = 0; 865 PetscCall(ISGetLocalSize(eedges[i], &size)); 866 if (!size && nedfieldlocal) continue; 867 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 868 PetscCall(ISGetIndices(eedges[i], &idxs)); 869 PetscCall(PetscBTMemzero(nv, btvc)); 870 for (j = 0; j < size; j++) { 871 PetscInt k, ee = idxs[j]; 872 for (k = ii[ee]; k < ii[ee + 1]; k++) { 873 PetscInt vv = jj[k]; 874 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 875 else if (!PetscBTLookupSet(btvc, vv)) found++; 876 } 877 } 878 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 879 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 880 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 881 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 882 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 883 /* it may happen that endpoints are not defined at this point 884 if it is the case, mark this edge for a second pass */ 885 if (cum != size - 1 || found != 2) { 886 PetscCall(PetscBTSet(bter, i)); 887 if (print) { 888 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 889 PetscCall(ISView(eedges[i], NULL)); 890 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 891 PetscCall(ISView(extcols[i], NULL)); 892 } 893 eerr = PETSC_TRUE; 894 } 895 } 896 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 897 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 898 if (done) { 899 PetscInt *newprimals; 900 901 PetscCall(PetscMalloc1(ne, &newprimals)); 902 PetscCall(ISGetLocalSize(primals, &cum)); 903 PetscCall(ISGetIndices(primals, &idxs)); 904 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 905 PetscCall(ISRestoreIndices(primals, &idxs)); 906 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 907 if (print) PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]); 908 for (i = 0; i < nee; i++) { 909 PetscBool has_candidates = PETSC_FALSE; 910 if (PetscBTLookup(bter, i)) { 911 PetscInt size, mark = i + 1; 912 913 PetscCall(ISGetLocalSize(eedges[i], &size)); 914 PetscCall(ISGetIndices(eedges[i], &idxs)); 915 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 916 for (j = 0; j < size; j++) { 917 PetscInt k, ee = idxs[j]; 918 if (print) PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]); 919 for (k = ii[ee]; k < ii[ee + 1]; k++) { 920 /* set all candidates located on the edge as corners */ 921 if (PetscBTLookup(btvcand, jj[k])) { 922 PetscInt k2, vv = jj[k]; 923 has_candidates = PETSC_TRUE; 924 if (print) PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv); 925 PetscCall(PetscBTSet(btv, vv)); 926 /* set all edge dofs connected to candidate as primals */ 927 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 928 if (marks[jjt[k2]] == mark) { 929 PetscInt k3, ee2 = jjt[k2]; 930 if (print) PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2); 931 newprimals[cum++] = ee2; 932 /* finally set the new corners */ 933 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 934 if (print) PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]); 935 PetscCall(PetscBTSet(btv, jj[k3])); 936 } 937 } 938 } 939 } else { 940 if (print) PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k]); 941 } 942 } 943 } 944 if (!has_candidates) { /* circular edge */ 945 PetscInt k, ee = idxs[0], *tmarks; 946 947 PetscCall(PetscCalloc1(ne, &tmarks)); 948 if (print) PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i); 949 for (k = ii[ee]; k < ii[ee + 1]; k++) { 950 PetscInt k2; 951 if (print) PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k]); 952 PetscCall(PetscBTSet(btv, jj[k])); 953 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 954 } 955 for (j = 0; j < size; j++) { 956 if (tmarks[idxs[j]] > 1) { 957 if (print) PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]); 958 newprimals[cum++] = idxs[j]; 959 } 960 } 961 PetscCall(PetscFree(tmarks)); 962 } 963 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 964 } 965 PetscCall(ISDestroy(&extcols[i])); 966 } 967 PetscCall(PetscFree(extcols)); 968 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 969 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 970 if (fl2g) { 971 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 972 PetscCall(ISDestroy(&primals)); 973 for (i = 0; i < nee; i++) { PetscCall(ISDestroy(&eedges[i])); } 974 PetscCall(PetscFree(eedges)); 975 } 976 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 977 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 978 PetscCall(PetscFree(newprimals)); 979 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 980 PetscCall(ISDestroy(&primals)); 981 PetscCall(PCBDDCAnalyzeInterface(pc)); 982 pcbddc->mat_graph->twodim = PETSC_FALSE; 983 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 984 if (fl2g) { 985 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 986 PetscCall(PetscMalloc1(nee, &eedges)); 987 for (i = 0; i < nee; i++) { PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); } 988 } else { 989 eedges = alleedges; 990 primals = allprimals; 991 } 992 PetscCall(PetscCalloc1(nee, &extcols)); 993 994 /* Mark again */ 995 PetscCall(PetscArrayzero(marks, ne)); 996 for (i = 0; i < nee; i++) { 997 PetscInt size, mark = i + 1; 998 999 PetscCall(ISGetLocalSize(eedges[i], &size)); 1000 PetscCall(ISGetIndices(eedges[i], &idxs)); 1001 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1002 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1003 } 1004 if (print) { 1005 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1006 PetscCall(ISView(primals, NULL)); 1007 } 1008 1009 /* Recompute extended cols */ 1010 eerr = PETSC_FALSE; 1011 for (i = 0; i < nee; i++) { 1012 PetscInt size; 1013 1014 cum = 0; 1015 PetscCall(ISGetLocalSize(eedges[i], &size)); 1016 if (!size && nedfieldlocal) continue; 1017 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1018 PetscCall(ISGetIndices(eedges[i], &idxs)); 1019 for (j = 0; j < size; j++) { 1020 PetscInt k, ee = idxs[j]; 1021 for (k = ii[ee]; k < ii[ee + 1]; k++) 1022 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1023 } 1024 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1025 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1026 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1027 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1028 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1029 if (cum != size - 1) { 1030 if (print) { 1031 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1032 PetscCall(ISView(eedges[i], NULL)); 1033 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1034 PetscCall(ISView(extcols[i], NULL)); 1035 } 1036 eerr = PETSC_TRUE; 1037 } 1038 } 1039 } 1040 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1041 PetscCall(PetscFree2(extrow, gidxs)); 1042 PetscCall(PetscBTDestroy(&bter)); 1043 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1044 /* an error should not occur at this point */ 1045 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1046 1047 /* Check the number of endpoints */ 1048 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1049 PetscCall(PetscMalloc1(2 * nee, &corners)); 1050 PetscCall(PetscMalloc1(nee, &cedges)); 1051 for (i = 0; i < nee; i++) { 1052 PetscInt size, found = 0, gc[2]; 1053 1054 /* init with defaults */ 1055 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1056 PetscCall(ISGetLocalSize(eedges[i], &size)); 1057 if (!size && nedfieldlocal) continue; 1058 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1059 PetscCall(ISGetIndices(eedges[i], &idxs)); 1060 PetscCall(PetscBTMemzero(nv, btvc)); 1061 for (j = 0; j < size; j++) { 1062 PetscInt k, ee = idxs[j]; 1063 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1064 PetscInt vv = jj[k]; 1065 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1066 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more then two corners for edge %" PetscInt_FMT, i); 1067 corners[i * 2 + found++] = vv; 1068 } 1069 } 1070 } 1071 if (found != 2) { 1072 PetscInt e; 1073 if (fl2g) { 1074 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1075 } else { 1076 e = idxs[0]; 1077 } 1078 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1079 } 1080 1081 /* get primal dof index on this coarse edge */ 1082 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1083 if (gc[0] > gc[1]) { 1084 PetscInt swap = corners[2 * i]; 1085 corners[2 * i] = corners[2 * i + 1]; 1086 corners[2 * i + 1] = swap; 1087 } 1088 cedges[i] = idxs[size - 1]; 1089 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1090 if (print) PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]); 1091 } 1092 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1093 PetscCall(PetscBTDestroy(&btvc)); 1094 1095 if (PetscDefined(USE_DEBUG)) { 1096 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1097 not interfere with neighbouring coarse edges */ 1098 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1099 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1100 for (i = 0; i < nv; i++) { 1101 PetscInt emax = 0, eemax = 0; 1102 1103 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1104 PetscCall(PetscArrayzero(emarks, nee + 1)); 1105 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1106 for (j = 1; j < nee + 1; j++) { 1107 if (emax < emarks[j]) { 1108 emax = emarks[j]; 1109 eemax = j; 1110 } 1111 } 1112 /* not relevant for edges */ 1113 if (!eemax) continue; 1114 1115 for (j = ii[i]; j < ii[i + 1]; j++) { 1116 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1117 } 1118 } 1119 PetscCall(PetscFree(emarks)); 1120 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1121 } 1122 1123 /* Compute extended rows indices for edge blocks of the change of basis */ 1124 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1125 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1126 extmem *= maxsize; 1127 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1128 PetscCall(PetscMalloc1(nee, &extrows)); 1129 PetscCall(PetscCalloc1(nee, &extrowcum)); 1130 for (i = 0; i < nv; i++) { 1131 PetscInt mark = 0, size, start; 1132 1133 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1134 for (j = ii[i]; j < ii[i + 1]; j++) 1135 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1136 1137 /* not relevant */ 1138 if (!mark) continue; 1139 1140 /* import extended row */ 1141 mark--; 1142 start = mark * extmem + extrowcum[mark]; 1143 size = ii[i + 1] - ii[i]; 1144 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1145 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1146 extrowcum[mark] += size; 1147 } 1148 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1149 PetscCall(MatDestroy(&lGt)); 1150 PetscCall(PetscFree(marks)); 1151 1152 /* Compress extrows */ 1153 cum = 0; 1154 for (i = 0; i < nee; i++) { 1155 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1156 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1157 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1158 cum = PetscMax(cum, size); 1159 } 1160 PetscCall(PetscFree(extrowcum)); 1161 PetscCall(PetscBTDestroy(&btv)); 1162 PetscCall(PetscBTDestroy(&btvcand)); 1163 1164 /* Workspace for lapack inner calls and VecSetValues */ 1165 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1166 1167 /* Create change of basis matrix (preallocation can be improved) */ 1168 PetscCall(MatCreate(comm, &T)); 1169 PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N)); 1170 PetscCall(MatSetType(T, MATAIJ)); 1171 PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL)); 1172 PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL)); 1173 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1174 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1175 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1176 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1177 1178 /* Defaults to identity */ 1179 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 1180 PetscCall(VecSet(tvec, 1.0)); 1181 PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES)); 1182 PetscCall(VecDestroy(&tvec)); 1183 1184 /* Create discrete gradient for the coarser level if needed */ 1185 PetscCall(MatDestroy(&pcbddc->nedcG)); 1186 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1187 if (pcbddc->current_level < pcbddc->max_levels) { 1188 ISLocalToGlobalMapping cel2g, cvl2g; 1189 IS wis, gwis; 1190 PetscInt cnv, cne; 1191 1192 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1193 if (fl2g) { 1194 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1195 } else { 1196 PetscCall(PetscObjectReference((PetscObject)wis)); 1197 pcbddc->nedclocal = wis; 1198 } 1199 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1200 PetscCall(ISDestroy(&wis)); 1201 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1202 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1203 PetscCall(ISDestroy(&wis)); 1204 PetscCall(ISDestroy(&gwis)); 1205 1206 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1207 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1208 PetscCall(ISDestroy(&wis)); 1209 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1210 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1211 PetscCall(ISDestroy(&wis)); 1212 PetscCall(ISDestroy(&gwis)); 1213 1214 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1215 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1216 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1217 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1218 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1219 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1220 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1221 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1222 } 1223 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1224 1225 #if defined(PRINT_GDET) 1226 inc = 0; 1227 lev = pcbddc->current_level; 1228 #endif 1229 1230 /* Insert values in the change of basis matrix */ 1231 for (i = 0; i < nee; i++) { 1232 Mat Gins = NULL, GKins = NULL; 1233 IS cornersis = NULL; 1234 PetscScalar cvals[2]; 1235 1236 if (pcbddc->nedcG) { PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); } 1237 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1238 if (Gins && GKins) { 1239 const PetscScalar *data; 1240 const PetscInt *rows, *cols; 1241 PetscInt nrh, nch, nrc, ncc; 1242 1243 PetscCall(ISGetIndices(eedges[i], &cols)); 1244 /* H1 */ 1245 PetscCall(ISGetIndices(extrows[i], &rows)); 1246 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1247 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1248 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1249 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1250 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1251 /* complement */ 1252 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1253 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1254 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1255 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1256 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1257 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1258 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1259 1260 /* coarse discrete gradient */ 1261 if (pcbddc->nedcG) { 1262 PetscInt cols[2]; 1263 1264 cols[0] = 2 * i; 1265 cols[1] = 2 * i + 1; 1266 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1267 } 1268 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1269 } 1270 PetscCall(ISDestroy(&extrows[i])); 1271 PetscCall(ISDestroy(&extcols[i])); 1272 PetscCall(ISDestroy(&cornersis)); 1273 PetscCall(MatDestroy(&Gins)); 1274 PetscCall(MatDestroy(&GKins)); 1275 } 1276 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1277 1278 /* Start assembling */ 1279 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1280 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1281 1282 /* Free */ 1283 if (fl2g) { 1284 PetscCall(ISDestroy(&primals)); 1285 for (i = 0; i < nee; i++) { PetscCall(ISDestroy(&eedges[i])); } 1286 PetscCall(PetscFree(eedges)); 1287 } 1288 1289 /* hack mat_graph with primal dofs on the coarse edges */ 1290 { 1291 PCBDDCGraph graph = pcbddc->mat_graph; 1292 PetscInt *oqueue = graph->queue; 1293 PetscInt *ocptr = graph->cptr; 1294 PetscInt ncc, *idxs; 1295 1296 /* find first primal edge */ 1297 if (pcbddc->nedclocal) { 1298 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1299 } else { 1300 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1301 idxs = cedges; 1302 } 1303 cum = 0; 1304 while (cum < nee && cedges[cum] < 0) cum++; 1305 1306 /* adapt connected components */ 1307 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1308 graph->cptr[0] = 0; 1309 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1310 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1311 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1312 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1313 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1314 ncc++; 1315 lc--; 1316 cum++; 1317 while (cum < nee && cedges[cum] < 0) cum++; 1318 } 1319 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1320 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1321 ncc++; 1322 } 1323 graph->ncc = ncc; 1324 if (pcbddc->nedclocal) { PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); } 1325 PetscCall(PetscFree2(ocptr, oqueue)); 1326 } 1327 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1328 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1329 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1330 PetscCall(MatDestroy(&conn)); 1331 1332 PetscCall(ISDestroy(&nedfieldlocal)); 1333 PetscCall(PetscFree(extrow)); 1334 PetscCall(PetscFree2(work, rwork)); 1335 PetscCall(PetscFree(corners)); 1336 PetscCall(PetscFree(cedges)); 1337 PetscCall(PetscFree(extrows)); 1338 PetscCall(PetscFree(extcols)); 1339 PetscCall(MatDestroy(&lG)); 1340 1341 /* Complete assembling */ 1342 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1343 if (pcbddc->nedcG) { 1344 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1345 #if 0 1346 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1347 PetscCall(MatView(pcbddc->nedcG,NULL)); 1348 #endif 1349 } 1350 1351 /* set change of basis */ 1352 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1353 PetscCall(MatDestroy(&T)); 1354 1355 PetscFunctionReturn(0); 1356 } 1357 1358 /* the near-null space of BDDC carries information on quadrature weights, 1359 and these can be collinear -> so cheat with MatNullSpaceCreate 1360 and create a suitable set of basis vectors first */ 1361 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) { 1362 PetscInt i; 1363 1364 PetscFunctionBegin; 1365 for (i = 0; i < nvecs; i++) { 1366 PetscInt first, last; 1367 1368 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1369 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1370 if (i >= first && i < last) { 1371 PetscScalar *data; 1372 PetscCall(VecGetArray(quad_vecs[i], &data)); 1373 if (!has_const) { 1374 data[i - first] = 1.; 1375 } else { 1376 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1377 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1378 } 1379 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1380 } 1381 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1382 } 1383 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1384 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1385 PetscInt first, last; 1386 PetscCall(VecLockReadPop(quad_vecs[i])); 1387 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1388 if (i >= first && i < last) { 1389 PetscScalar *data; 1390 PetscCall(VecGetArray(quad_vecs[i], &data)); 1391 if (!has_const) { 1392 data[i - first] = 0.; 1393 } else { 1394 data[2 * i - first] = 0.; 1395 data[2 * i - first + 1] = 0.; 1396 } 1397 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1398 } 1399 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1400 PetscCall(VecLockReadPush(quad_vecs[i])); 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) { 1406 Mat loc_divudotp; 1407 Vec p, v, vins, quad_vec, *quad_vecs; 1408 ISLocalToGlobalMapping map; 1409 PetscScalar *vals; 1410 const PetscScalar *array; 1411 PetscInt i, maxneighs = 0, maxsize, *gidxs; 1412 PetscInt n_neigh, *neigh, *n_shared, **shared; 1413 PetscMPIInt rank; 1414 1415 PetscFunctionBegin; 1416 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1417 for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs); 1418 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 1419 if (!maxneighs) { 1420 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1421 *nnsp = NULL; 1422 PetscFunctionReturn(0); 1423 } 1424 maxsize = 0; 1425 for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize); 1426 PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals)); 1427 /* create vectors to hold quadrature weights */ 1428 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1429 if (!transpose) { 1430 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1431 } else { 1432 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1433 } 1434 PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs)); 1435 PetscCall(VecDestroy(&quad_vec)); 1436 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp)); 1437 for (i = 0; i < maxneighs; i++) { PetscCall(VecLockReadPop(quad_vecs[i])); } 1438 1439 /* compute local quad vec */ 1440 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1441 if (!transpose) { 1442 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1443 } else { 1444 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1445 } 1446 PetscCall(VecSet(p, 1.)); 1447 if (!transpose) { 1448 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1449 } else { 1450 PetscCall(MatMult(loc_divudotp, p, v)); 1451 } 1452 if (vl2l) { 1453 Mat lA; 1454 VecScatter sc; 1455 1456 PetscCall(MatISGetLocalMat(A, &lA)); 1457 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1458 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1459 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1460 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1461 PetscCall(VecScatterDestroy(&sc)); 1462 } else { 1463 vins = v; 1464 } 1465 PetscCall(VecGetArrayRead(vins, &array)); 1466 PetscCall(VecDestroy(&p)); 1467 1468 /* insert in global quadrature vecs */ 1469 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1470 for (i = 1; i < n_neigh; i++) { 1471 const PetscInt *idxs; 1472 PetscInt idx, nn, j; 1473 1474 idxs = shared[i]; 1475 nn = n_shared[i]; 1476 for (j = 0; j < nn; j++) vals[j] = array[idxs[j]]; 1477 PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx)); 1478 idx = -(idx + 1); 1479 PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs); 1480 PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs)); 1481 PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES)); 1482 } 1483 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1484 PetscCall(VecRestoreArrayRead(vins, &array)); 1485 if (vl2l) { PetscCall(VecDestroy(&vins)); } 1486 PetscCall(VecDestroy(&v)); 1487 PetscCall(PetscFree2(gidxs, vals)); 1488 1489 /* assemble near null space */ 1490 for (i = 0; i < maxneighs; i++) { PetscCall(VecAssemblyBegin(quad_vecs[i])); } 1491 for (i = 0; i < maxneighs; i++) { 1492 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1493 PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view")); 1494 PetscCall(VecLockReadPush(quad_vecs[i])); 1495 } 1496 PetscCall(VecDestroyVecs(maxneighs, &quad_vecs)); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) { 1501 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1502 1503 PetscFunctionBegin; 1504 if (primalv) { 1505 if (pcbddc->user_primal_vertices_local) { 1506 IS list[2], newp; 1507 1508 list[0] = primalv; 1509 list[1] = pcbddc->user_primal_vertices_local; 1510 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1511 PetscCall(ISSortRemoveDups(newp)); 1512 PetscCall(ISDestroy(&list[1])); 1513 pcbddc->user_primal_vertices_local = newp; 1514 } else { 1515 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1516 } 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) { 1522 PetscInt f, *comp = (PetscInt *)ctx; 1523 1524 PetscFunctionBegin; 1525 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1526 PetscFunctionReturn(0); 1527 } 1528 1529 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) { 1530 Vec local, global; 1531 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1532 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1533 PetscBool monolithic = PETSC_FALSE; 1534 1535 PetscFunctionBegin; 1536 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1537 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1538 PetscOptionsEnd(); 1539 /* need to convert from global to local topology information and remove references to information in global ordering */ 1540 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1541 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1542 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1543 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1544 if (monolithic) { /* just get block size to properly compute vertices */ 1545 if (pcbddc->vertex_size == 1) { PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); } 1546 goto boundary; 1547 } 1548 1549 if (pcbddc->user_provided_isfordofs) { 1550 if (pcbddc->n_ISForDofs) { 1551 PetscInt i; 1552 1553 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1554 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1555 PetscInt bs; 1556 1557 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1558 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1559 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1560 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1561 } 1562 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1563 pcbddc->n_ISForDofs = 0; 1564 PetscCall(PetscFree(pcbddc->ISForDofs)); 1565 } 1566 } else { 1567 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1568 DM dm; 1569 1570 PetscCall(MatGetDM(pc->pmat, &dm)); 1571 if (!dm) { PetscCall(PCGetDM(pc, &dm)); } 1572 if (dm) { 1573 IS *fields; 1574 PetscInt nf, i; 1575 1576 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1577 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1578 for (i = 0; i < nf; i++) { 1579 PetscInt bs; 1580 1581 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1582 PetscCall(ISGetBlockSize(fields[i], &bs)); 1583 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1584 PetscCall(ISDestroy(&fields[i])); 1585 } 1586 PetscCall(PetscFree(fields)); 1587 pcbddc->n_ISForDofsLocal = nf; 1588 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1589 PetscContainer c; 1590 1591 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1592 if (c) { 1593 MatISLocalFields lf; 1594 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1595 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1596 } else { /* fallback, create the default fields if bs > 1 */ 1597 PetscInt i, n = matis->A->rmap->n; 1598 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1599 if (i > 1) { 1600 pcbddc->n_ISForDofsLocal = i; 1601 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1602 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) { PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); } 1603 } 1604 } 1605 } 1606 } else { 1607 PetscInt i; 1608 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) { PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); } 1609 } 1610 } 1611 1612 boundary: 1613 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1614 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1615 } else if (pcbddc->DirichletBoundariesLocal) { 1616 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1617 } 1618 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1619 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1620 } else if (pcbddc->NeumannBoundariesLocal) { 1621 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1622 } 1623 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); } 1624 PetscCall(VecDestroy(&global)); 1625 PetscCall(VecDestroy(&local)); 1626 /* detect local disconnected subdomains if requested (use matis->A) */ 1627 if (pcbddc->detect_disconnected) { 1628 IS primalv = NULL; 1629 PetscInt i; 1630 PetscBool filter = pcbddc->detect_disconnected_filter; 1631 1632 for (i = 0; i < pcbddc->n_local_subs; i++) { PetscCall(ISDestroy(&pcbddc->local_subs[i])); } 1633 PetscCall(PetscFree(pcbddc->local_subs)); 1634 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1635 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1636 PetscCall(ISDestroy(&primalv)); 1637 } 1638 /* early stage corner detection */ 1639 { 1640 DM dm; 1641 1642 PetscCall(MatGetDM(pc->pmat, &dm)); 1643 if (!dm) { PetscCall(PCGetDM(pc, &dm)); } 1644 if (dm) { 1645 PetscBool isda; 1646 1647 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1648 if (isda) { 1649 ISLocalToGlobalMapping l2l; 1650 IS corners; 1651 Mat lA; 1652 PetscBool gl, lo; 1653 1654 { 1655 Vec cvec; 1656 const PetscScalar *coords; 1657 PetscInt dof, n, cdim; 1658 PetscBool memc = PETSC_TRUE; 1659 1660 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1661 PetscCall(DMGetCoordinates(dm, &cvec)); 1662 PetscCall(VecGetLocalSize(cvec, &n)); 1663 PetscCall(VecGetBlockSize(cvec, &cdim)); 1664 n /= cdim; 1665 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1666 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1667 PetscCall(VecGetArrayRead(cvec, &coords)); 1668 #if defined(PETSC_USE_COMPLEX) 1669 memc = PETSC_FALSE; 1670 #endif 1671 if (dof != 1) memc = PETSC_FALSE; 1672 if (memc) { 1673 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1674 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1675 PetscReal *bcoords = pcbddc->mat_graph->coords; 1676 PetscInt i, b, d; 1677 1678 for (i = 0; i < n; i++) { 1679 for (b = 0; b < dof; b++) { 1680 for (d = 0; d < cdim; d++) { bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); } 1681 } 1682 } 1683 } 1684 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1685 pcbddc->mat_graph->cdim = cdim; 1686 pcbddc->mat_graph->cnloc = dof * n; 1687 pcbddc->mat_graph->cloc = PETSC_FALSE; 1688 } 1689 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1690 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1691 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1692 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1693 lo = (PetscBool)(l2l && corners); 1694 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1695 if (gl) { /* From PETSc's DMDA */ 1696 const PetscInt *idx; 1697 PetscInt dof, bs, *idxout, n; 1698 1699 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1700 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1701 PetscCall(ISGetLocalSize(corners, &n)); 1702 PetscCall(ISGetIndices(corners, &idx)); 1703 if (bs == dof) { 1704 PetscCall(PetscMalloc1(n, &idxout)); 1705 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1706 } else { /* the original DMDA local-to-local map have been modified */ 1707 PetscInt i, d; 1708 1709 PetscCall(PetscMalloc1(dof * n, &idxout)); 1710 for (i = 0; i < n; i++) 1711 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1712 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1713 1714 bs = 1; 1715 n *= dof; 1716 } 1717 PetscCall(ISRestoreIndices(corners, &idx)); 1718 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1719 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1720 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1721 PetscCall(ISDestroy(&corners)); 1722 pcbddc->corner_selected = PETSC_TRUE; 1723 pcbddc->corner_selection = PETSC_TRUE; 1724 } 1725 if (corners) { PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); } 1726 } 1727 } 1728 } 1729 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1730 DM dm; 1731 1732 PetscCall(MatGetDM(pc->pmat, &dm)); 1733 if (!dm) { PetscCall(PCGetDM(pc, &dm)); } 1734 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1735 Vec vcoords; 1736 PetscSection section; 1737 PetscReal *coords; 1738 PetscInt d, cdim, nl, nf, **ctxs; 1739 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1740 /* debug coordinates */ 1741 PetscViewer viewer; 1742 PetscBool flg; 1743 PetscViewerFormat format; 1744 const char *prefix; 1745 1746 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1747 PetscCall(DMGetLocalSection(dm, §ion)); 1748 PetscCall(PetscSectionGetNumFields(section, &nf)); 1749 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1750 PetscCall(VecGetLocalSize(vcoords, &nl)); 1751 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1752 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1753 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1754 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1755 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1756 1757 /* debug coordinates */ 1758 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1759 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1760 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1761 for (d = 0; d < cdim; d++) { 1762 PetscInt i; 1763 const PetscScalar *v; 1764 char name[16]; 1765 1766 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1767 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1768 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1769 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1770 if (flg) PetscCall(VecView(vcoords, viewer)); 1771 PetscCall(VecGetArrayRead(vcoords, &v)); 1772 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1773 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1774 } 1775 PetscCall(VecDestroy(&vcoords)); 1776 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1777 PetscCall(PetscFree(coords)); 1778 PetscCall(PetscFree(ctxs[0])); 1779 PetscCall(PetscFree2(funcs, ctxs)); 1780 if (flg) { 1781 PetscCall(PetscViewerPopFormat(viewer)); 1782 PetscCall(PetscViewerDestroy(&viewer)); 1783 } 1784 } 1785 } 1786 PetscFunctionReturn(0); 1787 } 1788 1789 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) { 1790 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 1791 IS nis; 1792 const PetscInt *idxs; 1793 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1794 1795 PetscFunctionBegin; 1796 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1797 if (mop == MPI_LAND) { 1798 /* init rootdata with true */ 1799 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1800 } else { 1801 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1802 } 1803 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1804 PetscCall(ISGetLocalSize(*is, &nd)); 1805 PetscCall(ISGetIndices(*is, &idxs)); 1806 for (i = 0; i < nd; i++) 1807 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1808 PetscCall(ISRestoreIndices(*is, &idxs)); 1809 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1810 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1811 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1812 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1813 if (mop == MPI_LAND) { 1814 PetscCall(PetscMalloc1(nd, &nidxs)); 1815 } else { 1816 PetscCall(PetscMalloc1(n, &nidxs)); 1817 } 1818 for (i = 0, nnd = 0; i < n; i++) 1819 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1820 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1821 PetscCall(ISDestroy(is)); 1822 *is = nis; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) { 1827 PC_IS *pcis = (PC_IS *)(pc->data); 1828 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 1829 1830 PetscFunctionBegin; 1831 if (!pcbddc->benign_have_null) { PetscFunctionReturn(0); } 1832 if (pcbddc->ChangeOfBasisMatrix) { 1833 Vec swap; 1834 1835 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1836 swap = pcbddc->work_change; 1837 pcbddc->work_change = r; 1838 r = swap; 1839 } 1840 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1841 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1842 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1843 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1844 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1845 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1846 PetscCall(VecSet(z, 0.)); 1847 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1848 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1849 if (pcbddc->ChangeOfBasisMatrix) { 1850 pcbddc->work_change = r; 1851 PetscCall(VecCopy(z, pcbddc->work_change)); 1852 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) { 1858 PCBDDCBenignMatMult_ctx ctx; 1859 PetscBool apply_right, apply_left, reset_x; 1860 1861 PetscFunctionBegin; 1862 PetscCall(MatShellGetContext(A, &ctx)); 1863 if (transpose) { 1864 apply_right = ctx->apply_left; 1865 apply_left = ctx->apply_right; 1866 } else { 1867 apply_right = ctx->apply_right; 1868 apply_left = ctx->apply_left; 1869 } 1870 reset_x = PETSC_FALSE; 1871 if (apply_right) { 1872 const PetscScalar *ax; 1873 PetscInt nl, i; 1874 1875 PetscCall(VecGetLocalSize(x, &nl)); 1876 PetscCall(VecGetArrayRead(x, &ax)); 1877 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 1878 PetscCall(VecRestoreArrayRead(x, &ax)); 1879 for (i = 0; i < ctx->benign_n; i++) { 1880 PetscScalar sum, val; 1881 const PetscInt *idxs; 1882 PetscInt nz, j; 1883 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1884 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1885 sum = 0.; 1886 if (ctx->apply_p0) { 1887 val = ctx->work[idxs[nz - 1]]; 1888 for (j = 0; j < nz - 1; j++) { 1889 sum += ctx->work[idxs[j]]; 1890 ctx->work[idxs[j]] += val; 1891 } 1892 } else { 1893 for (j = 0; j < nz - 1; j++) { sum += ctx->work[idxs[j]]; } 1894 } 1895 ctx->work[idxs[nz - 1]] -= sum; 1896 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1897 } 1898 PetscCall(VecPlaceArray(x, ctx->work)); 1899 reset_x = PETSC_TRUE; 1900 } 1901 if (transpose) { 1902 PetscCall(MatMultTranspose(ctx->A, x, y)); 1903 } else { 1904 PetscCall(MatMult(ctx->A, x, y)); 1905 } 1906 if (reset_x) PetscCall(VecResetArray(x)); 1907 if (apply_left) { 1908 PetscScalar *ay; 1909 PetscInt i; 1910 1911 PetscCall(VecGetArray(y, &ay)); 1912 for (i = 0; i < ctx->benign_n; i++) { 1913 PetscScalar sum, val; 1914 const PetscInt *idxs; 1915 PetscInt nz, j; 1916 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1917 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1918 val = -ay[idxs[nz - 1]]; 1919 if (ctx->apply_p0) { 1920 sum = 0.; 1921 for (j = 0; j < nz - 1; j++) { 1922 sum += ay[idxs[j]]; 1923 ay[idxs[j]] += val; 1924 } 1925 ay[idxs[nz - 1]] += sum; 1926 } else { 1927 for (j = 0; j < nz - 1; j++) { ay[idxs[j]] += val; } 1928 ay[idxs[nz - 1]] = 0.; 1929 } 1930 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1931 } 1932 PetscCall(VecRestoreArray(y, &ay)); 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) { 1938 PetscFunctionBegin; 1939 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) { 1944 PetscFunctionBegin; 1945 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) { 1950 PC_IS *pcis = (PC_IS *)pc->data; 1951 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1952 PCBDDCBenignMatMult_ctx ctx; 1953 1954 PetscFunctionBegin; 1955 if (!restore) { 1956 Mat A_IB, A_BI; 1957 PetscScalar *work; 1958 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1959 1960 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 1961 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1962 PetscCall(PetscMalloc1(pcis->n, &work)); 1963 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 1964 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 1965 PetscCall(MatSetType(A_IB, MATSHELL)); 1966 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 1967 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 1968 PetscCall(PetscNew(&ctx)); 1969 PetscCall(MatShellSetContext(A_IB, ctx)); 1970 ctx->apply_left = PETSC_TRUE; 1971 ctx->apply_right = PETSC_FALSE; 1972 ctx->apply_p0 = PETSC_FALSE; 1973 ctx->benign_n = pcbddc->benign_n; 1974 if (reuse) { 1975 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1976 ctx->free = PETSC_FALSE; 1977 } else { /* TODO: could be optimized for successive solves */ 1978 ISLocalToGlobalMapping N_to_D; 1979 PetscInt i; 1980 1981 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 1982 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 1983 for (i = 0; i < pcbddc->benign_n; i++) { PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); } 1984 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 1985 ctx->free = PETSC_TRUE; 1986 } 1987 ctx->A = pcis->A_IB; 1988 ctx->work = work; 1989 PetscCall(MatSetUp(A_IB)); 1990 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 1991 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 1992 pcis->A_IB = A_IB; 1993 1994 /* A_BI as A_IB^T */ 1995 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 1996 pcbddc->benign_original_mat = pcis->A_BI; 1997 pcis->A_BI = A_BI; 1998 } else { 1999 if (!pcbddc->benign_original_mat) { PetscFunctionReturn(0); } 2000 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2001 PetscCall(MatDestroy(&pcis->A_IB)); 2002 pcis->A_IB = ctx->A; 2003 ctx->A = NULL; 2004 PetscCall(MatDestroy(&pcis->A_BI)); 2005 pcis->A_BI = pcbddc->benign_original_mat; 2006 pcbddc->benign_original_mat = NULL; 2007 if (ctx->free) { 2008 PetscInt i; 2009 for (i = 0; i < ctx->benign_n; i++) { PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); } 2010 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2011 } 2012 PetscCall(PetscFree(ctx->work)); 2013 PetscCall(PetscFree(ctx)); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 /* used just in bddc debug mode */ 2019 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) { 2020 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2021 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2022 Mat An; 2023 2024 PetscFunctionBegin; 2025 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2026 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2027 if (is1) { 2028 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2029 PetscCall(MatDestroy(&An)); 2030 } else { 2031 *B = An; 2032 } 2033 PetscFunctionReturn(0); 2034 } 2035 2036 /* TODO: add reuse flag */ 2037 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) { 2038 Mat Bt; 2039 PetscScalar *a, *bdata; 2040 const PetscInt *ii, *ij; 2041 PetscInt m, n, i, nnz, *bii, *bij; 2042 PetscBool flg_row; 2043 2044 PetscFunctionBegin; 2045 PetscCall(MatGetSize(A, &n, &m)); 2046 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2047 PetscCall(MatSeqAIJGetArray(A, &a)); 2048 nnz = n; 2049 for (i = 0; i < ii[n]; i++) { 2050 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2051 } 2052 PetscCall(PetscMalloc1(n + 1, &bii)); 2053 PetscCall(PetscMalloc1(nnz, &bij)); 2054 PetscCall(PetscMalloc1(nnz, &bdata)); 2055 nnz = 0; 2056 bii[0] = 0; 2057 for (i = 0; i < n; i++) { 2058 PetscInt j; 2059 for (j = ii[i]; j < ii[i + 1]; j++) { 2060 PetscScalar entry = a[j]; 2061 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2062 bij[nnz] = ij[j]; 2063 bdata[nnz] = entry; 2064 nnz++; 2065 } 2066 } 2067 bii[i + 1] = nnz; 2068 } 2069 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2070 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2071 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2072 { 2073 Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data); 2074 b->free_a = PETSC_TRUE; 2075 b->free_ij = PETSC_TRUE; 2076 } 2077 if (*B == A) { PetscCall(MatDestroy(&A)); } 2078 *B = Bt; 2079 PetscFunctionReturn(0); 2080 } 2081 2082 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) { 2083 Mat B = NULL; 2084 DM dm; 2085 IS is_dummy, *cc_n; 2086 ISLocalToGlobalMapping l2gmap_dummy; 2087 PCBDDCGraph graph; 2088 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2089 PetscInt i, n; 2090 PetscInt *xadj, *adjncy; 2091 PetscBool isplex = PETSC_FALSE; 2092 2093 PetscFunctionBegin; 2094 if (ncc) *ncc = 0; 2095 if (cc) *cc = NULL; 2096 if (primalv) *primalv = NULL; 2097 PetscCall(PCBDDCGraphCreate(&graph)); 2098 PetscCall(MatGetDM(pc->pmat, &dm)); 2099 if (!dm) { PetscCall(PCGetDM(pc, &dm)); } 2100 if (dm) { PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex)); } 2101 if (filter) isplex = PETSC_FALSE; 2102 2103 if (isplex) { /* this code has been modified from plexpartition.c */ 2104 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2105 PetscInt *adj = NULL; 2106 IS cellNumbering; 2107 const PetscInt *cellNum; 2108 PetscBool useCone, useClosure; 2109 PetscSection section; 2110 PetscSegBuffer adjBuffer; 2111 PetscSF sfPoint; 2112 2113 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2114 PetscCall(DMGetPointSF(dm, &sfPoint)); 2115 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2116 /* Build adjacency graph via a section/segbuffer */ 2117 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2118 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2119 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2120 /* Always use FVM adjacency to create partitioner graph */ 2121 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2122 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2123 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2124 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2125 for (n = 0, p = pStart; p < pEnd; p++) { 2126 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2127 if (nroots > 0) { 2128 if (cellNum[p] < 0) continue; 2129 } 2130 adjSize = PETSC_DETERMINE; 2131 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2132 for (a = 0; a < adjSize; ++a) { 2133 const PetscInt point = adj[a]; 2134 if (pStart <= point && point < pEnd) { 2135 PetscInt *PETSC_RESTRICT pBuf; 2136 PetscCall(PetscSectionAddDof(section, p, 1)); 2137 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2138 *pBuf = point; 2139 } 2140 } 2141 n++; 2142 } 2143 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2144 /* Derive CSR graph from section/segbuffer */ 2145 PetscCall(PetscSectionSetUp(section)); 2146 PetscCall(PetscSectionGetStorageSize(section, &size)); 2147 PetscCall(PetscMalloc1(n + 1, &xadj)); 2148 for (idx = 0, p = pStart; p < pEnd; p++) { 2149 if (nroots > 0) { 2150 if (cellNum[p] < 0) continue; 2151 } 2152 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2153 } 2154 xadj[n] = size; 2155 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2156 /* Clean up */ 2157 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2158 PetscCall(PetscSectionDestroy(§ion)); 2159 PetscCall(PetscFree(adj)); 2160 graph->xadj = xadj; 2161 graph->adjncy = adjncy; 2162 } else { 2163 Mat A; 2164 PetscBool isseqaij, flg_row; 2165 2166 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2167 if (!A->rmap->N || !A->cmap->N) { 2168 PetscCall(PCBDDCGraphDestroy(&graph)); 2169 PetscFunctionReturn(0); 2170 } 2171 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2172 if (!isseqaij && filter) { 2173 PetscBool isseqdense; 2174 2175 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2176 if (!isseqdense) { 2177 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2178 } else { /* TODO: rectangular case and LDA */ 2179 PetscScalar *array; 2180 PetscReal chop = 1.e-6; 2181 2182 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2183 PetscCall(MatDenseGetArray(B, &array)); 2184 PetscCall(MatGetSize(B, &n, NULL)); 2185 for (i = 0; i < n; i++) { 2186 PetscInt j; 2187 for (j = i + 1; j < n; j++) { 2188 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2189 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2190 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2191 } 2192 } 2193 PetscCall(MatDenseRestoreArray(B, &array)); 2194 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2195 } 2196 } else { 2197 PetscCall(PetscObjectReference((PetscObject)A)); 2198 B = A; 2199 } 2200 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2201 2202 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2203 if (filter) { 2204 PetscScalar *data; 2205 PetscInt j, cum; 2206 2207 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2208 PetscCall(MatSeqAIJGetArray(B, &data)); 2209 cum = 0; 2210 for (i = 0; i < n; i++) { 2211 PetscInt t; 2212 2213 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2214 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { continue; } 2215 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2216 } 2217 t = xadj_filtered[i]; 2218 xadj_filtered[i] = cum; 2219 cum += t; 2220 } 2221 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2222 graph->xadj = xadj_filtered; 2223 graph->adjncy = adjncy_filtered; 2224 } else { 2225 graph->xadj = xadj; 2226 graph->adjncy = adjncy; 2227 } 2228 } 2229 /* compute local connected components using PCBDDCGraph */ 2230 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2231 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2232 PetscCall(ISDestroy(&is_dummy)); 2233 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2234 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2235 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2236 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2237 2238 /* partial clean up */ 2239 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2240 if (B) { 2241 PetscBool flg_row; 2242 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2243 PetscCall(MatDestroy(&B)); 2244 } 2245 if (isplex) { 2246 PetscCall(PetscFree(xadj)); 2247 PetscCall(PetscFree(adjncy)); 2248 } 2249 2250 /* get back data */ 2251 if (isplex) { 2252 if (ncc) *ncc = graph->ncc; 2253 if (cc || primalv) { 2254 Mat A; 2255 PetscBT btv, btvt; 2256 PetscSection subSection; 2257 PetscInt *ids, cum, cump, *cids, *pids; 2258 2259 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2260 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2261 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2262 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2263 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2264 2265 cids[0] = 0; 2266 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2267 PetscInt j; 2268 2269 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2270 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2271 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2272 2273 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2274 for (k = 0; k < 2 * size; k += 2) { 2275 PetscInt s, pp, p = closure[k], off, dof, cdof; 2276 2277 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2278 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2279 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2280 for (s = 0; s < dof - cdof; s++) { 2281 if (PetscBTLookupSet(btvt, off + s)) continue; 2282 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2283 else pids[cump++] = off + s; /* cross-vertex */ 2284 } 2285 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2286 if (pp != p) { 2287 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2288 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2289 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2290 for (s = 0; s < dof - cdof; s++) { 2291 if (PetscBTLookupSet(btvt, off + s)) continue; 2292 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2293 else pids[cump++] = off + s; /* cross-vertex */ 2294 } 2295 } 2296 } 2297 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2298 } 2299 cids[i + 1] = cum; 2300 /* mark dofs as already assigned */ 2301 for (j = cids[i]; j < cids[i + 1]; j++) { PetscCall(PetscBTSet(btv, ids[j])); } 2302 } 2303 if (cc) { 2304 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2305 for (i = 0; i < graph->ncc; i++) { PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); } 2306 *cc = cc_n; 2307 } 2308 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2309 PetscCall(PetscFree3(ids, cids, pids)); 2310 PetscCall(PetscBTDestroy(&btv)); 2311 PetscCall(PetscBTDestroy(&btvt)); 2312 } 2313 } else { 2314 if (ncc) *ncc = graph->ncc; 2315 if (cc) { 2316 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2317 for (i = 0; i < graph->ncc; i++) { PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); } 2318 *cc = cc_n; 2319 } 2320 } 2321 /* clean up graph */ 2322 graph->xadj = NULL; 2323 graph->adjncy = NULL; 2324 PetscCall(PCBDDCGraphDestroy(&graph)); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) { 2329 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2330 PC_IS *pcis = (PC_IS *)(pc->data); 2331 IS dirIS = NULL; 2332 PetscInt i; 2333 2334 PetscFunctionBegin; 2335 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2336 if (zerodiag) { 2337 Mat A; 2338 Vec vec3_N; 2339 PetscScalar *vals; 2340 const PetscInt *idxs; 2341 PetscInt nz, *count; 2342 2343 /* p0 */ 2344 PetscCall(VecSet(pcis->vec1_N, 0.)); 2345 PetscCall(PetscMalloc1(pcis->n, &vals)); 2346 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2347 PetscCall(ISGetIndices(zerodiag, &idxs)); 2348 for (i = 0; i < nz; i++) vals[i] = 1.; 2349 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2350 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2351 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2352 /* v_I */ 2353 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2354 for (i = 0; i < nz; i++) vals[i] = 0.; 2355 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2356 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2357 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2358 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2359 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2360 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2361 if (dirIS) { 2362 PetscInt n; 2363 2364 PetscCall(ISGetLocalSize(dirIS, &n)); 2365 PetscCall(ISGetIndices(dirIS, &idxs)); 2366 for (i = 0; i < n; i++) vals[i] = 0.; 2367 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2368 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2369 } 2370 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2371 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2372 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2373 PetscCall(VecSet(vec3_N, 0.)); 2374 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2375 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2376 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2377 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2378 PetscCall(PetscFree(vals)); 2379 PetscCall(VecDestroy(&vec3_N)); 2380 2381 /* there should not be any pressure dofs lying on the interface */ 2382 PetscCall(PetscCalloc1(pcis->n, &count)); 2383 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2384 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2385 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2386 PetscCall(ISGetIndices(zerodiag, &idxs)); 2387 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2388 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2389 PetscCall(PetscFree(count)); 2390 } 2391 PetscCall(ISDestroy(&dirIS)); 2392 2393 /* check PCBDDCBenignGetOrSetP0 */ 2394 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2395 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2396 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2397 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2398 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2399 for (i = 0; i < pcbddc->benign_n; i++) { 2400 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2401 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2402 } 2403 PetscFunctionReturn(0); 2404 } 2405 2406 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) { 2407 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2408 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 2409 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2410 PetscInt nz, n, benign_n, bsp = 1; 2411 PetscInt *interior_dofs, n_interior_dofs, nneu; 2412 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2413 2414 PetscFunctionBegin; 2415 if (reuse) goto project_b0; 2416 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2417 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2418 for (n = 0; n < pcbddc->benign_n; n++) { PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); } 2419 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2420 has_null_pressures = PETSC_TRUE; 2421 have_null = PETSC_TRUE; 2422 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2423 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2424 Checks if all the pressure dofs in each subdomain have a zero diagonal 2425 If not, a change of basis on pressures is not needed 2426 since the local Schur complements are already SPD 2427 */ 2428 if (pcbddc->n_ISForDofsLocal) { 2429 IS iP = NULL; 2430 PetscInt p, *pp; 2431 PetscBool flg; 2432 2433 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2434 n = pcbddc->n_ISForDofsLocal; 2435 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2436 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2437 PetscOptionsEnd(); 2438 if (!flg) { 2439 n = 1; 2440 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2441 } 2442 2443 bsp = 0; 2444 for (p = 0; p < n; p++) { 2445 PetscInt bs; 2446 2447 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2448 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2449 bsp += bs; 2450 } 2451 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2452 bsp = 0; 2453 for (p = 0; p < n; p++) { 2454 const PetscInt *idxs; 2455 PetscInt b, bs, npl, *bidxs; 2456 2457 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2458 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2459 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2460 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2461 for (b = 0; b < bs; b++) { 2462 PetscInt i; 2463 2464 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2465 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2466 bsp++; 2467 } 2468 PetscCall(PetscFree(bidxs)); 2469 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2470 } 2471 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2472 2473 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2474 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2475 if (iP) { 2476 IS newpressures; 2477 2478 PetscCall(ISDifference(pressures, iP, &newpressures)); 2479 PetscCall(ISDestroy(&pressures)); 2480 pressures = newpressures; 2481 } 2482 PetscCall(ISSorted(pressures, &sorted)); 2483 if (!sorted) { PetscCall(ISSort(pressures)); } 2484 PetscCall(PetscFree(pp)); 2485 } 2486 2487 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2488 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2489 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2490 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2491 PetscCall(ISSorted(zerodiag, &sorted)); 2492 if (!sorted) { PetscCall(ISSort(zerodiag)); } 2493 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2494 zerodiag_save = zerodiag; 2495 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2496 if (!nz) { 2497 if (n) have_null = PETSC_FALSE; 2498 has_null_pressures = PETSC_FALSE; 2499 PetscCall(ISDestroy(&zerodiag)); 2500 } 2501 recompute_zerodiag = PETSC_FALSE; 2502 2503 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2504 zerodiag_subs = NULL; 2505 benign_n = 0; 2506 n_interior_dofs = 0; 2507 interior_dofs = NULL; 2508 nneu = 0; 2509 if (pcbddc->NeumannBoundariesLocal) { PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); } 2510 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2511 if (checkb) { /* need to compute interior nodes */ 2512 PetscInt n, i, j; 2513 PetscInt n_neigh, *neigh, *n_shared, **shared; 2514 PetscInt *iwork; 2515 2516 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n)); 2517 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2518 PetscCall(PetscCalloc1(n, &iwork)); 2519 PetscCall(PetscMalloc1(n, &interior_dofs)); 2520 for (i = 1; i < n_neigh; i++) 2521 for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1; 2522 for (i = 0; i < n; i++) 2523 if (!iwork[i]) interior_dofs[n_interior_dofs++] = i; 2524 PetscCall(PetscFree(iwork)); 2525 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2526 } 2527 if (has_null_pressures) { 2528 IS *subs; 2529 PetscInt nsubs, i, j, nl; 2530 const PetscInt *idxs; 2531 PetscScalar *array; 2532 Vec *work; 2533 2534 subs = pcbddc->local_subs; 2535 nsubs = pcbddc->n_local_subs; 2536 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2537 if (checkb) { 2538 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2539 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2540 PetscCall(ISGetIndices(zerodiag, &idxs)); 2541 /* work[0] = 1_p */ 2542 PetscCall(VecSet(work[0], 0.)); 2543 PetscCall(VecGetArray(work[0], &array)); 2544 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2545 PetscCall(VecRestoreArray(work[0], &array)); 2546 /* work[0] = 1_v */ 2547 PetscCall(VecSet(work[1], 1.)); 2548 PetscCall(VecGetArray(work[1], &array)); 2549 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2550 PetscCall(VecRestoreArray(work[1], &array)); 2551 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2552 } 2553 2554 if (nsubs > 1 || bsp > 1) { 2555 IS *is; 2556 PetscInt b, totb; 2557 2558 totb = bsp; 2559 is = bsp > 1 ? bzerodiag : &zerodiag; 2560 nsubs = PetscMax(nsubs, 1); 2561 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2562 for (b = 0; b < totb; b++) { 2563 for (i = 0; i < nsubs; i++) { 2564 ISLocalToGlobalMapping l2g; 2565 IS t_zerodiag_subs; 2566 PetscInt nl; 2567 2568 if (subs) { 2569 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2570 } else { 2571 IS tis; 2572 2573 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2574 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2575 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2576 PetscCall(ISDestroy(&tis)); 2577 } 2578 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2579 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2580 if (nl) { 2581 PetscBool valid = PETSC_TRUE; 2582 2583 if (checkb) { 2584 PetscCall(VecSet(matis->x, 0)); 2585 PetscCall(ISGetLocalSize(subs[i], &nl)); 2586 PetscCall(ISGetIndices(subs[i], &idxs)); 2587 PetscCall(VecGetArray(matis->x, &array)); 2588 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2589 PetscCall(VecRestoreArray(matis->x, &array)); 2590 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2591 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2592 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2593 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2594 PetscCall(VecGetArray(matis->y, &array)); 2595 for (j = 0; j < n_interior_dofs; j++) { 2596 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2597 valid = PETSC_FALSE; 2598 break; 2599 } 2600 } 2601 PetscCall(VecRestoreArray(matis->y, &array)); 2602 } 2603 if (valid && nneu) { 2604 const PetscInt *idxs; 2605 PetscInt nzb; 2606 2607 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2608 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2609 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2610 if (nzb) valid = PETSC_FALSE; 2611 } 2612 if (valid && pressures) { 2613 IS t_pressure_subs, tmp; 2614 PetscInt i1, i2; 2615 2616 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2617 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2618 PetscCall(ISGetLocalSize(tmp, &i1)); 2619 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2620 if (i2 != i1) valid = PETSC_FALSE; 2621 PetscCall(ISDestroy(&t_pressure_subs)); 2622 PetscCall(ISDestroy(&tmp)); 2623 } 2624 if (valid) { 2625 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2626 benign_n++; 2627 } else recompute_zerodiag = PETSC_TRUE; 2628 } 2629 PetscCall(ISDestroy(&t_zerodiag_subs)); 2630 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2631 } 2632 } 2633 } else { /* there's just one subdomain (or zero if they have not been detected */ 2634 PetscBool valid = PETSC_TRUE; 2635 2636 if (nneu) valid = PETSC_FALSE; 2637 if (valid && pressures) { PetscCall(ISEqual(pressures, zerodiag, &valid)); } 2638 if (valid && checkb) { 2639 PetscCall(MatMult(matis->A, work[0], matis->x)); 2640 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2641 PetscCall(VecGetArray(matis->x, &array)); 2642 for (j = 0; j < n_interior_dofs; j++) { 2643 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2644 valid = PETSC_FALSE; 2645 break; 2646 } 2647 } 2648 PetscCall(VecRestoreArray(matis->x, &array)); 2649 } 2650 if (valid) { 2651 benign_n = 1; 2652 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2653 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2654 zerodiag_subs[0] = zerodiag; 2655 } 2656 } 2657 if (checkb) { PetscCall(VecDestroyVecs(2, &work)); } 2658 } 2659 PetscCall(PetscFree(interior_dofs)); 2660 2661 if (!benign_n) { 2662 PetscInt n; 2663 2664 PetscCall(ISDestroy(&zerodiag)); 2665 recompute_zerodiag = PETSC_FALSE; 2666 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2667 if (n) have_null = PETSC_FALSE; 2668 } 2669 2670 /* final check for null pressures */ 2671 if (zerodiag && pressures) { PetscCall(ISEqual(pressures, zerodiag, &have_null)); } 2672 2673 if (recompute_zerodiag) { 2674 PetscCall(ISDestroy(&zerodiag)); 2675 if (benign_n == 1) { 2676 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2677 zerodiag = zerodiag_subs[0]; 2678 } else { 2679 PetscInt i, nzn, *new_idxs; 2680 2681 nzn = 0; 2682 for (i = 0; i < benign_n; i++) { 2683 PetscInt ns; 2684 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2685 nzn += ns; 2686 } 2687 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2688 nzn = 0; 2689 for (i = 0; i < benign_n; i++) { 2690 PetscInt ns, *idxs; 2691 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2692 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2693 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2694 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2695 nzn += ns; 2696 } 2697 PetscCall(PetscSortInt(nzn, new_idxs)); 2698 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2699 } 2700 have_null = PETSC_FALSE; 2701 } 2702 2703 /* determines if the coarse solver will be singular or not */ 2704 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2705 2706 /* Prepare matrix to compute no-net-flux */ 2707 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2708 Mat A, loc_divudotp; 2709 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2710 IS row, col, isused = NULL; 2711 PetscInt M, N, n, st, n_isused; 2712 2713 if (pressures) { 2714 isused = pressures; 2715 } else { 2716 isused = zerodiag_save; 2717 } 2718 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2719 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2720 PetscCall(MatGetLocalSize(A, &n, NULL)); 2721 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2722 n_isused = 0; 2723 if (isused) { PetscCall(ISGetLocalSize(isused, &n_isused)); } 2724 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2725 st = st - n_isused; 2726 if (n) { 2727 const PetscInt *gidxs; 2728 2729 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2730 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2731 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2732 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2733 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2734 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2735 } else { 2736 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2737 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2738 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2739 } 2740 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2741 PetscCall(ISGetSize(row, &M)); 2742 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2743 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2744 PetscCall(ISDestroy(&row)); 2745 PetscCall(ISDestroy(&col)); 2746 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2747 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2748 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2749 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2750 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2751 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2752 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2753 PetscCall(MatDestroy(&loc_divudotp)); 2754 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2755 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2756 } 2757 PetscCall(ISDestroy(&zerodiag_save)); 2758 PetscCall(ISDestroy(&pressures)); 2759 if (bzerodiag) { 2760 PetscInt i; 2761 2762 for (i = 0; i < bsp; i++) { PetscCall(ISDestroy(&bzerodiag[i])); } 2763 PetscCall(PetscFree(bzerodiag)); 2764 } 2765 pcbddc->benign_n = benign_n; 2766 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2767 2768 /* determines if the problem has subdomains with 0 pressure block */ 2769 have_null = (PetscBool)(!!pcbddc->benign_n); 2770 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2771 2772 project_b0: 2773 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2774 /* change of basis and p0 dofs */ 2775 if (pcbddc->benign_n) { 2776 PetscInt i, s, *nnz; 2777 2778 /* local change of basis for pressures */ 2779 PetscCall(MatDestroy(&pcbddc->benign_change)); 2780 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2781 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2782 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2783 PetscCall(PetscMalloc1(n, &nnz)); 2784 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2785 for (i = 0; i < pcbddc->benign_n; i++) { 2786 const PetscInt *idxs; 2787 PetscInt nzs, j; 2788 2789 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2790 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2791 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2792 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2793 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2794 } 2795 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2796 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2797 PetscCall(PetscFree(nnz)); 2798 /* set identity by default */ 2799 for (i = 0; i < n; i++) { PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); } 2800 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 2801 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 2802 /* set change on pressures */ 2803 for (s = 0; s < pcbddc->benign_n; s++) { 2804 PetscScalar *array; 2805 const PetscInt *idxs; 2806 PetscInt nzs; 2807 2808 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 2809 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2810 for (i = 0; i < nzs - 1; i++) { 2811 PetscScalar vals[2]; 2812 PetscInt cols[2]; 2813 2814 cols[0] = idxs[i]; 2815 cols[1] = idxs[nzs - 1]; 2816 vals[0] = 1.; 2817 vals[1] = 1.; 2818 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 2819 } 2820 PetscCall(PetscMalloc1(nzs, &array)); 2821 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 2822 array[nzs - 1] = 1.; 2823 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 2824 /* store local idxs for p0 */ 2825 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 2826 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2827 PetscCall(PetscFree(array)); 2828 } 2829 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2830 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2831 2832 /* project if needed */ 2833 if (pcbddc->benign_change_explicit) { 2834 Mat M; 2835 2836 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 2837 PetscCall(MatDestroy(&pcbddc->local_mat)); 2838 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 2839 PetscCall(MatDestroy(&M)); 2840 } 2841 /* store global idxs for p0 */ 2842 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 2843 } 2844 *zerodiaglocal = zerodiag; 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) { 2849 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2850 PetscScalar *array; 2851 2852 PetscFunctionBegin; 2853 if (!pcbddc->benign_sf) { 2854 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 2855 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 2856 } 2857 if (get) { 2858 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 2859 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2860 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2861 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 2862 } else { 2863 PetscCall(VecGetArray(v, &array)); 2864 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2865 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2866 PetscCall(VecRestoreArray(v, &array)); 2867 } 2868 PetscFunctionReturn(0); 2869 } 2870 2871 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) { 2872 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2873 2874 PetscFunctionBegin; 2875 /* TODO: add error checking 2876 - avoid nested pop (or push) calls. 2877 - cannot push before pop. 2878 - cannot call this if pcbddc->local_mat is NULL 2879 */ 2880 if (!pcbddc->benign_n) { PetscFunctionReturn(0); } 2881 if (pop) { 2882 if (pcbddc->benign_change_explicit) { 2883 IS is_p0; 2884 MatReuse reuse; 2885 2886 /* extract B_0 */ 2887 reuse = MAT_INITIAL_MATRIX; 2888 if (pcbddc->benign_B0) { reuse = MAT_REUSE_MATRIX; } 2889 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 2890 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 2891 /* remove rows and cols from local problem */ 2892 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 2893 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 2894 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 2895 PetscCall(ISDestroy(&is_p0)); 2896 } else { 2897 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2898 PetscScalar *vals; 2899 PetscInt i, n, *idxs_ins; 2900 2901 PetscCall(VecGetLocalSize(matis->y, &n)); 2902 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 2903 if (!pcbddc->benign_B0) { 2904 PetscInt *nnz; 2905 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 2906 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 2907 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 2908 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 2909 for (i = 0; i < pcbddc->benign_n; i++) { 2910 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 2911 nnz[i] = n - nnz[i]; 2912 } 2913 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 2914 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2915 PetscCall(PetscFree(nnz)); 2916 } 2917 2918 for (i = 0; i < pcbddc->benign_n; i++) { 2919 PetscScalar *array; 2920 PetscInt *idxs, j, nz, cum; 2921 2922 PetscCall(VecSet(matis->x, 0.)); 2923 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 2924 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2925 for (j = 0; j < nz; j++) vals[j] = 1.; 2926 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 2927 PetscCall(VecAssemblyBegin(matis->x)); 2928 PetscCall(VecAssemblyEnd(matis->x)); 2929 PetscCall(VecSet(matis->y, 0.)); 2930 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2931 PetscCall(VecGetArray(matis->y, &array)); 2932 cum = 0; 2933 for (j = 0; j < n; j++) { 2934 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2935 vals[cum] = array[j]; 2936 idxs_ins[cum] = j; 2937 cum++; 2938 } 2939 } 2940 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 2941 PetscCall(VecRestoreArray(matis->y, &array)); 2942 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2943 } 2944 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2945 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2946 PetscCall(PetscFree2(idxs_ins, vals)); 2947 } 2948 } else { /* push */ 2949 if (pcbddc->benign_change_explicit) { 2950 PetscInt i; 2951 2952 for (i = 0; i < pcbddc->benign_n; i++) { 2953 PetscScalar *B0_vals; 2954 PetscInt *B0_cols, B0_ncol; 2955 2956 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2957 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 2958 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 2959 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 2960 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2961 } 2962 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2963 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2964 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 2965 } 2966 PetscFunctionReturn(0); 2967 } 2968 2969 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) { 2970 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2971 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2972 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 2973 PetscBLASInt *B_iwork, *B_ifail; 2974 PetscScalar *work, lwork; 2975 PetscScalar *St, *S, *eigv; 2976 PetscScalar *Sarray, *Starray; 2977 PetscReal *eigs, thresh, lthresh, uthresh; 2978 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 2979 PetscBool allocated_S_St, upart; 2980 #if defined(PETSC_USE_COMPLEX) 2981 PetscReal *rwork; 2982 #endif 2983 2984 PetscFunctionBegin; 2985 if (!pcbddc->adaptive_selection) PetscFunctionReturn(0); 2986 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 2987 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2988 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 2989 sub_schurs->is_posdef); 2990 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 2991 2992 if (pcbddc->dbg_flag) { 2993 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 2994 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 2995 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 2996 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 2997 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 2998 } 2999 3000 if (pcbddc->dbg_flag) { PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); } 3001 3002 /* max size of subsets */ 3003 mss = 0; 3004 for (i = 0; i < sub_schurs->n_subs; i++) { 3005 PetscInt subset_size; 3006 3007 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3008 mss = PetscMax(mss, subset_size); 3009 } 3010 3011 /* min/max and threshold */ 3012 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3013 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3014 nmax = PetscMax(nmin, nmax); 3015 allocated_S_St = PETSC_FALSE; 3016 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3017 allocated_S_St = PETSC_TRUE; 3018 } 3019 3020 /* allocate lapack workspace */ 3021 cum = cum2 = 0; 3022 maxneigs = 0; 3023 for (i = 0; i < sub_schurs->n_subs; i++) { 3024 PetscInt n, subset_size; 3025 3026 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3027 n = PetscMin(subset_size, nmax); 3028 cum += subset_size; 3029 cum2 += subset_size * n; 3030 maxneigs = PetscMax(maxneigs, n); 3031 } 3032 lwork = 0; 3033 if (mss) { 3034 if (sub_schurs->is_symmetric) { 3035 PetscScalar sdummy = 0.; 3036 PetscBLASInt B_itype = 1; 3037 PetscBLASInt B_N = mss, idummy = 0; 3038 PetscReal rdummy = 0., zero = 0.0; 3039 PetscReal eps = 0.0; /* dlamch? */ 3040 3041 B_lwork = -1; 3042 /* some implementations may complain about NULL pointers, even if we are querying */ 3043 S = &sdummy; 3044 St = &sdummy; 3045 eigs = &rdummy; 3046 eigv = &sdummy; 3047 B_iwork = &idummy; 3048 B_ifail = &idummy; 3049 #if defined(PETSC_USE_COMPLEX) 3050 rwork = &rdummy; 3051 #endif 3052 thresh = 1.0; 3053 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3054 #if defined(PETSC_USE_COMPLEX) 3055 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3056 #else 3057 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3058 #endif 3059 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3060 PetscCall(PetscFPTrapPop()); 3061 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3062 } 3063 3064 nv = 0; 3065 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3066 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3067 } 3068 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3069 if (allocated_S_St) { PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); } 3070 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3071 #if defined(PETSC_USE_COMPLEX) 3072 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3073 #endif 3074 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3075 &pcbddc->adaptive_constraints_data)); 3076 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3077 3078 maxneigs = 0; 3079 cum = cumarray = 0; 3080 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3081 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3082 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3083 const PetscInt *idxs; 3084 3085 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3086 for (cum = 0; cum < nv; cum++) { 3087 pcbddc->adaptive_constraints_n[cum] = 1; 3088 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3089 pcbddc->adaptive_constraints_data[cum] = 1.0; 3090 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3091 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3092 } 3093 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3094 } 3095 3096 if (mss) { /* multilevel */ 3097 if (sub_schurs->gdsw) { 3098 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3099 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3100 } else { 3101 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3102 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3103 } 3104 } 3105 3106 lthresh = pcbddc->adaptive_threshold[0]; 3107 uthresh = pcbddc->adaptive_threshold[1]; 3108 upart = pcbddc->use_deluxe_scaling; 3109 for (i = 0; i < sub_schurs->n_subs; i++) { 3110 const PetscInt *idxs; 3111 PetscReal upper, lower; 3112 PetscInt j, subset_size, eigs_start = 0; 3113 PetscBLASInt B_N; 3114 PetscBool same_data = PETSC_FALSE; 3115 PetscBool scal = PETSC_FALSE; 3116 3117 if (upart) { 3118 upper = PETSC_MAX_REAL; 3119 lower = uthresh; 3120 } else { 3121 if (sub_schurs->gdsw) { 3122 upper = uthresh; 3123 lower = PETSC_MIN_REAL; 3124 } else { 3125 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3126 upper = 1. / uthresh; 3127 lower = 0.; 3128 } 3129 } 3130 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3131 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3132 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3133 /* this is experimental: we assume the dofs have been properly grouped to have 3134 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3135 if (!sub_schurs->is_posdef) { 3136 Mat T; 3137 3138 for (j = 0; j < subset_size; j++) { 3139 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3140 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3141 PetscCall(MatScale(T, -1.0)); 3142 PetscCall(MatDestroy(&T)); 3143 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3144 PetscCall(MatScale(T, -1.0)); 3145 PetscCall(MatDestroy(&T)); 3146 if (sub_schurs->change_primal_sub) { 3147 PetscInt nz, k; 3148 const PetscInt *idxs; 3149 3150 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3151 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3152 for (k = 0; k < nz; k++) { 3153 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3154 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3155 } 3156 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3157 } 3158 scal = PETSC_TRUE; 3159 break; 3160 } 3161 } 3162 } 3163 3164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3165 if (sub_schurs->is_symmetric) { 3166 PetscInt j, k; 3167 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3168 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3169 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3170 } 3171 for (j = 0; j < subset_size; j++) { 3172 for (k = j; k < subset_size; k++) { 3173 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3174 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3175 } 3176 } 3177 } else { 3178 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3179 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3180 } 3181 } else { 3182 S = Sarray + cumarray; 3183 St = Starray + cumarray; 3184 } 3185 /* see if we can save some work */ 3186 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); } 3187 3188 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3189 B_neigs = 0; 3190 } else { 3191 if (sub_schurs->is_symmetric) { 3192 PetscBLASInt B_itype = 1; 3193 PetscBLASInt B_IL, B_IU; 3194 PetscReal eps = -1.0; /* dlamch? */ 3195 PetscInt nmin_s; 3196 PetscBool compute_range; 3197 3198 B_neigs = 0; 3199 compute_range = (PetscBool)!same_data; 3200 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3201 3202 if (pcbddc->dbg_flag) { 3203 PetscInt nc = 0; 3204 3205 if (sub_schurs->change_primal_sub) { PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); } 3206 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, 3207 sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc)); 3208 } 3209 3210 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3211 if (compute_range) { 3212 /* ask for eigenvalues larger than thresh */ 3213 if (sub_schurs->is_posdef) { 3214 #if defined(PETSC_USE_COMPLEX) 3215 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3216 #else 3217 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3218 #endif 3219 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3220 } else { /* no theory so far, but it works nicely */ 3221 PetscInt recipe = 0, recipe_m = 1; 3222 PetscReal bb[2]; 3223 3224 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3225 switch (recipe) { 3226 case 0: 3227 if (scal) { 3228 bb[0] = PETSC_MIN_REAL; 3229 bb[1] = lthresh; 3230 } else { 3231 bb[0] = uthresh; 3232 bb[1] = PETSC_MAX_REAL; 3233 } 3234 #if defined(PETSC_USE_COMPLEX) 3235 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3236 #else 3237 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3238 #endif 3239 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3240 break; 3241 case 1: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh * lthresh; 3242 #if defined(PETSC_USE_COMPLEX) 3243 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3244 #else 3245 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3246 #endif 3247 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3248 if (!scal) { 3249 PetscBLASInt B_neigs2 = 0; 3250 3251 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3252 bb[1] = PETSC_MAX_REAL; 3253 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3254 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3255 #if defined(PETSC_USE_COMPLEX) 3256 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3257 #else 3258 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3259 #endif 3260 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3261 B_neigs += B_neigs2; 3262 } 3263 break; 3264 case 2: 3265 if (scal) { 3266 bb[0] = PETSC_MIN_REAL; 3267 bb[1] = 0; 3268 #if defined(PETSC_USE_COMPLEX) 3269 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3270 #else 3271 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3272 #endif 3273 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3274 } else { 3275 PetscBLASInt B_neigs2 = 0; 3276 PetscBool import = PETSC_FALSE; 3277 3278 lthresh = PetscMax(lthresh, 0.0); 3279 if (lthresh > 0.0) { 3280 bb[0] = PETSC_MIN_REAL; 3281 bb[1] = lthresh * lthresh; 3282 3283 import = PETSC_TRUE; 3284 #if defined(PETSC_USE_COMPLEX) 3285 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3286 #else 3287 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3288 #endif 3289 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3290 } 3291 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3292 bb[1] = PETSC_MAX_REAL; 3293 if (import) { 3294 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3295 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3296 } 3297 #if defined(PETSC_USE_COMPLEX) 3298 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3299 #else 3300 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3301 #endif 3302 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3303 B_neigs += B_neigs2; 3304 } 3305 break; 3306 case 3: 3307 if (scal) { 3308 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3309 } else { 3310 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3311 } 3312 if (!scal) { 3313 bb[0] = uthresh; 3314 bb[1] = PETSC_MAX_REAL; 3315 #if defined(PETSC_USE_COMPLEX) 3316 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3317 #else 3318 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3319 #endif 3320 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3321 } 3322 if (recipe_m > 0 && B_N - B_neigs > 0) { 3323 PetscBLASInt B_neigs2 = 0; 3324 3325 B_IL = 1; 3326 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3327 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3328 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3329 #if defined(PETSC_USE_COMPLEX) 3330 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3331 #else 3332 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3333 #endif 3334 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3335 B_neigs += B_neigs2; 3336 } 3337 break; 3338 case 4: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3339 #if defined(PETSC_USE_COMPLEX) 3340 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3341 #else 3342 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3343 #endif 3344 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3345 { 3346 PetscBLASInt B_neigs2 = 0; 3347 3348 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3349 bb[1] = PETSC_MAX_REAL; 3350 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3351 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3352 #if defined(PETSC_USE_COMPLEX) 3353 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3354 #else 3355 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3356 #endif 3357 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3358 B_neigs += B_neigs2; 3359 } 3360 break; 3361 case 5: /* same as before: first compute all eigenvalues, then filter */ 3362 #if defined(PETSC_USE_COMPLEX) 3363 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3364 #else 3365 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3366 #endif 3367 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3368 { 3369 PetscInt e, k, ne; 3370 for (e = 0, ne = 0; e < B_neigs; e++) { 3371 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3372 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3373 eigs[ne] = eigs[e]; 3374 ne++; 3375 } 3376 } 3377 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3378 B_neigs = ne; 3379 } 3380 break; 3381 default: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3382 } 3383 } 3384 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3385 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3386 B_IL = 1; 3387 #if defined(PETSC_USE_COMPLEX) 3388 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3389 #else 3390 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3391 #endif 3392 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3393 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3394 PetscInt k; 3395 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3396 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3397 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3398 nmin = nmax; 3399 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3400 for (k = 0; k < nmax; k++) { 3401 eigs[k] = 1. / PETSC_SMALL; 3402 eigv[k * (subset_size + 1)] = 1.0; 3403 } 3404 } 3405 PetscCall(PetscFPTrapPop()); 3406 if (B_ierr) { 3407 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3408 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3409 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3410 } 3411 3412 if (B_neigs > nmax) { 3413 if (pcbddc->dbg_flag) { PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); } 3414 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3415 B_neigs = nmax; 3416 } 3417 3418 nmin_s = PetscMin(nmin, B_N); 3419 if (B_neigs < nmin_s) { 3420 PetscBLASInt B_neigs2 = 0; 3421 3422 if (upart) { 3423 if (scal) { 3424 B_IU = nmin_s; 3425 B_IL = B_neigs + 1; 3426 } else { 3427 B_IL = B_N - nmin_s + 1; 3428 B_IU = B_N - B_neigs; 3429 } 3430 } else { 3431 B_IL = B_neigs + 1; 3432 B_IU = nmin_s; 3433 } 3434 if (pcbddc->dbg_flag) { 3435 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3436 } 3437 if (sub_schurs->is_symmetric) { 3438 PetscInt j, k; 3439 for (j = 0; j < subset_size; j++) { 3440 for (k = j; k < subset_size; k++) { 3441 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3442 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3443 } 3444 } 3445 } else { 3446 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3447 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3448 } 3449 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3450 #if defined(PETSC_USE_COMPLEX) 3451 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3452 #else 3453 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3454 #endif 3455 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3456 PetscCall(PetscFPTrapPop()); 3457 B_neigs += B_neigs2; 3458 } 3459 if (B_ierr) { 3460 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3461 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3462 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3463 } 3464 if (pcbddc->dbg_flag) { 3465 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3466 for (j = 0; j < B_neigs; j++) { 3467 if (!sub_schurs->gdsw) { 3468 if (eigs[j] == 0.0) { 3469 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3470 } else { 3471 if (upart) { 3472 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3473 } else { 3474 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3475 } 3476 } 3477 } else { 3478 double pg = (double)eigs[j + eigs_start]; 3479 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3480 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3481 } 3482 } 3483 } 3484 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3485 } 3486 /* change the basis back to the original one */ 3487 if (sub_schurs->change) { 3488 Mat change, phi, phit; 3489 3490 if (pcbddc->dbg_flag > 2) { 3491 PetscInt ii; 3492 for (ii = 0; ii < B_neigs; ii++) { 3493 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3494 for (j = 0; j < B_N; j++) { 3495 #if defined(PETSC_USE_COMPLEX) 3496 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3497 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3498 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3499 #else 3500 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3501 #endif 3502 } 3503 } 3504 } 3505 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3506 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3507 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3508 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3509 PetscCall(MatDestroy(&phit)); 3510 PetscCall(MatDestroy(&phi)); 3511 } 3512 maxneigs = PetscMax(B_neigs, maxneigs); 3513 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3514 if (B_neigs) { 3515 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3516 3517 if (pcbddc->dbg_flag > 1) { 3518 PetscInt ii; 3519 for (ii = 0; ii < B_neigs; ii++) { 3520 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3521 for (j = 0; j < B_N; j++) { 3522 #if defined(PETSC_USE_COMPLEX) 3523 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3524 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3525 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3526 #else 3527 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3528 #endif 3529 } 3530 } 3531 } 3532 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3533 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3534 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3535 cum++; 3536 } 3537 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3538 /* shift for next computation */ 3539 cumarray += subset_size * subset_size; 3540 } 3541 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3542 3543 if (mss) { 3544 if (sub_schurs->gdsw) { 3545 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3546 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3547 } else { 3548 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3549 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3550 /* destroy matrices (junk) */ 3551 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3552 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3553 } 3554 } 3555 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3556 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3557 #if defined(PETSC_USE_COMPLEX) 3558 PetscCall(PetscFree(rwork)); 3559 #endif 3560 if (pcbddc->dbg_flag) { 3561 PetscInt maxneigs_r; 3562 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3563 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3564 } 3565 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3566 PetscFunctionReturn(0); 3567 } 3568 3569 PetscErrorCode PCBDDCSetUpSolvers(PC pc) { 3570 PetscScalar *coarse_submat_vals; 3571 3572 PetscFunctionBegin; 3573 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3574 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3575 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3576 3577 /* Setup local neumann solver ksp_R */ 3578 /* PCBDDCSetUpLocalScatters should be called first! */ 3579 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3580 3581 /* 3582 Setup local correction and local part of coarse basis. 3583 Gives back the dense local part of the coarse matrix in column major ordering 3584 */ 3585 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals)); 3586 3587 /* Compute total number of coarse nodes and setup coarse solver */ 3588 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals)); 3589 3590 /* free */ 3591 PetscCall(PetscFree(coarse_submat_vals)); 3592 PetscFunctionReturn(0); 3593 } 3594 3595 PetscErrorCode PCBDDCResetCustomization(PC pc) { 3596 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3597 3598 PetscFunctionBegin; 3599 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3600 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3601 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3602 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3603 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3604 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3605 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3606 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3607 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3608 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3609 PetscFunctionReturn(0); 3610 } 3611 3612 PetscErrorCode PCBDDCResetTopography(PC pc) { 3613 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3614 PetscInt i; 3615 3616 PetscFunctionBegin; 3617 PetscCall(MatDestroy(&pcbddc->nedcG)); 3618 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3619 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3620 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3621 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3622 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3623 PetscCall(VecDestroy(&pcbddc->work_change)); 3624 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3625 PetscCall(MatDestroy(&pcbddc->divudotp)); 3626 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3627 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3628 for (i = 0; i < pcbddc->n_local_subs; i++) { PetscCall(ISDestroy(&pcbddc->local_subs[i])); } 3629 pcbddc->n_local_subs = 0; 3630 PetscCall(PetscFree(pcbddc->local_subs)); 3631 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3632 pcbddc->graphanalyzed = PETSC_FALSE; 3633 pcbddc->recompute_topography = PETSC_TRUE; 3634 pcbddc->corner_selected = PETSC_FALSE; 3635 PetscFunctionReturn(0); 3636 } 3637 3638 PetscErrorCode PCBDDCResetSolvers(PC pc) { 3639 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3640 3641 PetscFunctionBegin; 3642 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3643 if (pcbddc->coarse_phi_B) { 3644 PetscScalar *array; 3645 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array)); 3646 PetscCall(PetscFree(array)); 3647 } 3648 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3649 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3650 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3651 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3652 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3653 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3654 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3655 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3656 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3657 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3658 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3659 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3660 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3661 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3662 PetscCall(KSPReset(pcbddc->ksp_D)); 3663 PetscCall(KSPReset(pcbddc->ksp_R)); 3664 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3665 PetscCall(MatDestroy(&pcbddc->local_mat)); 3666 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3667 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3668 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3669 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3670 PetscCall(MatDestroy(&pcbddc->benign_change)); 3671 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3672 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3673 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3674 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3675 if (pcbddc->benign_zerodiag_subs) { 3676 PetscInt i; 3677 for (i = 0; i < pcbddc->benign_n; i++) { PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); } 3678 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3679 } 3680 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3681 PetscFunctionReturn(0); 3682 } 3683 3684 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) { 3685 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3686 PC_IS *pcis = (PC_IS *)pc->data; 3687 VecType impVecType; 3688 PetscInt n_constraints, n_R, old_size; 3689 3690 PetscFunctionBegin; 3691 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3692 n_R = pcis->n - pcbddc->n_vertices; 3693 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3694 /* local work vectors (try to avoid unneeded work)*/ 3695 /* R nodes */ 3696 old_size = -1; 3697 if (pcbddc->vec1_R) { PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); } 3698 if (n_R != old_size) { 3699 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3700 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3701 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3702 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3703 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3704 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3705 } 3706 /* local primal dofs */ 3707 old_size = -1; 3708 if (pcbddc->vec1_P) { PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); } 3709 if (pcbddc->local_primal_size != old_size) { 3710 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3711 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3712 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3713 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3714 } 3715 /* local explicit constraints */ 3716 old_size = -1; 3717 if (pcbddc->vec1_C) { PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); } 3718 if (n_constraints && n_constraints != old_size) { 3719 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3720 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3721 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3722 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3723 } 3724 PetscFunctionReturn(0); 3725 } 3726 3727 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) { 3728 /* pointers to pcis and pcbddc */ 3729 PC_IS *pcis = (PC_IS *)pc->data; 3730 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3731 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3732 /* submatrices of local problem */ 3733 Mat A_RV, A_VR, A_VV, local_auxmat2_R; 3734 /* submatrices of local coarse problem */ 3735 Mat S_VV, S_CV, S_VC, S_CC; 3736 /* working matrices */ 3737 Mat C_CR; 3738 /* additional working stuff */ 3739 PC pc_R; 3740 Mat F, Brhs = NULL; 3741 Vec dummy_vec; 3742 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 3743 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3744 PetscScalar *work; 3745 PetscInt *idx_V_B; 3746 PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I; 3747 PetscInt i, n_R, n_D, n_B; 3748 PetscScalar one = 1.0, m_one = -1.0; 3749 3750 PetscFunctionBegin; 3751 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 3752 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3753 3754 /* Set Non-overlapping dimensions */ 3755 n_vertices = pcbddc->n_vertices; 3756 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3757 n_B = pcis->n_B; 3758 n_D = pcis->n - n_B; 3759 n_R = pcis->n - n_vertices; 3760 3761 /* vertices in boundary numbering */ 3762 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 3763 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 3764 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 3765 3766 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3767 PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals)); 3768 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV)); 3769 PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size)); 3770 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV)); 3771 PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size)); 3772 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC)); 3773 PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size)); 3774 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC)); 3775 PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size)); 3776 3777 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3778 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 3779 PetscCall(PCSetUp(pc_R)); 3780 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 3781 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 3782 lda_rhs = n_R; 3783 need_benign_correction = PETSC_FALSE; 3784 if (isLU || isCHOL) { 3785 PetscCall(PCFactorGetMatrix(pc_R, &F)); 3786 } else if (sub_schurs && sub_schurs->reuse_solver) { 3787 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3788 MatFactorType type; 3789 3790 F = reuse_solver->F; 3791 PetscCall(MatGetFactorType(F, &type)); 3792 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3793 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3794 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 3795 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3796 } else F = NULL; 3797 3798 /* determine if we can use a sparse right-hand side */ 3799 sparserhs = PETSC_FALSE; 3800 if (F) { 3801 MatSolverType solver; 3802 3803 PetscCall(MatFactorGetSolverType(F, &solver)); 3804 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 3805 } 3806 3807 /* allocate workspace */ 3808 n = 0; 3809 if (n_constraints) { n += lda_rhs * n_constraints; } 3810 if (n_vertices) { 3811 n = PetscMax(2 * lda_rhs * n_vertices, n); 3812 n = PetscMax((lda_rhs + n_B) * n_vertices, n); 3813 } 3814 if (!pcbddc->symmetric_primal) { n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n); } 3815 PetscCall(PetscMalloc1(n, &work)); 3816 3817 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3818 dummy_vec = NULL; 3819 if (need_benign_correction && lda_rhs != n_R && F) { 3820 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 3821 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 3822 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 3823 } 3824 3825 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3826 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3827 3828 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3829 if (n_constraints) { 3830 Mat M3, C_B; 3831 IS is_aux; 3832 3833 /* Extract constraints on R nodes: C_{CR} */ 3834 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux)); 3835 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 3836 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 3837 3838 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3839 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3840 if (!sparserhs) { 3841 PetscCall(PetscArrayzero(work, lda_rhs * n_constraints)); 3842 for (i = 0; i < n_constraints; i++) { 3843 const PetscScalar *row_cmat_values; 3844 const PetscInt *row_cmat_indices; 3845 PetscInt size_of_constraint, j; 3846 3847 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3848 for (j = 0; j < size_of_constraint; j++) { work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j]; } 3849 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3850 } 3851 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs)); 3852 } else { 3853 Mat tC_CR; 3854 3855 PetscCall(MatScale(C_CR, -1.0)); 3856 if (lda_rhs != n_R) { 3857 PetscScalar *aa; 3858 PetscInt r, *ii, *jj; 3859 PetscBool done; 3860 3861 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3862 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 3863 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 3864 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 3865 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3866 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 3867 } else { 3868 PetscCall(PetscObjectReference((PetscObject)C_CR)); 3869 tC_CR = C_CR; 3870 } 3871 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 3872 PetscCall(MatDestroy(&tC_CR)); 3873 } 3874 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R)); 3875 if (F) { 3876 if (need_benign_correction) { 3877 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3878 3879 /* rhs is already zero on interior dofs, no need to change the rhs */ 3880 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 3881 } 3882 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 3883 if (need_benign_correction) { 3884 PetscScalar *marr; 3885 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3886 3887 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3888 if (lda_rhs != n_R) { 3889 for (i = 0; i < n_constraints; i++) { 3890 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 3891 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 3892 PetscCall(VecResetArray(dummy_vec)); 3893 } 3894 } else { 3895 for (i = 0; i < n_constraints; i++) { 3896 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 3897 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 3898 PetscCall(VecResetArray(pcbddc->vec1_R)); 3899 } 3900 } 3901 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3902 } 3903 } else { 3904 PetscScalar *marr; 3905 3906 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3907 for (i = 0; i < n_constraints; i++) { 3908 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 3909 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 3910 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 3911 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 3912 PetscCall(VecResetArray(pcbddc->vec1_R)); 3913 PetscCall(VecResetArray(pcbddc->vec2_R)); 3914 } 3915 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3916 } 3917 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 3918 PetscCall(MatDestroy(&Brhs)); 3919 if (!pcbddc->switch_static) { 3920 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2)); 3921 for (i = 0; i < n_constraints; i++) { 3922 Vec r, b; 3923 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 3924 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 3925 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3926 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3927 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 3928 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 3929 } 3930 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3931 } else { 3932 if (lda_rhs != n_R) { 3933 IS dummy; 3934 3935 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy)); 3936 PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 3937 PetscCall(ISDestroy(&dummy)); 3938 } else { 3939 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 3940 pcbddc->local_auxmat2 = local_auxmat2_R; 3941 } 3942 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3943 } 3944 PetscCall(ISDestroy(&is_aux)); 3945 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3946 PetscCall(MatScale(M3, m_one)); 3947 if (isCHOL) { 3948 PetscCall(MatCholeskyFactor(M3, NULL, NULL)); 3949 } else { 3950 PetscCall(MatLUFactor(M3, NULL, NULL, NULL)); 3951 } 3952 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 3953 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3954 PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 3955 PetscCall(MatDestroy(&C_B)); 3956 PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3957 PetscCall(MatDestroy(&M3)); 3958 } 3959 3960 /* Get submatrices from subdomain matrix */ 3961 if (n_vertices) { 3962 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 3963 PetscBool oldpin; 3964 #endif 3965 PetscBool isaij; 3966 IS is_aux; 3967 3968 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3969 IS tis; 3970 3971 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 3972 PetscCall(ISSort(tis)); 3973 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 3974 PetscCall(ISDestroy(&tis)); 3975 } else { 3976 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 3977 } 3978 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 3979 oldpin = pcbddc->local_mat->boundtocpu; 3980 #endif 3981 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 3982 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 3983 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 3984 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij)); 3985 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 3986 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 3987 } 3988 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 3989 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 3990 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 3991 #endif 3992 PetscCall(ISDestroy(&is_aux)); 3993 } 3994 3995 /* Matrix of coarse basis functions (local) */ 3996 if (pcbddc->coarse_phi_B) { 3997 PetscInt on_B, on_primal, on_D = n_D; 3998 if (pcbddc->coarse_phi_D) { PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL)); } 3999 PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal)); 4000 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4001 PetscScalar *marray; 4002 4003 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray)); 4004 PetscCall(PetscFree(marray)); 4005 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4006 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4007 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4008 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4009 } 4010 } 4011 4012 if (!pcbddc->coarse_phi_B) { 4013 PetscScalar *marr; 4014 4015 /* memory size */ 4016 n = n_B * pcbddc->local_primal_size; 4017 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size; 4018 if (!pcbddc->symmetric_primal) n *= 2; 4019 PetscCall(PetscCalloc1(n, &marr)); 4020 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B)); 4021 marr += n_B * pcbddc->local_primal_size; 4022 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4023 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D)); 4024 marr += n_D * pcbddc->local_primal_size; 4025 } 4026 if (!pcbddc->symmetric_primal) { 4027 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B)); 4028 marr += n_B * pcbddc->local_primal_size; 4029 if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D)); } 4030 } else { 4031 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4032 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4033 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4034 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4035 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4036 } 4037 } 4038 } 4039 4040 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4041 p0_lidx_I = NULL; 4042 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4043 const PetscInt *idxs; 4044 4045 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4046 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4047 for (i = 0; i < pcbddc->benign_n; i++) { PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); } 4048 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4049 } 4050 4051 /* vertices */ 4052 if (n_vertices) { 4053 PetscBool restoreavr = PETSC_FALSE; 4054 4055 PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV)); 4056 4057 if (n_R) { 4058 Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */ 4059 PetscBLASInt B_N, B_one = 1; 4060 const PetscScalar *x; 4061 PetscScalar *y; 4062 4063 PetscCall(MatScale(A_RV, m_one)); 4064 if (need_benign_correction) { 4065 ISLocalToGlobalMapping RtoN; 4066 IS is_p0; 4067 PetscInt *idxs_p0, n; 4068 4069 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4070 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4071 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4072 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4073 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4074 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4075 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4076 PetscCall(ISDestroy(&is_p0)); 4077 } 4078 4079 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV)); 4080 if (!sparserhs || need_benign_correction) { 4081 if (lda_rhs == n_R) { 4082 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4083 } else { 4084 PetscScalar *av, *array; 4085 const PetscInt *xadj, *adjncy; 4086 PetscInt n; 4087 PetscBool flg_row; 4088 4089 array = work + lda_rhs * n_vertices; 4090 PetscCall(PetscArrayzero(array, lda_rhs * n_vertices)); 4091 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4092 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4093 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4094 for (i = 0; i < n; i++) { 4095 PetscInt j; 4096 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j]; 4097 } 4098 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4099 PetscCall(MatDestroy(&A_RV)); 4100 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV)); 4101 } 4102 if (need_benign_correction) { 4103 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4104 PetscScalar *marr; 4105 4106 PetscCall(MatDenseGetArray(A_RV, &marr)); 4107 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4108 4109 | 0 0 0 | (V) 4110 L = | 0 0 -1 | (P-p0) 4111 | 0 0 -1 | (p0) 4112 4113 */ 4114 for (i = 0; i < reuse_solver->benign_n; i++) { 4115 const PetscScalar *vals; 4116 const PetscInt *idxs, *idxs_zero; 4117 PetscInt n, j, nz; 4118 4119 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4120 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4121 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4122 for (j = 0; j < n; j++) { 4123 PetscScalar val = vals[j]; 4124 PetscInt k, col = idxs[j]; 4125 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4126 } 4127 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4128 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4129 } 4130 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4131 } 4132 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4133 Brhs = A_RV; 4134 } else { 4135 Mat tA_RVT, A_RVT; 4136 4137 if (!pcbddc->symmetric_primal) { 4138 /* A_RV already scaled by -1 */ 4139 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4140 } else { 4141 restoreavr = PETSC_TRUE; 4142 PetscCall(MatScale(A_VR, -1.0)); 4143 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4144 A_RVT = A_VR; 4145 } 4146 if (lda_rhs != n_R) { 4147 PetscScalar *aa; 4148 PetscInt r, *ii, *jj; 4149 PetscBool done; 4150 4151 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4152 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4153 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4154 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4155 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4156 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4157 } else { 4158 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4159 tA_RVT = A_RVT; 4160 } 4161 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4162 PetscCall(MatDestroy(&tA_RVT)); 4163 PetscCall(MatDestroy(&A_RVT)); 4164 } 4165 if (F) { 4166 /* need to correct the rhs */ 4167 if (need_benign_correction) { 4168 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4169 PetscScalar *marr; 4170 4171 PetscCall(MatDenseGetArray(Brhs, &marr)); 4172 if (lda_rhs != n_R) { 4173 for (i = 0; i < n_vertices; i++) { 4174 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4175 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4176 PetscCall(VecResetArray(dummy_vec)); 4177 } 4178 } else { 4179 for (i = 0; i < n_vertices; i++) { 4180 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4181 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4182 PetscCall(VecResetArray(pcbddc->vec1_R)); 4183 } 4184 } 4185 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4186 } 4187 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4188 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4189 /* need to correct the solution */ 4190 if (need_benign_correction) { 4191 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4192 PetscScalar *marr; 4193 4194 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4195 if (lda_rhs != n_R) { 4196 for (i = 0; i < n_vertices; i++) { 4197 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4198 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4199 PetscCall(VecResetArray(dummy_vec)); 4200 } 4201 } else { 4202 for (i = 0; i < n_vertices; i++) { 4203 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4204 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4205 PetscCall(VecResetArray(pcbddc->vec1_R)); 4206 } 4207 } 4208 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4209 } 4210 } else { 4211 PetscCall(MatDenseGetArray(Brhs, &y)); 4212 for (i = 0; i < n_vertices; i++) { 4213 PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs)); 4214 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs)); 4215 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4216 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4217 PetscCall(VecResetArray(pcbddc->vec1_R)); 4218 PetscCall(VecResetArray(pcbddc->vec2_R)); 4219 } 4220 PetscCall(MatDenseRestoreArray(Brhs, &y)); 4221 } 4222 PetscCall(MatDestroy(&A_RV)); 4223 PetscCall(MatDestroy(&Brhs)); 4224 /* S_VV and S_CV */ 4225 if (n_constraints) { 4226 Mat B; 4227 4228 PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices)); 4229 for (i = 0; i < n_vertices; i++) { 4230 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4231 PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B)); 4232 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4233 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4234 PetscCall(VecResetArray(pcis->vec1_B)); 4235 PetscCall(VecResetArray(pcbddc->vec1_R)); 4236 } 4237 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B)); 4238 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4239 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV)); 4240 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4241 PetscCall(MatProductSetFromOptions(S_CV)); 4242 PetscCall(MatProductSymbolic(S_CV)); 4243 PetscCall(MatProductNumeric(S_CV)); 4244 PetscCall(MatProductClear(S_CV)); 4245 4246 PetscCall(MatDestroy(&B)); 4247 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B)); 4248 /* Reuse B = local_auxmat2_R * S_CV */ 4249 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B)); 4250 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4251 PetscCall(MatProductSetFromOptions(B)); 4252 PetscCall(MatProductSymbolic(B)); 4253 PetscCall(MatProductNumeric(B)); 4254 4255 PetscCall(MatScale(S_CV, m_one)); 4256 PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N)); 4257 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one)); 4258 PetscCall(MatDestroy(&B)); 4259 } 4260 if (lda_rhs != n_R) { 4261 PetscCall(MatDestroy(&A_RRmA_RV)); 4262 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV)); 4263 PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs)); 4264 } 4265 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt)); 4266 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4267 if (need_benign_correction) { 4268 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4269 PetscScalar *marr, *sums; 4270 4271 PetscCall(PetscMalloc1(n_vertices, &sums)); 4272 PetscCall(MatDenseGetArray(S_VVt, &marr)); 4273 for (i = 0; i < reuse_solver->benign_n; i++) { 4274 const PetscScalar *vals; 4275 const PetscInt *idxs, *idxs_zero; 4276 PetscInt n, j, nz; 4277 4278 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4279 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4280 for (j = 0; j < n_vertices; j++) { 4281 PetscInt k; 4282 sums[j] = 0.; 4283 for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs]; 4284 } 4285 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4286 for (j = 0; j < n; j++) { 4287 PetscScalar val = vals[j]; 4288 PetscInt k; 4289 for (k = 0; k < n_vertices; k++) { marr[idxs[j] + k * n_vertices] += val * sums[k]; } 4290 } 4291 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4292 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4293 } 4294 PetscCall(PetscFree(sums)); 4295 PetscCall(MatDenseRestoreArray(S_VVt, &marr)); 4296 PetscCall(MatDestroy(&A_RV_bcorr)); 4297 } 4298 PetscCall(MatDestroy(&A_RRmA_RV)); 4299 PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N)); 4300 PetscCall(MatDenseGetArrayRead(A_VV, &x)); 4301 PetscCall(MatDenseGetArray(S_VVt, &y)); 4302 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one)); 4303 PetscCall(MatDenseRestoreArrayRead(A_VV, &x)); 4304 PetscCall(MatDenseRestoreArray(S_VVt, &y)); 4305 PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN)); 4306 PetscCall(MatDestroy(&S_VVt)); 4307 } else { 4308 PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN)); 4309 } 4310 PetscCall(MatDestroy(&A_VV)); 4311 4312 /* coarse basis functions */ 4313 for (i = 0; i < n_vertices; i++) { 4314 Vec v; 4315 PetscScalar one = 1.0, zero = 0.0; 4316 4317 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4318 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v)); 4319 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4320 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4321 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4322 PetscMPIInt rank; 4323 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank)); 4324 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4325 } 4326 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4327 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4328 PetscCall(VecAssemblyEnd(v)); 4329 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v)); 4330 4331 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4332 PetscInt j; 4333 4334 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v)); 4335 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4336 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4337 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4338 PetscMPIInt rank; 4339 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank)); 4340 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4341 } 4342 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4343 PetscCall(VecAssemblyBegin(v)); 4344 PetscCall(VecAssemblyEnd(v)); 4345 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v)); 4346 } 4347 PetscCall(VecResetArray(pcbddc->vec1_R)); 4348 } 4349 /* if n_R == 0 the object is not destroyed */ 4350 PetscCall(MatDestroy(&A_RV)); 4351 } 4352 PetscCall(VecDestroy(&dummy_vec)); 4353 4354 if (n_constraints) { 4355 Mat B; 4356 4357 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B)); 4358 PetscCall(MatScale(S_CC, m_one)); 4359 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B)); 4360 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4361 PetscCall(MatProductSetFromOptions(B)); 4362 PetscCall(MatProductSymbolic(B)); 4363 PetscCall(MatProductNumeric(B)); 4364 4365 PetscCall(MatScale(S_CC, m_one)); 4366 if (n_vertices) { 4367 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4368 PetscCall(MatTransposeSetPrecursor(S_CV, S_VC)); 4369 PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC)); 4370 } else { 4371 Mat S_VCt; 4372 4373 if (lda_rhs != n_R) { 4374 PetscCall(MatDestroy(&B)); 4375 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B)); 4376 PetscCall(MatDenseSetLDA(B, lda_rhs)); 4377 } 4378 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt)); 4379 PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN)); 4380 PetscCall(MatDestroy(&S_VCt)); 4381 } 4382 } 4383 PetscCall(MatDestroy(&B)); 4384 /* coarse basis functions */ 4385 for (i = 0; i < n_constraints; i++) { 4386 Vec v; 4387 4388 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4389 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4390 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4391 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4392 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4393 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4394 PetscInt j; 4395 PetscScalar zero = 0.0; 4396 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4397 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4398 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4399 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4400 PetscCall(VecAssemblyBegin(v)); 4401 PetscCall(VecAssemblyEnd(v)); 4402 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4403 } 4404 PetscCall(VecResetArray(pcbddc->vec1_R)); 4405 } 4406 } 4407 if (n_constraints) { PetscCall(MatDestroy(&local_auxmat2_R)); } 4408 PetscCall(PetscFree(p0_lidx_I)); 4409 4410 /* coarse matrix entries relative to B_0 */ 4411 if (pcbddc->benign_n) { 4412 Mat B0_B, B0_BPHI; 4413 IS is_dummy; 4414 const PetscScalar *data; 4415 PetscInt j; 4416 4417 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4418 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4419 PetscCall(ISDestroy(&is_dummy)); 4420 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4421 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4422 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4423 for (j = 0; j < pcbddc->benign_n; j++) { 4424 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4425 for (i = 0; i < pcbddc->local_primal_size; i++) { 4426 coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j]; 4427 coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j]; 4428 } 4429 } 4430 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4431 PetscCall(MatDestroy(&B0_B)); 4432 PetscCall(MatDestroy(&B0_BPHI)); 4433 } 4434 4435 /* compute other basis functions for non-symmetric problems */ 4436 if (!pcbddc->symmetric_primal) { 4437 Mat B_V = NULL, B_C = NULL; 4438 PetscScalar *marray; 4439 4440 if (n_constraints) { 4441 Mat S_CCT, C_CRT; 4442 4443 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4444 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4445 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4446 PetscCall(MatDestroy(&S_CCT)); 4447 if (n_vertices) { 4448 Mat S_VCT; 4449 4450 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4451 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4452 PetscCall(MatDestroy(&S_VCT)); 4453 } 4454 PetscCall(MatDestroy(&C_CRT)); 4455 } else { 4456 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4457 } 4458 if (n_vertices && n_R) { 4459 PetscScalar *av, *marray; 4460 const PetscInt *xadj, *adjncy; 4461 PetscInt n; 4462 PetscBool flg_row; 4463 4464 /* B_V = B_V - A_VR^T */ 4465 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4466 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4467 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4468 PetscCall(MatDenseGetArray(B_V, &marray)); 4469 for (i = 0; i < n; i++) { 4470 PetscInt j; 4471 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 4472 } 4473 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4474 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4475 PetscCall(MatDestroy(&A_VR)); 4476 } 4477 4478 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4479 if (n_vertices) { 4480 PetscCall(MatDenseGetArray(B_V, &marray)); 4481 for (i = 0; i < n_vertices; i++) { 4482 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 4483 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4484 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4485 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4486 PetscCall(VecResetArray(pcbddc->vec1_R)); 4487 PetscCall(VecResetArray(pcbddc->vec2_R)); 4488 } 4489 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4490 } 4491 if (B_C) { 4492 PetscCall(MatDenseGetArray(B_C, &marray)); 4493 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 4494 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 4495 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4496 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4497 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4498 PetscCall(VecResetArray(pcbddc->vec1_R)); 4499 PetscCall(VecResetArray(pcbddc->vec2_R)); 4500 } 4501 PetscCall(MatDenseRestoreArray(B_C, &marray)); 4502 } 4503 /* coarse basis functions */ 4504 for (i = 0; i < pcbddc->local_primal_size; i++) { 4505 Vec v; 4506 4507 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 4508 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 4509 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4510 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4511 if (i < n_vertices) { 4512 PetscScalar one = 1.0; 4513 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4514 PetscCall(VecAssemblyBegin(v)); 4515 PetscCall(VecAssemblyEnd(v)); 4516 } 4517 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 4518 4519 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4520 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 4521 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4522 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4523 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 4524 } 4525 PetscCall(VecResetArray(pcbddc->vec1_R)); 4526 } 4527 PetscCall(MatDestroy(&B_V)); 4528 PetscCall(MatDestroy(&B_C)); 4529 } 4530 4531 /* free memory */ 4532 PetscCall(PetscFree(idx_V_B)); 4533 PetscCall(MatDestroy(&S_VV)); 4534 PetscCall(MatDestroy(&S_CV)); 4535 PetscCall(MatDestroy(&S_VC)); 4536 PetscCall(MatDestroy(&S_CC)); 4537 PetscCall(PetscFree(work)); 4538 if (n_vertices) { PetscCall(MatDestroy(&A_VR)); } 4539 if (n_constraints) { PetscCall(MatDestroy(&C_CR)); } 4540 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4541 4542 /* Checking coarse_sub_mat and coarse basis functios */ 4543 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4544 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4545 if (pcbddc->dbg_flag) { 4546 Mat coarse_sub_mat; 4547 Mat AUXMAT, TM1, TM2, TM3, TM4; 4548 Mat coarse_phi_D, coarse_phi_B; 4549 Mat coarse_psi_D, coarse_psi_B; 4550 Mat A_II, A_BB, A_IB, A_BI; 4551 Mat C_B, CPHI; 4552 IS is_dummy; 4553 Vec mones; 4554 MatType checkmattype = MATSEQAIJ; 4555 PetscReal real_value; 4556 4557 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4558 Mat A; 4559 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 4560 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 4561 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 4562 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 4563 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 4564 PetscCall(MatDestroy(&A)); 4565 } else { 4566 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 4567 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 4568 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 4569 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 4570 } 4571 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 4572 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 4573 if (!pcbddc->symmetric_primal) { 4574 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 4575 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 4576 } 4577 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat)); 4578 4579 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 4580 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 4581 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4582 if (!pcbddc->symmetric_primal) { 4583 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4584 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4585 PetscCall(MatDestroy(&AUXMAT)); 4586 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4587 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4588 PetscCall(MatDestroy(&AUXMAT)); 4589 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4590 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4591 PetscCall(MatDestroy(&AUXMAT)); 4592 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4593 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4594 PetscCall(MatDestroy(&AUXMAT)); 4595 } else { 4596 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4597 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4598 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4599 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4600 PetscCall(MatDestroy(&AUXMAT)); 4601 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4602 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4603 PetscCall(MatDestroy(&AUXMAT)); 4604 } 4605 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 4606 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 4607 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 4608 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 4609 if (pcbddc->benign_n) { 4610 Mat B0_B, B0_BPHI; 4611 const PetscScalar *data2; 4612 PetscScalar *data; 4613 PetscInt j; 4614 4615 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4616 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4617 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4618 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4619 PetscCall(MatDenseGetArray(TM1, &data)); 4620 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 4621 for (j = 0; j < pcbddc->benign_n; j++) { 4622 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4623 for (i = 0; i < pcbddc->local_primal_size; i++) { 4624 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 4625 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 4626 } 4627 } 4628 PetscCall(MatDenseRestoreArray(TM1, &data)); 4629 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 4630 PetscCall(MatDestroy(&B0_B)); 4631 PetscCall(ISDestroy(&is_dummy)); 4632 PetscCall(MatDestroy(&B0_BPHI)); 4633 } 4634 #if 0 4635 { 4636 PetscViewer viewer; 4637 char filename[256]; 4638 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4639 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4640 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4641 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4642 PetscCall(MatView(coarse_sub_mat,viewer)); 4643 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4644 PetscCall(MatView(TM1,viewer)); 4645 if (pcbddc->coarse_phi_B) { 4646 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4647 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4648 } 4649 if (pcbddc->coarse_phi_D) { 4650 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4651 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4652 } 4653 if (pcbddc->coarse_psi_B) { 4654 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4655 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4656 } 4657 if (pcbddc->coarse_psi_D) { 4658 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4659 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4660 } 4661 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4662 PetscCall(MatView(pcbddc->local_mat,viewer)); 4663 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4664 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4665 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4666 PetscCall(ISView(pcis->is_I_local,viewer)); 4667 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4668 PetscCall(ISView(pcis->is_B_local,viewer)); 4669 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4670 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4671 PetscCall(PetscViewerDestroy(&viewer)); 4672 } 4673 #endif 4674 PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN)); 4675 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 4676 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4677 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4678 4679 /* check constraints */ 4680 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 4681 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4682 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4683 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4684 } else { 4685 PetscScalar *data; 4686 Mat tmat; 4687 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 4688 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 4689 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 4690 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4691 PetscCall(MatDestroy(&tmat)); 4692 } 4693 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 4694 PetscCall(VecSet(mones, -1.0)); 4695 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4696 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4697 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4698 if (!pcbddc->symmetric_primal) { 4699 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 4700 PetscCall(VecSet(mones, -1.0)); 4701 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4702 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4703 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4704 } 4705 PetscCall(MatDestroy(&C_B)); 4706 PetscCall(MatDestroy(&CPHI)); 4707 PetscCall(ISDestroy(&is_dummy)); 4708 PetscCall(VecDestroy(&mones)); 4709 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4710 PetscCall(MatDestroy(&A_II)); 4711 PetscCall(MatDestroy(&A_BB)); 4712 PetscCall(MatDestroy(&A_IB)); 4713 PetscCall(MatDestroy(&A_BI)); 4714 PetscCall(MatDestroy(&TM1)); 4715 PetscCall(MatDestroy(&TM2)); 4716 PetscCall(MatDestroy(&TM3)); 4717 PetscCall(MatDestroy(&TM4)); 4718 PetscCall(MatDestroy(&coarse_phi_D)); 4719 PetscCall(MatDestroy(&coarse_phi_B)); 4720 if (!pcbddc->symmetric_primal) { 4721 PetscCall(MatDestroy(&coarse_psi_D)); 4722 PetscCall(MatDestroy(&coarse_psi_B)); 4723 } 4724 PetscCall(MatDestroy(&coarse_sub_mat)); 4725 } 4726 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4727 { 4728 PetscBool gpu; 4729 4730 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu)); 4731 if (gpu) { 4732 if (pcbddc->local_auxmat1) { PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); } 4733 if (pcbddc->local_auxmat2) { PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); } 4734 if (pcbddc->coarse_phi_B) { PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); } 4735 if (pcbddc->coarse_phi_D) { PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); } 4736 if (pcbddc->coarse_psi_B) { PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); } 4737 if (pcbddc->coarse_psi_D) { PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); } 4738 } 4739 } 4740 /* get back data */ 4741 *coarse_submat_vals_n = coarse_submat_vals; 4742 PetscFunctionReturn(0); 4743 } 4744 4745 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) { 4746 Mat *work_mat; 4747 IS isrow_s, iscol_s; 4748 PetscBool rsorted, csorted; 4749 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 4750 4751 PetscFunctionBegin; 4752 PetscCall(ISSorted(isrow, &rsorted)); 4753 PetscCall(ISSorted(iscol, &csorted)); 4754 PetscCall(ISGetLocalSize(isrow, &rsize)); 4755 PetscCall(ISGetLocalSize(iscol, &csize)); 4756 4757 if (!rsorted) { 4758 const PetscInt *idxs; 4759 PetscInt *idxs_sorted, i; 4760 4761 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 4762 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 4763 for (i = 0; i < rsize; i++) { idxs_perm_r[i] = i; } 4764 PetscCall(ISGetIndices(isrow, &idxs)); 4765 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 4766 for (i = 0; i < rsize; i++) { idxs_sorted[i] = idxs[idxs_perm_r[i]]; } 4767 PetscCall(ISRestoreIndices(isrow, &idxs)); 4768 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 4769 } else { 4770 PetscCall(PetscObjectReference((PetscObject)isrow)); 4771 isrow_s = isrow; 4772 } 4773 4774 if (!csorted) { 4775 if (isrow == iscol) { 4776 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4777 iscol_s = isrow_s; 4778 } else { 4779 const PetscInt *idxs; 4780 PetscInt *idxs_sorted, i; 4781 4782 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 4783 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 4784 for (i = 0; i < csize; i++) { idxs_perm_c[i] = i; } 4785 PetscCall(ISGetIndices(iscol, &idxs)); 4786 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 4787 for (i = 0; i < csize; i++) { idxs_sorted[i] = idxs[idxs_perm_c[i]]; } 4788 PetscCall(ISRestoreIndices(iscol, &idxs)); 4789 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 4790 } 4791 } else { 4792 PetscCall(PetscObjectReference((PetscObject)iscol)); 4793 iscol_s = iscol; 4794 } 4795 4796 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 4797 4798 if (!rsorted || !csorted) { 4799 Mat new_mat; 4800 IS is_perm_r, is_perm_c; 4801 4802 if (!rsorted) { 4803 PetscInt *idxs_r, i; 4804 PetscCall(PetscMalloc1(rsize, &idxs_r)); 4805 for (i = 0; i < rsize; i++) { idxs_r[idxs_perm_r[i]] = i; } 4806 PetscCall(PetscFree(idxs_perm_r)); 4807 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 4808 } else { 4809 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 4810 } 4811 PetscCall(ISSetPermutation(is_perm_r)); 4812 4813 if (!csorted) { 4814 if (isrow_s == iscol_s) { 4815 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 4816 is_perm_c = is_perm_r; 4817 } else { 4818 PetscInt *idxs_c, i; 4819 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 4820 PetscCall(PetscMalloc1(csize, &idxs_c)); 4821 for (i = 0; i < csize; i++) { idxs_c[idxs_perm_c[i]] = i; } 4822 PetscCall(PetscFree(idxs_perm_c)); 4823 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 4824 } 4825 } else { 4826 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 4827 } 4828 PetscCall(ISSetPermutation(is_perm_c)); 4829 4830 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 4831 PetscCall(MatDestroy(&work_mat[0])); 4832 work_mat[0] = new_mat; 4833 PetscCall(ISDestroy(&is_perm_r)); 4834 PetscCall(ISDestroy(&is_perm_c)); 4835 } 4836 4837 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 4838 *B = work_mat[0]; 4839 PetscCall(MatDestroyMatrices(1, &work_mat)); 4840 PetscCall(ISDestroy(&isrow_s)); 4841 PetscCall(ISDestroy(&iscol_s)); 4842 PetscFunctionReturn(0); 4843 } 4844 4845 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) { 4846 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 4847 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4848 Mat new_mat, lA; 4849 IS is_local, is_global; 4850 PetscInt local_size; 4851 PetscBool isseqaij, issym, isset; 4852 4853 PetscFunctionBegin; 4854 PetscCall(MatDestroy(&pcbddc->local_mat)); 4855 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 4856 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 4857 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 4858 PetscCall(ISDestroy(&is_local)); 4859 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 4860 PetscCall(ISDestroy(&is_global)); 4861 4862 if (pcbddc->dbg_flag) { 4863 Vec x, x_change; 4864 PetscReal error; 4865 4866 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 4867 PetscCall(VecSetRandom(x, NULL)); 4868 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 4869 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4870 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4871 PetscCall(MatMult(new_mat, matis->x, matis->y)); 4872 if (!pcbddc->change_interior) { 4873 const PetscScalar *x, *y, *v; 4874 PetscReal lerror = 0.; 4875 PetscInt i; 4876 4877 PetscCall(VecGetArrayRead(matis->x, &x)); 4878 PetscCall(VecGetArrayRead(matis->y, &y)); 4879 PetscCall(VecGetArrayRead(matis->counter, &v)); 4880 for (i = 0; i < local_size; i++) 4881 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 4882 PetscCall(VecRestoreArrayRead(matis->x, &x)); 4883 PetscCall(VecRestoreArrayRead(matis->y, &y)); 4884 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 4885 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 4886 if (error > PETSC_SMALL) { 4887 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4888 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 4889 } else { 4890 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 4891 } 4892 } 4893 } 4894 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4895 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4896 PetscCall(VecAXPY(x, -1.0, x_change)); 4897 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 4898 if (error > PETSC_SMALL) { 4899 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4900 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 4901 } else { 4902 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 4903 } 4904 } 4905 PetscCall(VecDestroy(&x)); 4906 PetscCall(VecDestroy(&x_change)); 4907 } 4908 4909 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4910 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 4911 4912 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4913 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 4914 if (isseqaij) { 4915 PetscCall(MatDestroy(&pcbddc->local_mat)); 4916 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4917 if (lA) { 4918 Mat work; 4919 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4920 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4921 PetscCall(MatDestroy(&work)); 4922 } 4923 } else { 4924 Mat work_mat; 4925 4926 PetscCall(MatDestroy(&pcbddc->local_mat)); 4927 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4928 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4929 PetscCall(MatDestroy(&work_mat)); 4930 if (lA) { 4931 Mat work; 4932 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4933 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4934 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4935 PetscCall(MatDestroy(&work)); 4936 } 4937 } 4938 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 4939 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 4940 PetscCall(MatDestroy(&new_mat)); 4941 PetscFunctionReturn(0); 4942 } 4943 4944 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) { 4945 PC_IS *pcis = (PC_IS *)(pc->data); 4946 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4947 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4948 PetscInt *idx_R_local = NULL; 4949 PetscInt n_vertices, i, j, n_R, n_D, n_B; 4950 PetscInt vbs, bs; 4951 PetscBT bitmask = NULL; 4952 4953 PetscFunctionBegin; 4954 /* 4955 No need to setup local scatters if 4956 - primal space is unchanged 4957 AND 4958 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4959 AND 4960 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4961 */ 4962 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { PetscFunctionReturn(0); } 4963 /* destroy old objects */ 4964 PetscCall(ISDestroy(&pcbddc->is_R_local)); 4965 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 4966 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 4967 /* Set Non-overlapping dimensions */ 4968 n_B = pcis->n_B; 4969 n_D = pcis->n - n_B; 4970 n_vertices = pcbddc->n_vertices; 4971 4972 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4973 4974 /* create auxiliary bitmask and allocate workspace */ 4975 if (!sub_schurs || !sub_schurs->reuse_solver) { 4976 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 4977 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 4978 for (i = 0; i < n_vertices; i++) { PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); } 4979 4980 for (i = 0, n_R = 0; i < pcis->n; i++) { 4981 if (!PetscBTLookup(bitmask, i)) { idx_R_local[n_R++] = i; } 4982 } 4983 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4984 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4985 4986 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 4987 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 4988 } 4989 4990 /* Block code */ 4991 vbs = 1; 4992 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 4993 if (bs > 1 && !(n_vertices % bs)) { 4994 PetscBool is_blocked = PETSC_TRUE; 4995 PetscInt *vary; 4996 if (!sub_schurs || !sub_schurs->reuse_solver) { 4997 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 4998 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 4999 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5000 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5001 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5002 for (i = 0; i < pcis->n / bs; i++) { 5003 if (vary[i] != 0 && vary[i] != bs) { 5004 is_blocked = PETSC_FALSE; 5005 break; 5006 } 5007 } 5008 PetscCall(PetscFree(vary)); 5009 } else { 5010 /* Verify directly the R set */ 5011 for (i = 0; i < n_R / bs; i++) { 5012 PetscInt j, node = idx_R_local[bs * i]; 5013 for (j = 1; j < bs; j++) { 5014 if (node != idx_R_local[bs * i + j] - j) { 5015 is_blocked = PETSC_FALSE; 5016 break; 5017 } 5018 } 5019 } 5020 } 5021 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5022 vbs = bs; 5023 for (i = 0; i < n_R / vbs; i++) { idx_R_local[i] = idx_R_local[vbs * i] / vbs; } 5024 } 5025 } 5026 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5027 if (sub_schurs && sub_schurs->reuse_solver) { 5028 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5029 5030 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5031 PetscCall(ISDestroy(&reuse_solver->is_R)); 5032 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5033 reuse_solver->is_R = pcbddc->is_R_local; 5034 } else { 5035 PetscCall(PetscFree(idx_R_local)); 5036 } 5037 5038 /* print some info if requested */ 5039 if (pcbddc->dbg_flag) { 5040 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5041 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5042 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5043 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5044 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5045 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5046 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5047 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5048 } 5049 5050 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5051 if (!sub_schurs || !sub_schurs->reuse_solver) { 5052 IS is_aux1, is_aux2; 5053 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5054 5055 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5056 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5057 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5058 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5059 for (i = 0; i < n_D; i++) { PetscCall(PetscBTSet(bitmask, is_indices[i])); } 5060 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5061 for (i = 0, j = 0; i < n_R; i++) { 5062 if (!PetscBTLookup(bitmask, idx_R_local[i])) { aux_array1[j++] = i; } 5063 } 5064 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5065 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5066 for (i = 0, j = 0; i < n_B; i++) { 5067 if (!PetscBTLookup(bitmask, is_indices[i])) { aux_array2[j++] = i; } 5068 } 5069 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5070 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5071 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5072 PetscCall(ISDestroy(&is_aux1)); 5073 PetscCall(ISDestroy(&is_aux2)); 5074 5075 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5076 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5077 for (i = 0, j = 0; i < n_R; i++) { 5078 if (PetscBTLookup(bitmask, idx_R_local[i])) { aux_array1[j++] = i; } 5079 } 5080 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5081 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5082 PetscCall(ISDestroy(&is_aux1)); 5083 } 5084 PetscCall(PetscBTDestroy(&bitmask)); 5085 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5086 } else { 5087 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5088 IS tis; 5089 PetscInt schur_size; 5090 5091 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5092 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5093 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5094 PetscCall(ISDestroy(&tis)); 5095 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5096 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5097 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5098 PetscCall(ISDestroy(&tis)); 5099 } 5100 } 5101 PetscFunctionReturn(0); 5102 } 5103 5104 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) { 5105 MatNullSpace NullSpace; 5106 Mat dmat; 5107 const Vec *nullvecs; 5108 Vec v, v2, *nullvecs2; 5109 VecScatter sct = NULL; 5110 PetscContainer c; 5111 PetscScalar *ddata; 5112 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5113 PetscBool nnsp_has_cnst; 5114 5115 PetscFunctionBegin; 5116 if (!is && !B) { /* MATIS */ 5117 Mat_IS *matis = (Mat_IS *)A->data; 5118 5119 if (!B) { PetscCall(MatISGetLocalMat(A, &B)); } 5120 sct = matis->cctx; 5121 PetscCall(PetscObjectReference((PetscObject)sct)); 5122 } else { 5123 PetscCall(MatGetNullSpace(B, &NullSpace)); 5124 if (!NullSpace) { PetscCall(MatGetNearNullSpace(B, &NullSpace)); } 5125 if (NullSpace) PetscFunctionReturn(0); 5126 } 5127 PetscCall(MatGetNullSpace(A, &NullSpace)); 5128 if (!NullSpace) { PetscCall(MatGetNearNullSpace(A, &NullSpace)); } 5129 if (!NullSpace) PetscFunctionReturn(0); 5130 5131 PetscCall(MatCreateVecs(A, &v, NULL)); 5132 PetscCall(MatCreateVecs(B, &v2, NULL)); 5133 if (!sct) { PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); } 5134 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5135 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5136 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5137 PetscCall(VecGetBlockSize(v2, &bs)); 5138 PetscCall(VecGetSize(v2, &N)); 5139 PetscCall(VecGetLocalSize(v2, &n)); 5140 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5141 for (k = 0; k < nnsp_size; k++) { 5142 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5143 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5144 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5145 } 5146 if (nnsp_has_cnst) { 5147 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5148 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5149 } 5150 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5151 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5152 5153 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5154 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5155 PetscCall(PetscContainerSetPointer(c, ddata)); 5156 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5157 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5158 PetscCall(PetscContainerDestroy(&c)); 5159 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5160 PetscCall(MatDestroy(&dmat)); 5161 5162 for (k = 0; k < bsiz; k++) { PetscCall(VecDestroy(&nullvecs2[k])); } 5163 PetscCall(PetscFree(nullvecs2)); 5164 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5165 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5166 PetscCall(VecDestroy(&v)); 5167 PetscCall(VecDestroy(&v2)); 5168 PetscCall(VecScatterDestroy(&sct)); 5169 PetscFunctionReturn(0); 5170 } 5171 5172 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) { 5173 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5174 PC_IS *pcis = (PC_IS *)pc->data; 5175 PC pc_temp; 5176 Mat A_RR; 5177 MatNullSpace nnsp; 5178 MatReuse reuse; 5179 PetscScalar m_one = -1.0; 5180 PetscReal value; 5181 PetscInt n_D, n_R; 5182 PetscBool issbaij, opts, isset, issym; 5183 void (*f)(void) = NULL; 5184 char dir_prefix[256], neu_prefix[256], str_level[16]; 5185 size_t len; 5186 5187 PetscFunctionBegin; 5188 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5189 /* approximate solver, propagate NearNullSpace if needed */ 5190 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5191 MatNullSpace gnnsp1, gnnsp2; 5192 PetscBool lhas, ghas; 5193 5194 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5195 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5196 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5197 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5198 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5199 if (!ghas && (gnnsp1 || gnnsp2)) { PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); } 5200 } 5201 5202 /* compute prefixes */ 5203 PetscCall(PetscStrcpy(dir_prefix, "")); 5204 PetscCall(PetscStrcpy(neu_prefix, "")); 5205 if (!pcbddc->current_level) { 5206 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5207 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5208 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5209 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5210 } else { 5211 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 5212 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5213 len -= 15; /* remove "pc_bddc_coarse_" */ 5214 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5215 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5216 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5217 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5218 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5219 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5220 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5221 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5222 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5223 } 5224 5225 /* DIRICHLET PROBLEM */ 5226 if (dirichlet) { 5227 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5228 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5229 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5230 if (pcbddc->dbg_flag) { 5231 Mat A_IIn; 5232 5233 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5234 PetscCall(MatDestroy(&pcis->A_II)); 5235 pcis->A_II = A_IIn; 5236 } 5237 } 5238 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5239 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5240 5241 /* Matrix for Dirichlet problem is pcis->A_II */ 5242 n_D = pcis->n - pcis->n_B; 5243 opts = PETSC_FALSE; 5244 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5245 opts = PETSC_TRUE; 5246 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5247 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5248 /* default */ 5249 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5250 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5251 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5252 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5253 if (issbaij) { 5254 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5255 } else { 5256 PetscCall(PCSetType(pc_temp, PCLU)); 5257 } 5258 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5259 } 5260 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5261 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5262 /* Allow user's customization */ 5263 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5264 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5265 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5266 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5267 } 5268 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5269 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5270 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5271 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5272 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5273 const PetscInt *idxs; 5274 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5275 5276 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5277 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5278 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5279 for (i = 0; i < nl; i++) { 5280 for (d = 0; d < cdim; d++) { scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; } 5281 } 5282 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5283 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5284 PetscCall(PetscFree(scoords)); 5285 } 5286 if (sub_schurs && sub_schurs->reuse_solver) { 5287 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5288 5289 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5290 } 5291 5292 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5293 if (!n_D) { 5294 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5295 PetscCall(PCSetType(pc_temp, PCNONE)); 5296 } 5297 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5298 /* set ksp_D into pcis data */ 5299 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5300 PetscCall(KSPDestroy(&pcis->ksp_D)); 5301 pcis->ksp_D = pcbddc->ksp_D; 5302 } 5303 5304 /* NEUMANN PROBLEM */ 5305 A_RR = NULL; 5306 if (neumann) { 5307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5308 PetscInt ibs, mbs; 5309 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5310 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5311 5312 reuse_neumann_solver = PETSC_FALSE; 5313 if (sub_schurs && sub_schurs->reuse_solver) { 5314 IS iP; 5315 5316 reuse_neumann_solver = PETSC_TRUE; 5317 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5318 if (iP) reuse_neumann_solver = PETSC_FALSE; 5319 } 5320 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5321 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5322 if (pcbddc->ksp_R) { /* already created ksp */ 5323 PetscInt nn_R; 5324 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5325 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5326 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5327 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5328 PetscCall(KSPReset(pcbddc->ksp_R)); 5329 PetscCall(MatDestroy(&A_RR)); 5330 reuse = MAT_INITIAL_MATRIX; 5331 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5332 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5333 PetscCall(MatDestroy(&A_RR)); 5334 reuse = MAT_INITIAL_MATRIX; 5335 } else { /* safe to reuse the matrix */ 5336 reuse = MAT_REUSE_MATRIX; 5337 } 5338 } 5339 /* last check */ 5340 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5341 PetscCall(MatDestroy(&A_RR)); 5342 reuse = MAT_INITIAL_MATRIX; 5343 } 5344 } else { /* first time, so we need to create the matrix */ 5345 reuse = MAT_INITIAL_MATRIX; 5346 } 5347 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5348 TODO: Get Rid of these conversions */ 5349 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5350 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5351 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5352 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5353 if (matis->A == pcbddc->local_mat) { 5354 PetscCall(MatDestroy(&pcbddc->local_mat)); 5355 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5356 } else { 5357 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5358 } 5359 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5360 if (matis->A == pcbddc->local_mat) { 5361 PetscCall(MatDestroy(&pcbddc->local_mat)); 5362 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5363 } else { 5364 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5365 } 5366 } 5367 /* extract A_RR */ 5368 if (reuse_neumann_solver) { 5369 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5370 5371 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5372 PetscCall(MatDestroy(&A_RR)); 5373 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5374 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5375 } else { 5376 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5377 } 5378 } else { 5379 PetscCall(MatDestroy(&A_RR)); 5380 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5381 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5382 } 5383 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5384 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5385 } 5386 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5387 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5388 opts = PETSC_FALSE; 5389 if (!pcbddc->ksp_R) { /* create object if not present */ 5390 opts = PETSC_TRUE; 5391 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5392 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5393 /* default */ 5394 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5395 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5396 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5397 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5398 if (issbaij) { 5399 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5400 } else { 5401 PetscCall(PCSetType(pc_temp, PCLU)); 5402 } 5403 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5404 } 5405 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5406 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5407 if (opts) { /* Allow user's customization once */ 5408 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5409 } 5410 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5411 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5412 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5413 } 5414 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5415 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5416 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5417 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5418 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5419 const PetscInt *idxs; 5420 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5421 5422 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 5423 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 5424 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5425 for (i = 0; i < nl; i++) { 5426 for (d = 0; d < cdim; d++) { scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; } 5427 } 5428 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 5429 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5430 PetscCall(PetscFree(scoords)); 5431 } 5432 5433 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5434 if (!n_R) { 5435 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5436 PetscCall(PCSetType(pc_temp, PCNONE)); 5437 } 5438 /* Reuse solver if it is present */ 5439 if (reuse_neumann_solver) { 5440 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5441 5442 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 5443 } 5444 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5445 } 5446 5447 if (pcbddc->dbg_flag) { 5448 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5449 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5450 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5451 } 5452 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5453 5454 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5455 if (pcbddc->NullSpace_corr[0]) { PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); } 5456 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); } 5457 if (neumann && pcbddc->NullSpace_corr[2]) { PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); } 5458 /* check Dirichlet and Neumann solvers */ 5459 if (pcbddc->dbg_flag) { 5460 if (dirichlet) { /* Dirichlet */ 5461 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 5462 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 5463 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 5464 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 5465 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 5466 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 5467 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value)); 5468 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5469 } 5470 if (neumann) { /* Neumann */ 5471 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 5472 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 5473 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 5474 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5475 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 5476 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 5477 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value)); 5478 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5479 } 5480 } 5481 /* free Neumann problem's matrix */ 5482 PetscCall(MatDestroy(&A_RR)); 5483 PetscFunctionReturn(0); 5484 } 5485 5486 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) { 5487 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5488 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5489 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5490 5491 PetscFunctionBegin; 5492 if (!reuse_solver) { PetscCall(VecSet(pcbddc->vec1_R, 0.)); } 5493 if (!pcbddc->switch_static) { 5494 if (applytranspose && pcbddc->local_auxmat1) { 5495 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 5496 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5497 } 5498 if (!reuse_solver) { 5499 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5500 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5501 } else { 5502 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5503 5504 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5505 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5506 } 5507 } else { 5508 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5509 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5510 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5511 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5512 if (applytranspose && pcbddc->local_auxmat1) { 5513 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 5514 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5515 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5516 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5517 } 5518 } 5519 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5520 if (!reuse_solver || pcbddc->switch_static) { 5521 if (applytranspose) { 5522 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5523 } else { 5524 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5525 } 5526 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 5527 } else { 5528 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5529 5530 if (applytranspose) { 5531 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5532 } else { 5533 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5534 } 5535 } 5536 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5537 PetscCall(VecSet(inout_B, 0.)); 5538 if (!pcbddc->switch_static) { 5539 if (!reuse_solver) { 5540 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5541 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5542 } else { 5543 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5544 5545 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5546 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5547 } 5548 if (!applytranspose && pcbddc->local_auxmat1) { 5549 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5550 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 5551 } 5552 } else { 5553 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5554 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5555 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5556 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5557 if (!applytranspose && pcbddc->local_auxmat1) { 5558 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5559 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 5560 } 5561 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5562 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5563 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5564 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5565 } 5566 PetscFunctionReturn(0); 5567 } 5568 5569 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5570 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) { 5571 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5572 PC_IS *pcis = (PC_IS *)(pc->data); 5573 const PetscScalar zero = 0.0; 5574 5575 PetscFunctionBegin; 5576 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5577 if (!pcbddc->benign_apply_coarse_only) { 5578 if (applytranspose) { 5579 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 5580 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5581 } else { 5582 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 5583 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5584 } 5585 } else { 5586 PetscCall(VecSet(pcbddc->vec1_P, zero)); 5587 } 5588 5589 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5590 if (pcbddc->benign_n) { 5591 PetscScalar *array; 5592 PetscInt j; 5593 5594 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5595 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 5596 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5597 } 5598 5599 /* start communications from local primal nodes to rhs of coarse solver */ 5600 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 5601 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 5602 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 5603 5604 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5605 if (pcbddc->coarse_ksp) { 5606 Mat coarse_mat; 5607 Vec rhs, sol; 5608 MatNullSpace nullsp; 5609 PetscBool isbddc = PETSC_FALSE; 5610 5611 if (pcbddc->benign_have_null) { 5612 PC coarse_pc; 5613 5614 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5615 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 5616 /* we need to propagate to coarser levels the need for a possible benign correction */ 5617 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5618 PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5619 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5620 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5621 } 5622 } 5623 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 5624 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 5625 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 5626 if (applytranspose) { 5627 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 5628 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5629 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 5630 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5631 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5632 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 5633 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5634 } else { 5635 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 5636 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5637 PC coarse_pc; 5638 5639 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 5640 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5641 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 5642 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 5643 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 5644 } else { 5645 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5646 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 5647 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5648 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5649 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5650 } 5651 } 5652 /* we don't need the benign correction at coarser levels anymore */ 5653 if (pcbddc->benign_have_null && isbddc) { 5654 PC coarse_pc; 5655 PC_BDDC *coarsepcbddc; 5656 5657 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5658 coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5659 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5660 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5661 } 5662 } 5663 5664 /* Local solution on R nodes */ 5665 if (pcis->n && !pcbddc->benign_apply_coarse_only) { PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); } 5666 /* communications from coarse sol to local primal nodes */ 5667 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 5668 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 5669 5670 /* Sum contributions from the two levels */ 5671 if (!pcbddc->benign_apply_coarse_only) { 5672 if (applytranspose) { 5673 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5674 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5675 } else { 5676 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5677 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5678 } 5679 /* store p0 */ 5680 if (pcbddc->benign_n) { 5681 PetscScalar *array; 5682 PetscInt j; 5683 5684 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5685 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 5686 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5687 } 5688 } else { /* expand the coarse solution */ 5689 if (applytranspose) { 5690 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 5691 } else { 5692 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 5693 } 5694 } 5695 PetscFunctionReturn(0); 5696 } 5697 5698 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) { 5699 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5700 Vec from, to; 5701 const PetscScalar *array; 5702 5703 PetscFunctionBegin; 5704 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5705 from = pcbddc->coarse_vec; 5706 to = pcbddc->vec1_P; 5707 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5708 Vec tvec; 5709 5710 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5711 PetscCall(VecResetArray(tvec)); 5712 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 5713 PetscCall(VecGetArrayRead(tvec, &array)); 5714 PetscCall(VecPlaceArray(from, array)); 5715 PetscCall(VecRestoreArrayRead(tvec, &array)); 5716 } 5717 } else { /* from local to global -> put data in coarse right hand side */ 5718 from = pcbddc->vec1_P; 5719 to = pcbddc->coarse_vec; 5720 } 5721 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5722 PetscFunctionReturn(0); 5723 } 5724 5725 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) { 5726 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5727 Vec from, to; 5728 const PetscScalar *array; 5729 5730 PetscFunctionBegin; 5731 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5732 from = pcbddc->coarse_vec; 5733 to = pcbddc->vec1_P; 5734 } else { /* from local to global -> put data in coarse right hand side */ 5735 from = pcbddc->vec1_P; 5736 to = pcbddc->coarse_vec; 5737 } 5738 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5739 if (smode == SCATTER_FORWARD) { 5740 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5741 Vec tvec; 5742 5743 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5744 PetscCall(VecGetArrayRead(to, &array)); 5745 PetscCall(VecPlaceArray(tvec, array)); 5746 PetscCall(VecRestoreArrayRead(to, &array)); 5747 } 5748 } else { 5749 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5750 PetscCall(VecResetArray(from)); 5751 } 5752 } 5753 PetscFunctionReturn(0); 5754 } 5755 5756 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) { 5757 PC_IS *pcis = (PC_IS *)(pc->data); 5758 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5759 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5760 /* one and zero */ 5761 PetscScalar one = 1.0, zero = 0.0; 5762 /* space to store constraints and their local indices */ 5763 PetscScalar *constraints_data; 5764 PetscInt *constraints_idxs, *constraints_idxs_B; 5765 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 5766 PetscInt *constraints_n; 5767 /* iterators */ 5768 PetscInt i, j, k, total_counts, total_counts_cc, cum; 5769 /* BLAS integers */ 5770 PetscBLASInt lwork, lierr; 5771 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 5772 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 5773 /* reuse */ 5774 PetscInt olocal_primal_size, olocal_primal_size_cc; 5775 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 5776 /* change of basis */ 5777 PetscBool qr_needed; 5778 PetscBT change_basis, qr_needed_idx; 5779 /* auxiliary stuff */ 5780 PetscInt *nnz, *is_indices; 5781 PetscInt ncc; 5782 /* some quantities */ 5783 PetscInt n_vertices, total_primal_vertices, valid_constraints; 5784 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 5785 PetscReal tol; /* tolerance for retaining eigenmodes */ 5786 5787 PetscFunctionBegin; 5788 tol = PetscSqrtReal(PETSC_SMALL); 5789 /* Destroy Mat objects computed previously */ 5790 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 5791 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 5792 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 5793 /* save info on constraints from previous setup (if any) */ 5794 olocal_primal_size = pcbddc->local_primal_size; 5795 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5796 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 5797 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 5798 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 5799 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 5800 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 5801 5802 if (!pcbddc->adaptive_selection) { 5803 IS ISForVertices, *ISForFaces, *ISForEdges; 5804 MatNullSpace nearnullsp; 5805 const Vec *nearnullvecs; 5806 Vec *localnearnullsp; 5807 PetscScalar *array; 5808 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 5809 PetscBool nnsp_has_cnst; 5810 /* LAPACK working arrays for SVD or POD */ 5811 PetscBool skip_lapack, boolforchange; 5812 PetscScalar *work; 5813 PetscReal *singular_vals; 5814 #if defined(PETSC_USE_COMPLEX) 5815 PetscReal *rwork; 5816 #endif 5817 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 5818 PetscBLASInt dummy_int = 1; 5819 PetscScalar dummy_scalar = 1.; 5820 PetscBool use_pod = PETSC_FALSE; 5821 5822 /* MKL SVD with same input gives different results on different processes! */ 5823 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 5824 use_pod = PETSC_TRUE; 5825 #endif 5826 /* Get index sets for faces, edges and vertices from graph */ 5827 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 5828 o_nf = n_ISForFaces; 5829 o_ne = n_ISForEdges; 5830 n_vertices = 0; 5831 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 5832 /* print some info */ 5833 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5834 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 5835 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 5836 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5837 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 5838 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 5839 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 5840 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 5841 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5842 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 5843 } 5844 5845 if (!pcbddc->use_vertices) n_vertices = 0; 5846 if (!pcbddc->use_edges) n_ISForEdges = 0; 5847 if (!pcbddc->use_faces) n_ISForFaces = 0; 5848 5849 /* check if near null space is attached to global mat */ 5850 if (pcbddc->use_nnsp) { 5851 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 5852 } else nearnullsp = NULL; 5853 5854 if (nearnullsp) { 5855 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 5856 /* remove any stored info */ 5857 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 5858 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 5859 /* store information for BDDC solver reuse */ 5860 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 5861 pcbddc->onearnullspace = nearnullsp; 5862 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 5863 for (i = 0; i < nnsp_size; i++) { PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); } 5864 } else { /* if near null space is not provided BDDC uses constants by default */ 5865 nnsp_size = 0; 5866 nnsp_has_cnst = PETSC_TRUE; 5867 } 5868 /* get max number of constraints on a single cc */ 5869 max_constraints = nnsp_size; 5870 if (nnsp_has_cnst) max_constraints++; 5871 5872 /* 5873 Evaluate maximum storage size needed by the procedure 5874 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5875 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5876 There can be multiple constraints per connected component 5877 */ 5878 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 5879 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 5880 5881 total_counts = n_ISForFaces + n_ISForEdges; 5882 total_counts *= max_constraints; 5883 total_counts += n_vertices; 5884 PetscCall(PetscBTCreate(total_counts, &change_basis)); 5885 5886 total_counts = 0; 5887 max_size_of_constraint = 0; 5888 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 5889 IS used_is; 5890 if (i < n_ISForEdges) { 5891 used_is = ISForEdges[i]; 5892 } else { 5893 used_is = ISForFaces[i - n_ISForEdges]; 5894 } 5895 PetscCall(ISGetSize(used_is, &j)); 5896 total_counts += j; 5897 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 5898 } 5899 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 5900 5901 /* get local part of global near null space vectors */ 5902 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 5903 for (k = 0; k < nnsp_size; k++) { 5904 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 5905 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5906 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5907 } 5908 5909 /* whether or not to skip lapack calls */ 5910 skip_lapack = PETSC_TRUE; 5911 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5912 5913 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5914 if (!skip_lapack) { 5915 PetscScalar temp_work; 5916 5917 if (use_pod) { 5918 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5919 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 5920 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 5921 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 5922 #if defined(PETSC_USE_COMPLEX) 5923 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 5924 #endif 5925 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5926 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 5927 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 5928 lwork = -1; 5929 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5930 #if !defined(PETSC_USE_COMPLEX) 5931 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 5932 #else 5933 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 5934 #endif 5935 PetscCall(PetscFPTrapPop()); 5936 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 5937 } else { 5938 #if !defined(PETSC_MISSING_LAPACK_GESVD) 5939 /* SVD */ 5940 PetscInt max_n, min_n; 5941 max_n = max_size_of_constraint; 5942 min_n = max_constraints; 5943 if (max_size_of_constraint < max_constraints) { 5944 min_n = max_size_of_constraint; 5945 max_n = max_constraints; 5946 } 5947 PetscCall(PetscMalloc1(min_n, &singular_vals)); 5948 #if defined(PETSC_USE_COMPLEX) 5949 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 5950 #endif 5951 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5952 lwork = -1; 5953 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 5954 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 5955 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 5956 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5957 #if !defined(PETSC_USE_COMPLEX) 5958 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 5959 #else 5960 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 5961 #endif 5962 PetscCall(PetscFPTrapPop()); 5963 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 5964 #else 5965 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 5966 #endif /* on missing GESVD */ 5967 } 5968 /* Allocate optimal workspace */ 5969 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 5970 PetscCall(PetscMalloc1(lwork, &work)); 5971 } 5972 /* Now we can loop on constraining sets */ 5973 total_counts = 0; 5974 constraints_idxs_ptr[0] = 0; 5975 constraints_data_ptr[0] = 0; 5976 /* vertices */ 5977 if (n_vertices) { 5978 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 5979 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 5980 for (i = 0; i < n_vertices; i++) { 5981 constraints_n[total_counts] = 1; 5982 constraints_data[total_counts] = 1.0; 5983 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 5984 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 5985 total_counts++; 5986 } 5987 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 5988 } 5989 5990 /* edges and faces */ 5991 total_counts_cc = total_counts; 5992 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 5993 IS used_is; 5994 PetscBool idxs_copied = PETSC_FALSE; 5995 5996 if (ncc < n_ISForEdges) { 5997 used_is = ISForEdges[ncc]; 5998 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5999 } else { 6000 used_is = ISForFaces[ncc - n_ISForEdges]; 6001 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6002 } 6003 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6004 6005 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6006 if (!size_of_constraint) continue; 6007 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6008 /* change of basis should not be performed on local periodic nodes */ 6009 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6010 if (nnsp_has_cnst) { 6011 PetscScalar quad_value; 6012 6013 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6014 idxs_copied = PETSC_TRUE; 6015 6016 if (!pcbddc->use_nnsp_true) { 6017 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6018 } else { 6019 quad_value = 1.0; 6020 } 6021 for (j = 0; j < size_of_constraint; j++) { constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; } 6022 temp_constraints++; 6023 total_counts++; 6024 } 6025 for (k = 0; k < nnsp_size; k++) { 6026 PetscReal real_value; 6027 PetscScalar *ptr_to_data; 6028 6029 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6030 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6031 for (j = 0; j < size_of_constraint; j++) { ptr_to_data[j] = array[is_indices[j]]; } 6032 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6033 /* check if array is null on the connected component */ 6034 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6035 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6036 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6037 temp_constraints++; 6038 total_counts++; 6039 if (!idxs_copied) { 6040 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6041 idxs_copied = PETSC_TRUE; 6042 } 6043 } 6044 } 6045 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6046 valid_constraints = temp_constraints; 6047 if (!pcbddc->use_nnsp_true && temp_constraints) { 6048 if (temp_constraints == 1) { /* just normalize the constraint */ 6049 PetscScalar norm, *ptr_to_data; 6050 6051 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6052 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6053 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6054 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6055 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6056 } else { /* perform SVD */ 6057 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6058 6059 if (use_pod) { 6060 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6061 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6062 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6063 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6064 from that computed using LAPACKgesvd 6065 -> This is due to a different computation of eigenvectors in LAPACKheev 6066 -> The quality of the POD-computed basis will be the same */ 6067 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6068 /* Store upper triangular part of correlation matrix */ 6069 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6070 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6071 for (j = 0; j < temp_constraints; j++) { 6072 for (k = 0; k < j + 1; k++) { PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); } 6073 } 6074 /* compute eigenvalues and eigenvectors of correlation matrix */ 6075 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6076 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6077 #if !defined(PETSC_USE_COMPLEX) 6078 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6079 #else 6080 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6081 #endif 6082 PetscCall(PetscFPTrapPop()); 6083 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6084 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6085 j = 0; 6086 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6087 total_counts = total_counts - j; 6088 valid_constraints = temp_constraints - j; 6089 /* scale and copy POD basis into used quadrature memory */ 6090 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6091 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6092 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6093 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6094 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6095 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6096 if (j < temp_constraints) { 6097 PetscInt ii; 6098 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6099 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6100 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6101 PetscCall(PetscFPTrapPop()); 6102 for (k = 0; k < temp_constraints - j; k++) { 6103 for (ii = 0; ii < size_of_constraint; ii++) { ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; } 6104 } 6105 } 6106 } else { 6107 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6108 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6109 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6110 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6111 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6112 #if !defined(PETSC_USE_COMPLEX) 6113 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6114 #else 6115 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6116 #endif 6117 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6118 PetscCall(PetscFPTrapPop()); 6119 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6120 k = temp_constraints; 6121 if (k > size_of_constraint) k = size_of_constraint; 6122 j = 0; 6123 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6124 valid_constraints = k - j; 6125 total_counts = total_counts - temp_constraints + valid_constraints; 6126 #else 6127 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6128 #endif /* on missing GESVD */ 6129 } 6130 } 6131 } 6132 /* update pointers information */ 6133 if (valid_constraints) { 6134 constraints_n[total_counts_cc] = valid_constraints; 6135 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6136 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6137 /* set change_of_basis flag */ 6138 if (boolforchange) { PetscBTSet(change_basis, total_counts_cc); } 6139 total_counts_cc++; 6140 } 6141 } 6142 /* free workspace */ 6143 if (!skip_lapack) { 6144 PetscCall(PetscFree(work)); 6145 #if defined(PETSC_USE_COMPLEX) 6146 PetscCall(PetscFree(rwork)); 6147 #endif 6148 PetscCall(PetscFree(singular_vals)); 6149 PetscCall(PetscFree(correlation_mat)); 6150 PetscCall(PetscFree(temp_basis)); 6151 } 6152 for (k = 0; k < nnsp_size; k++) { PetscCall(VecDestroy(&localnearnullsp[k])); } 6153 PetscCall(PetscFree(localnearnullsp)); 6154 /* free index sets of faces, edges and vertices */ 6155 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6156 } else { 6157 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6158 6159 total_counts = 0; 6160 n_vertices = 0; 6161 if (sub_schurs->is_vertices && pcbddc->use_vertices) { PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); } 6162 max_constraints = 0; 6163 total_counts_cc = 0; 6164 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6165 total_counts += pcbddc->adaptive_constraints_n[i]; 6166 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6167 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6168 } 6169 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6170 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6171 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6172 constraints_data = pcbddc->adaptive_constraints_data; 6173 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6174 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6175 total_counts_cc = 0; 6176 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6177 if (pcbddc->adaptive_constraints_n[i]) { constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; } 6178 } 6179 6180 max_size_of_constraint = 0; 6181 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6182 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6183 /* Change of basis */ 6184 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6185 if (pcbddc->use_change_of_basis) { 6186 for (i = 0; i < sub_schurs->n_subs; i++) { 6187 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) { PetscCall(PetscBTSet(change_basis, i + n_vertices)); } 6188 } 6189 } 6190 } 6191 pcbddc->local_primal_size = total_counts; 6192 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6193 6194 /* map constraints_idxs in boundary numbering */ 6195 if (pcbddc->use_change_of_basis) { 6196 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6197 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6198 } 6199 6200 /* Create constraint matrix */ 6201 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6202 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6203 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6204 6205 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6206 /* determine if a QR strategy is needed for change of basis */ 6207 qr_needed = pcbddc->use_qr_single; 6208 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6209 total_primal_vertices = 0; 6210 pcbddc->local_primal_size_cc = 0; 6211 for (i = 0; i < total_counts_cc; i++) { 6212 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6213 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6214 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6215 pcbddc->local_primal_size_cc += 1; 6216 } else if (PetscBTLookup(change_basis, i)) { 6217 for (k = 0; k < constraints_n[i]; k++) { pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; } 6218 pcbddc->local_primal_size_cc += constraints_n[i]; 6219 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6220 PetscBTSet(qr_needed_idx, i); 6221 qr_needed = PETSC_TRUE; 6222 } 6223 } else { 6224 pcbddc->local_primal_size_cc += 1; 6225 } 6226 } 6227 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6228 pcbddc->n_vertices = total_primal_vertices; 6229 /* permute indices in order to have a sorted set of vertices */ 6230 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6231 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6232 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6233 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6234 6235 /* nonzero structure of constraint matrix */ 6236 /* and get reference dof for local constraints */ 6237 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6238 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6239 6240 j = total_primal_vertices; 6241 total_counts = total_primal_vertices; 6242 cum = total_primal_vertices; 6243 for (i = n_vertices; i < total_counts_cc; i++) { 6244 if (!PetscBTLookup(change_basis, i)) { 6245 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6246 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6247 cum++; 6248 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6249 for (k = 0; k < constraints_n[i]; k++) { 6250 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6251 nnz[j + k] = size_of_constraint; 6252 } 6253 j += constraints_n[i]; 6254 } 6255 } 6256 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6257 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6258 PetscCall(PetscFree(nnz)); 6259 6260 /* set values in constraint matrix */ 6261 for (i = 0; i < total_primal_vertices; i++) { PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); } 6262 total_counts = total_primal_vertices; 6263 for (i = n_vertices; i < total_counts_cc; i++) { 6264 if (!PetscBTLookup(change_basis, i)) { 6265 PetscInt *cols; 6266 6267 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6268 cols = constraints_idxs + constraints_idxs_ptr[i]; 6269 for (k = 0; k < constraints_n[i]; k++) { 6270 PetscInt row = total_counts + k; 6271 PetscScalar *vals; 6272 6273 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6274 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6275 } 6276 total_counts += constraints_n[i]; 6277 } 6278 } 6279 /* assembling */ 6280 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6281 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6282 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6283 6284 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6285 if (pcbddc->use_change_of_basis) { 6286 /* dual and primal dofs on a single cc */ 6287 PetscInt dual_dofs, primal_dofs; 6288 /* working stuff for GEQRF */ 6289 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6290 PetscBLASInt lqr_work; 6291 /* working stuff for UNGQR */ 6292 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6293 PetscBLASInt lgqr_work; 6294 /* working stuff for TRTRS */ 6295 PetscScalar *trs_rhs = NULL; 6296 PetscBLASInt Blas_NRHS; 6297 /* pointers for values insertion into change of basis matrix */ 6298 PetscInt *start_rows, *start_cols; 6299 PetscScalar *start_vals; 6300 /* working stuff for values insertion */ 6301 PetscBT is_primal; 6302 PetscInt *aux_primal_numbering_B; 6303 /* matrix sizes */ 6304 PetscInt global_size, local_size; 6305 /* temporary change of basis */ 6306 Mat localChangeOfBasisMatrix; 6307 /* extra space for debugging */ 6308 PetscScalar *dbg_work = NULL; 6309 6310 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6311 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6312 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6313 /* nonzeros for local mat */ 6314 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6315 if (!pcbddc->benign_change || pcbddc->fake_change) { 6316 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6317 } else { 6318 const PetscInt *ii; 6319 PetscInt n; 6320 PetscBool flg_row; 6321 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6322 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6323 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6324 } 6325 for (i = n_vertices; i < total_counts_cc; i++) { 6326 if (PetscBTLookup(change_basis, i)) { 6327 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6328 if (PetscBTLookup(qr_needed_idx, i)) { 6329 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6330 } else { 6331 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6332 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6333 } 6334 } 6335 } 6336 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6337 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6338 PetscCall(PetscFree(nnz)); 6339 /* Set interior change in the matrix */ 6340 if (!pcbddc->benign_change || pcbddc->fake_change) { 6341 for (i = 0; i < pcis->n; i++) { PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); } 6342 } else { 6343 const PetscInt *ii, *jj; 6344 PetscScalar *aa; 6345 PetscInt n; 6346 PetscBool flg_row; 6347 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6348 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6349 for (i = 0; i < n; i++) { PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); } 6350 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6351 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6352 } 6353 6354 if (pcbddc->dbg_flag) { 6355 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6356 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6357 } 6358 6359 /* Now we loop on the constraints which need a change of basis */ 6360 /* 6361 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6362 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6363 6364 Basic blocks of change of basis matrix T computed: 6365 6366 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6367 6368 | 1 0 ... 0 s_1/S | 6369 | 0 1 ... 0 s_2/S | 6370 | ... | 6371 | 0 ... 1 s_{n-1}/S | 6372 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6373 6374 with S = \sum_{i=1}^n s_i^2 6375 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6376 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6377 6378 - QR decomposition of constraints otherwise 6379 */ 6380 if (qr_needed && max_size_of_constraint) { 6381 /* space to store Q */ 6382 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6383 /* array to store scaling factors for reflectors */ 6384 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6385 /* first we issue queries for optimal work */ 6386 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6387 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6388 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6389 lqr_work = -1; 6390 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6391 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6392 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6393 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t), &qr_work)); 6394 lgqr_work = -1; 6395 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6396 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6397 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6398 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6399 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6400 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6401 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6402 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6403 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t), &gqr_work)); 6404 /* array to store rhs and solution of triangular solver */ 6405 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6406 /* allocating workspace for check */ 6407 if (pcbddc->dbg_flag) { PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); } 6408 } 6409 /* array to store whether a node is primal or not */ 6410 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6411 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6412 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6413 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6414 for (i = 0; i < total_primal_vertices; i++) { PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); } 6415 PetscCall(PetscFree(aux_primal_numbering_B)); 6416 6417 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6418 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 6419 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 6420 if (PetscBTLookup(change_basis, total_counts)) { 6421 /* get constraint info */ 6422 primal_dofs = constraints_n[total_counts]; 6423 dual_dofs = size_of_constraint - primal_dofs; 6424 6425 if (pcbddc->dbg_flag) { PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); } 6426 6427 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 6428 6429 /* copy quadrature constraints for change of basis check */ 6430 if (pcbddc->dbg_flag) { PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); } 6431 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6432 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6433 6434 /* compute QR decomposition of constraints */ 6435 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6436 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6437 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6438 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6439 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 6440 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 6441 PetscCall(PetscFPTrapPop()); 6442 6443 /* explicitly compute R^-T */ 6444 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 6445 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 6446 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6447 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 6448 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6449 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6450 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6451 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 6452 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 6453 PetscCall(PetscFPTrapPop()); 6454 6455 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6456 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6457 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6458 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6459 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6460 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6461 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 6462 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 6463 PetscCall(PetscFPTrapPop()); 6464 6465 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6466 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6467 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6468 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6469 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6470 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6471 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6472 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6473 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6474 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6475 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 6476 PetscCall(PetscFPTrapPop()); 6477 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6478 6479 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6480 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6481 /* insert cols for primal dofs */ 6482 for (j = 0; j < primal_dofs; j++) { 6483 start_vals = &qr_basis[j * size_of_constraint]; 6484 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6485 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6486 } 6487 /* insert cols for dual dofs */ 6488 for (j = 0, k = 0; j < dual_dofs; k++) { 6489 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 6490 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 6491 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6492 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6493 j++; 6494 } 6495 } 6496 6497 /* check change of basis */ 6498 if (pcbddc->dbg_flag) { 6499 PetscInt ii, jj; 6500 PetscBool valid_qr = PETSC_TRUE; 6501 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 6502 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6503 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 6504 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6505 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 6506 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 6507 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6508 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 6509 PetscCall(PetscFPTrapPop()); 6510 for (jj = 0; jj < size_of_constraint; jj++) { 6511 for (ii = 0; ii < primal_dofs; ii++) { 6512 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6513 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6514 } 6515 } 6516 if (!valid_qr) { 6517 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 6518 for (jj = 0; jj < size_of_constraint; jj++) { 6519 for (ii = 0; ii < primal_dofs; ii++) { 6520 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 6521 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6522 } 6523 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 6524 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6525 } 6526 } 6527 } 6528 } else { 6529 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 6530 } 6531 } 6532 } else { /* simple transformation block */ 6533 PetscInt row, col; 6534 PetscScalar val, norm; 6535 6536 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6537 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 6538 for (j = 0; j < size_of_constraint; j++) { 6539 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 6540 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6541 if (!PetscBTLookup(is_primal, row_B)) { 6542 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6543 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 6544 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 6545 } else { 6546 for (k = 0; k < size_of_constraint; k++) { 6547 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6548 if (row != col) { 6549 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 6550 } else { 6551 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 6552 } 6553 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 6554 } 6555 } 6556 } 6557 if (pcbddc->dbg_flag) { PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); } 6558 } 6559 } else { 6560 if (pcbddc->dbg_flag) { PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); } 6561 } 6562 } 6563 6564 /* free workspace */ 6565 if (qr_needed) { 6566 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6567 PetscCall(PetscFree(trs_rhs)); 6568 PetscCall(PetscFree(qr_tau)); 6569 PetscCall(PetscFree(qr_work)); 6570 PetscCall(PetscFree(gqr_work)); 6571 PetscCall(PetscFree(qr_basis)); 6572 } 6573 PetscCall(PetscBTDestroy(&is_primal)); 6574 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6575 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6576 6577 /* assembling of global change of variable */ 6578 if (!pcbddc->fake_change) { 6579 Mat tmat; 6580 PetscInt bs; 6581 6582 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 6583 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 6584 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 6585 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 6586 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 6587 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 6588 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 6589 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 6590 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 6591 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 6592 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 6593 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 6594 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 6595 PetscCall(MatDestroy(&tmat)); 6596 PetscCall(VecSet(pcis->vec1_global, 0.0)); 6597 PetscCall(VecSet(pcis->vec1_N, 1.0)); 6598 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6599 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6600 PetscCall(VecReciprocal(pcis->vec1_global)); 6601 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 6602 6603 /* check */ 6604 if (pcbddc->dbg_flag) { 6605 PetscReal error; 6606 Vec x, x_change; 6607 6608 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 6609 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 6610 PetscCall(VecSetRandom(x, NULL)); 6611 PetscCall(VecCopy(x, pcis->vec1_global)); 6612 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6613 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6614 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 6615 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6616 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6617 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 6618 PetscCall(VecAXPY(x, -1.0, x_change)); 6619 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 6620 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 6621 PetscCall(VecDestroy(&x)); 6622 PetscCall(VecDestroy(&x_change)); 6623 } 6624 /* adapt sub_schurs computed (if any) */ 6625 if (pcbddc->use_deluxe_scaling) { 6626 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6627 6628 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6629 if (sub_schurs && sub_schurs->S_Ej_all) { 6630 Mat S_new, tmat; 6631 IS is_all_N, is_V_Sall = NULL; 6632 6633 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 6634 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 6635 if (pcbddc->deluxe_zerorows) { 6636 ISLocalToGlobalMapping NtoSall; 6637 IS is_V; 6638 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 6639 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 6640 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 6641 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6642 PetscCall(ISDestroy(&is_V)); 6643 } 6644 PetscCall(ISDestroy(&is_all_N)); 6645 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6646 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6647 PetscCall(PetscObjectReference((PetscObject)S_new)); 6648 if (pcbddc->deluxe_zerorows) { 6649 const PetscScalar *array; 6650 const PetscInt *idxs_V, *idxs_all; 6651 PetscInt i, n_V; 6652 6653 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6654 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 6655 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 6656 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 6657 PetscCall(VecGetArrayRead(pcis->D, &array)); 6658 for (i = 0; i < n_V; i++) { 6659 PetscScalar val; 6660 PetscInt idx; 6661 6662 idx = idxs_V[i]; 6663 val = array[idxs_all[idxs_V[i]]]; 6664 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 6665 } 6666 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 6667 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 6668 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 6669 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 6670 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 6671 } 6672 sub_schurs->S_Ej_all = S_new; 6673 PetscCall(MatDestroy(&S_new)); 6674 if (sub_schurs->sum_S_Ej_all) { 6675 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6676 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6677 PetscCall(PetscObjectReference((PetscObject)S_new)); 6678 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6679 sub_schurs->sum_S_Ej_all = S_new; 6680 PetscCall(MatDestroy(&S_new)); 6681 } 6682 PetscCall(ISDestroy(&is_V_Sall)); 6683 PetscCall(MatDestroy(&tmat)); 6684 } 6685 /* destroy any change of basis context in sub_schurs */ 6686 if (sub_schurs && sub_schurs->change) { 6687 PetscInt i; 6688 6689 for (i = 0; i < sub_schurs->n_subs; i++) { PetscCall(KSPDestroy(&sub_schurs->change[i])); } 6690 PetscCall(PetscFree(sub_schurs->change)); 6691 } 6692 } 6693 if (pcbddc->switch_static) { /* need to save the local change */ 6694 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6695 } else { 6696 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6697 } 6698 /* determine if any process has changed the pressures locally */ 6699 pcbddc->change_interior = pcbddc->benign_have_null; 6700 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6701 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6702 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6703 pcbddc->use_qr_single = qr_needed; 6704 } 6705 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6706 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6707 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 6708 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6709 } else { 6710 Mat benign_global = NULL; 6711 if (pcbddc->benign_have_null) { 6712 Mat M; 6713 6714 pcbddc->change_interior = PETSC_TRUE; 6715 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 6716 PetscCall(VecReciprocal(pcis->vec1_N)); 6717 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 6718 if (pcbddc->benign_change) { 6719 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 6720 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 6721 } else { 6722 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 6723 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 6724 } 6725 PetscCall(MatISSetLocalMat(benign_global, M)); 6726 PetscCall(MatDestroy(&M)); 6727 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 6728 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 6729 } 6730 if (pcbddc->user_ChangeOfBasisMatrix) { 6731 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 6732 PetscCall(MatDestroy(&benign_global)); 6733 } else if (pcbddc->benign_have_null) { 6734 pcbddc->ChangeOfBasisMatrix = benign_global; 6735 } 6736 } 6737 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6738 IS is_global; 6739 const PetscInt *gidxs; 6740 6741 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 6742 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 6743 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 6744 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 6745 PetscCall(ISDestroy(&is_global)); 6746 } 6747 } 6748 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); } 6749 6750 if (!pcbddc->fake_change) { 6751 /* add pressure dofs to set of primal nodes for numbering purposes */ 6752 for (i = 0; i < pcbddc->benign_n; i++) { 6753 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6754 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6755 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6756 pcbddc->local_primal_size_cc++; 6757 pcbddc->local_primal_size++; 6758 } 6759 6760 /* check if a new primal space has been introduced (also take into account benign trick) */ 6761 pcbddc->new_primal_space_local = PETSC_TRUE; 6762 if (olocal_primal_size == pcbddc->local_primal_size) { 6763 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6764 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6765 if (!pcbddc->new_primal_space_local) { 6766 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6767 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6768 } 6769 } 6770 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6771 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 6772 } 6773 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 6774 6775 /* flush dbg viewer */ 6776 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6777 6778 /* free workspace */ 6779 PetscCall(PetscBTDestroy(&qr_needed_idx)); 6780 PetscCall(PetscBTDestroy(&change_basis)); 6781 if (!pcbddc->adaptive_selection) { 6782 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 6783 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 6784 } else { 6785 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 6786 PetscCall(PetscFree(constraints_n)); 6787 PetscCall(PetscFree(constraints_idxs_B)); 6788 } 6789 PetscFunctionReturn(0); 6790 } 6791 6792 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) { 6793 ISLocalToGlobalMapping map; 6794 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6795 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6796 PetscInt i, N; 6797 PetscBool rcsr = PETSC_FALSE; 6798 6799 PetscFunctionBegin; 6800 if (pcbddc->recompute_topography) { 6801 pcbddc->graphanalyzed = PETSC_FALSE; 6802 /* Reset previously computed graph */ 6803 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 6804 /* Init local Graph struct */ 6805 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 6806 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 6807 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 6808 6809 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); } 6810 /* Check validity of the csr graph passed in by the user */ 6811 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 6812 pcbddc->mat_graph->nvtxs); 6813 6814 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6815 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6816 PetscInt *xadj, *adjncy; 6817 PetscInt nvtxs; 6818 PetscBool flg_row = PETSC_FALSE; 6819 6820 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6821 if (flg_row) { 6822 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 6823 pcbddc->computed_rowadj = PETSC_TRUE; 6824 } 6825 PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6826 rcsr = PETSC_TRUE; 6827 } 6828 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6829 6830 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6831 PetscReal *lcoords; 6832 PetscInt n; 6833 MPI_Datatype dimrealtype; 6834 6835 /* TODO: support for blocked */ 6836 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 6837 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6838 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 6839 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 6840 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 6841 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6842 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6843 PetscCallMPI(MPI_Type_free(&dimrealtype)); 6844 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 6845 6846 pcbddc->mat_graph->coords = lcoords; 6847 pcbddc->mat_graph->cloc = PETSC_TRUE; 6848 pcbddc->mat_graph->cnloc = n; 6849 } 6850 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 6851 pcbddc->mat_graph->nvtxs); 6852 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 6853 6854 /* Setup of Graph */ 6855 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6856 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 6857 6858 /* attach info on disconnected subdomains if present */ 6859 if (pcbddc->n_local_subs) { 6860 PetscInt *local_subs, n, totn; 6861 6862 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6863 PetscCall(PetscMalloc1(n, &local_subs)); 6864 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 6865 for (i = 0; i < pcbddc->n_local_subs; i++) { 6866 const PetscInt *idxs; 6867 PetscInt nl, j; 6868 6869 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 6870 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 6871 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 6872 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 6873 } 6874 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 6875 pcbddc->mat_graph->n_local_subs = totn + 1; 6876 pcbddc->mat_graph->local_subs = local_subs; 6877 } 6878 } 6879 6880 if (!pcbddc->graphanalyzed) { 6881 /* Graph's connected components analysis */ 6882 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 6883 pcbddc->graphanalyzed = PETSC_TRUE; 6884 pcbddc->corner_selected = pcbddc->corner_selection; 6885 } 6886 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6887 PetscFunctionReturn(0); 6888 } 6889 6890 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) { 6891 PetscInt i, j, n; 6892 PetscScalar *alphas; 6893 PetscReal norm, *onorms; 6894 6895 PetscFunctionBegin; 6896 n = *nio; 6897 if (!n) PetscFunctionReturn(0); 6898 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 6899 PetscCall(VecNormalize(vecs[0], &norm)); 6900 if (norm < PETSC_SMALL) { 6901 onorms[0] = 0.0; 6902 PetscCall(VecSet(vecs[0], 0.0)); 6903 } else { 6904 onorms[0] = norm; 6905 } 6906 6907 for (i = 1; i < n; i++) { 6908 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 6909 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 6910 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 6911 PetscCall(VecNormalize(vecs[i], &norm)); 6912 if (norm < PETSC_SMALL) { 6913 onorms[i] = 0.0; 6914 PetscCall(VecSet(vecs[i], 0.0)); 6915 } else { 6916 onorms[i] = norm; 6917 } 6918 } 6919 /* push nonzero vectors at the beginning */ 6920 for (i = 0; i < n; i++) { 6921 if (onorms[i] == 0.0) { 6922 for (j = i + 1; j < n; j++) { 6923 if (onorms[j] != 0.0) { 6924 PetscCall(VecCopy(vecs[j], vecs[i])); 6925 onorms[j] = 0.0; 6926 } 6927 } 6928 } 6929 } 6930 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 6931 PetscCall(PetscFree2(alphas, onorms)); 6932 PetscFunctionReturn(0); 6933 } 6934 6935 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) { 6936 ISLocalToGlobalMapping mapping; 6937 Mat A; 6938 PetscInt n_neighs, *neighs, *n_shared, **shared; 6939 PetscMPIInt size, rank, color; 6940 PetscInt *xadj, *adjncy; 6941 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 6942 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 6943 PetscInt void_procs, *procs_candidates = NULL; 6944 PetscInt xadj_count, *count; 6945 PetscBool ismatis, use_vwgt = PETSC_FALSE; 6946 PetscSubcomm psubcomm; 6947 MPI_Comm subcomm; 6948 6949 PetscFunctionBegin; 6950 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 6951 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 6952 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 6953 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 6954 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 6955 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 6956 6957 if (have_void) *have_void = PETSC_FALSE; 6958 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 6959 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 6960 PetscCall(MatISGetLocalMat(mat, &A)); 6961 PetscCall(MatGetLocalSize(A, &n, NULL)); 6962 im_active = !!n; 6963 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6964 void_procs = size - active_procs; 6965 /* get ranks of of non-active processes in mat communicator */ 6966 if (void_procs) { 6967 PetscInt ncand; 6968 6969 if (have_void) *have_void = PETSC_TRUE; 6970 PetscCall(PetscMalloc1(size, &procs_candidates)); 6971 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 6972 for (i = 0, ncand = 0; i < size; i++) { 6973 if (!procs_candidates[i]) { procs_candidates[ncand++] = i; } 6974 } 6975 /* force n_subdomains to be not greater that the number of non-active processes */ 6976 *n_subdomains = PetscMin(void_procs, *n_subdomains); 6977 } 6978 6979 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6980 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 6981 PetscCall(MatGetSize(mat, &N, NULL)); 6982 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6983 PetscInt issize, isidx, dest; 6984 if (*n_subdomains == 1) dest = 0; 6985 else dest = rank; 6986 if (im_active) { 6987 issize = 1; 6988 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6989 isidx = procs_candidates[dest]; 6990 } else { 6991 isidx = dest; 6992 } 6993 } else { 6994 issize = 0; 6995 isidx = -1; 6996 } 6997 if (*n_subdomains != 1) *n_subdomains = active_procs; 6998 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 6999 PetscCall(PetscFree(procs_candidates)); 7000 PetscFunctionReturn(0); 7001 } 7002 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL)); 7003 PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL)); 7004 threshold = PetscMax(threshold, 2); 7005 7006 /* Get info on mapping */ 7007 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7008 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7009 7010 /* build local CSR graph of subdomains' connectivity */ 7011 PetscCall(PetscMalloc1(2, &xadj)); 7012 xadj[0] = 0; 7013 xadj[1] = PetscMax(n_neighs - 1, 0); 7014 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7015 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7016 PetscCall(PetscCalloc1(n, &count)); 7017 for (i = 1; i < n_neighs; i++) 7018 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7019 7020 xadj_count = 0; 7021 for (i = 1; i < n_neighs; i++) { 7022 for (j = 0; j < n_shared[i]; j++) { 7023 if (count[shared[i][j]] < threshold) { 7024 adjncy[xadj_count] = neighs[i]; 7025 adjncy_wgt[xadj_count] = n_shared[i]; 7026 xadj_count++; 7027 break; 7028 } 7029 } 7030 } 7031 xadj[1] = xadj_count; 7032 PetscCall(PetscFree(count)); 7033 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7034 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7035 7036 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7037 7038 /* Restrict work on active processes only */ 7039 PetscCall(PetscMPIIntCast(im_active, &color)); 7040 if (void_procs) { 7041 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7042 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7043 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7044 subcomm = PetscSubcommChild(psubcomm); 7045 } else { 7046 psubcomm = NULL; 7047 subcomm = PetscObjectComm((PetscObject)mat); 7048 } 7049 7050 v_wgt = NULL; 7051 if (!color) { 7052 PetscCall(PetscFree(xadj)); 7053 PetscCall(PetscFree(adjncy)); 7054 PetscCall(PetscFree(adjncy_wgt)); 7055 } else { 7056 Mat subdomain_adj; 7057 IS new_ranks, new_ranks_contig; 7058 MatPartitioning partitioner; 7059 PetscInt rstart = 0, rend = 0; 7060 PetscInt *is_indices, *oldranks; 7061 PetscMPIInt size; 7062 PetscBool aggregate; 7063 7064 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7065 if (void_procs) { 7066 PetscInt prank = rank; 7067 PetscCall(PetscMalloc1(size, &oldranks)); 7068 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7069 for (i = 0; i < xadj[1]; i++) { PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); } 7070 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7071 } else { 7072 oldranks = NULL; 7073 } 7074 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7075 if (aggregate) { /* TODO: all this part could be made more efficient */ 7076 PetscInt lrows, row, ncols, *cols; 7077 PetscMPIInt nrank; 7078 PetscScalar *vals; 7079 7080 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7081 lrows = 0; 7082 if (nrank < redprocs) { 7083 lrows = size / redprocs; 7084 if (nrank < size % redprocs) lrows++; 7085 } 7086 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7087 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7088 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7089 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7090 row = nrank; 7091 ncols = xadj[1] - xadj[0]; 7092 cols = adjncy; 7093 PetscCall(PetscMalloc1(ncols, &vals)); 7094 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7095 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7096 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7097 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7098 PetscCall(PetscFree(xadj)); 7099 PetscCall(PetscFree(adjncy)); 7100 PetscCall(PetscFree(adjncy_wgt)); 7101 PetscCall(PetscFree(vals)); 7102 if (use_vwgt) { 7103 Vec v; 7104 const PetscScalar *array; 7105 PetscInt nl; 7106 7107 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7108 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7109 PetscCall(VecAssemblyBegin(v)); 7110 PetscCall(VecAssemblyEnd(v)); 7111 PetscCall(VecGetLocalSize(v, &nl)); 7112 PetscCall(VecGetArrayRead(v, &array)); 7113 PetscCall(PetscMalloc1(nl, &v_wgt)); 7114 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7115 PetscCall(VecRestoreArrayRead(v, &array)); 7116 PetscCall(VecDestroy(&v)); 7117 } 7118 } else { 7119 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7120 if (use_vwgt) { 7121 PetscCall(PetscMalloc1(1, &v_wgt)); 7122 v_wgt[0] = n; 7123 } 7124 } 7125 /* PetscCall(MatView(subdomain_adj,0)); */ 7126 7127 /* Partition */ 7128 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7129 #if defined(PETSC_HAVE_PTSCOTCH) 7130 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7131 #elif defined(PETSC_HAVE_PARMETIS) 7132 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7133 #else 7134 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7135 #endif 7136 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7137 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7138 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7139 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7140 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7141 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7142 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7143 7144 /* renumber new_ranks to avoid "holes" in new set of processors */ 7145 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7146 PetscCall(ISDestroy(&new_ranks)); 7147 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7148 if (!aggregate) { 7149 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7150 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7151 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7152 } else if (oldranks) { 7153 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7154 } else { 7155 ranks_send_to_idx[0] = is_indices[0]; 7156 } 7157 } else { 7158 PetscInt idx = 0; 7159 PetscMPIInt tag; 7160 MPI_Request *reqs; 7161 7162 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7163 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7164 for (i = rstart; i < rend; i++) { PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); } 7165 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7166 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7167 PetscCall(PetscFree(reqs)); 7168 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7169 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7170 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7171 } else if (oldranks) { 7172 ranks_send_to_idx[0] = oldranks[idx]; 7173 } else { 7174 ranks_send_to_idx[0] = idx; 7175 } 7176 } 7177 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7178 /* clean up */ 7179 PetscCall(PetscFree(oldranks)); 7180 PetscCall(ISDestroy(&new_ranks_contig)); 7181 PetscCall(MatDestroy(&subdomain_adj)); 7182 PetscCall(MatPartitioningDestroy(&partitioner)); 7183 } 7184 PetscCall(PetscSubcommDestroy(&psubcomm)); 7185 PetscCall(PetscFree(procs_candidates)); 7186 7187 /* assemble parallel IS for sends */ 7188 i = 1; 7189 if (!color) i = 0; 7190 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7191 PetscFunctionReturn(0); 7192 } 7193 7194 typedef enum { 7195 MATDENSE_PRIVATE = 0, 7196 MATAIJ_PRIVATE, 7197 MATBAIJ_PRIVATE, 7198 MATSBAIJ_PRIVATE 7199 } MatTypePrivate; 7200 7201 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) { 7202 Mat local_mat; 7203 IS is_sends_internal; 7204 PetscInt rows, cols, new_local_rows; 7205 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7206 PetscBool ismatis, isdense, newisdense, destroy_mat; 7207 ISLocalToGlobalMapping l2gmap; 7208 PetscInt *l2gmap_indices; 7209 const PetscInt *is_indices; 7210 MatType new_local_type; 7211 /* buffers */ 7212 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7213 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7214 PetscInt *recv_buffer_idxs_local; 7215 PetscScalar *ptr_vals, *recv_buffer_vals; 7216 const PetscScalar *send_buffer_vals; 7217 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7218 /* MPI */ 7219 MPI_Comm comm, comm_n; 7220 PetscSubcomm subcomm; 7221 PetscMPIInt n_sends, n_recvs, size; 7222 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7223 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7224 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7225 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7226 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7227 7228 PetscFunctionBegin; 7229 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7230 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7231 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7232 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7233 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7234 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7235 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7236 PetscValidLogicalCollectiveInt(mat, nis, 8); 7237 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7238 if (nvecs) { 7239 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7240 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7241 } 7242 /* further checks */ 7243 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7244 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7245 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7246 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7247 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7248 if (reuse && *mat_n) { 7249 PetscInt mrows, mcols, mnrows, mncols; 7250 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7251 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7252 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7253 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7254 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7255 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7256 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7257 } 7258 PetscCall(MatGetBlockSize(local_mat, &bs)); 7259 PetscValidLogicalCollectiveInt(mat, bs, 1); 7260 7261 /* prepare IS for sending if not provided */ 7262 if (!is_sends) { 7263 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7264 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7265 } else { 7266 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7267 is_sends_internal = is_sends; 7268 } 7269 7270 /* get comm */ 7271 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7272 7273 /* compute number of sends */ 7274 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7275 PetscCall(PetscMPIIntCast(i, &n_sends)); 7276 7277 /* compute number of receives */ 7278 PetscCallMPI(MPI_Comm_size(comm, &size)); 7279 PetscCall(PetscMalloc1(size, &iflags)); 7280 PetscCall(PetscArrayzero(iflags, size)); 7281 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7282 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7283 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7284 PetscCall(PetscFree(iflags)); 7285 7286 /* restrict comm if requested */ 7287 subcomm = NULL; 7288 destroy_mat = PETSC_FALSE; 7289 if (restrict_comm) { 7290 PetscMPIInt color, subcommsize; 7291 7292 color = 0; 7293 if (restrict_full) { 7294 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7295 } else { 7296 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7297 } 7298 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7299 subcommsize = size - subcommsize; 7300 /* check if reuse has been requested */ 7301 if (reuse) { 7302 if (*mat_n) { 7303 PetscMPIInt subcommsize2; 7304 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7305 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7306 comm_n = PetscObjectComm((PetscObject)*mat_n); 7307 } else { 7308 comm_n = PETSC_COMM_SELF; 7309 } 7310 } else { /* MAT_INITIAL_MATRIX */ 7311 PetscMPIInt rank; 7312 7313 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7314 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7315 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7316 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7317 comm_n = PetscSubcommChild(subcomm); 7318 } 7319 /* flag to destroy *mat_n if not significative */ 7320 if (color) destroy_mat = PETSC_TRUE; 7321 } else { 7322 comm_n = comm; 7323 } 7324 7325 /* prepare send/receive buffers */ 7326 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7327 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7328 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7329 PetscCall(PetscArrayzero(ilengths_vals, size)); 7330 if (nis) { PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); } 7331 7332 /* Get data from local matrices */ 7333 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7334 /* TODO: See below some guidelines on how to prepare the local buffers */ 7335 /* 7336 send_buffer_vals should contain the raw values of the local matrix 7337 send_buffer_idxs should contain: 7338 - MatType_PRIVATE type 7339 - PetscInt size_of_l2gmap 7340 - PetscInt global_row_indices[size_of_l2gmap] 7341 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7342 */ 7343 { 7344 ISLocalToGlobalMapping mapping; 7345 7346 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7347 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7348 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7349 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7350 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7351 send_buffer_idxs[1] = i; 7352 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7353 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7354 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7355 PetscCall(PetscMPIIntCast(i, &len)); 7356 for (i = 0; i < n_sends; i++) { 7357 ilengths_vals[is_indices[i]] = len * len; 7358 ilengths_idxs[is_indices[i]] = len + 2; 7359 } 7360 } 7361 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7362 /* additional is (if any) */ 7363 if (nis) { 7364 PetscMPIInt psum; 7365 PetscInt j; 7366 for (j = 0, psum = 0; j < nis; j++) { 7367 PetscInt plen; 7368 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7369 PetscCall(PetscMPIIntCast(plen, &len)); 7370 psum += len + 1; /* indices + length */ 7371 } 7372 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7373 for (j = 0, psum = 0; j < nis; j++) { 7374 PetscInt plen; 7375 const PetscInt *is_array_idxs; 7376 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7377 send_buffer_idxs_is[psum] = plen; 7378 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7379 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7380 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7381 psum += plen + 1; /* indices + length */ 7382 } 7383 for (i = 0; i < n_sends; i++) { ilengths_idxs_is[is_indices[i]] = psum; } 7384 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7385 } 7386 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7387 7388 buf_size_idxs = 0; 7389 buf_size_vals = 0; 7390 buf_size_idxs_is = 0; 7391 buf_size_vecs = 0; 7392 for (i = 0; i < n_recvs; i++) { 7393 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7394 buf_size_vals += (PetscInt)olengths_vals[i]; 7395 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7396 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7397 } 7398 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 7399 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 7400 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 7401 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 7402 7403 /* get new tags for clean communications */ 7404 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 7405 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 7406 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 7407 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 7408 7409 /* allocate for requests */ 7410 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 7411 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 7412 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 7413 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 7414 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 7415 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 7416 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 7417 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 7418 7419 /* communications */ 7420 ptr_idxs = recv_buffer_idxs; 7421 ptr_vals = recv_buffer_vals; 7422 ptr_idxs_is = recv_buffer_idxs_is; 7423 ptr_vecs = recv_buffer_vecs; 7424 for (i = 0; i < n_recvs; i++) { 7425 source_dest = onodes[i]; 7426 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 7427 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 7428 ptr_idxs += olengths_idxs[i]; 7429 ptr_vals += olengths_vals[i]; 7430 if (nis) { 7431 source_dest = onodes_is[i]; 7432 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 7433 ptr_idxs_is += olengths_idxs_is[i]; 7434 } 7435 if (nvecs) { 7436 source_dest = onodes[i]; 7437 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 7438 ptr_vecs += olengths_idxs[i] - 2; 7439 } 7440 } 7441 for (i = 0; i < n_sends; i++) { 7442 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 7443 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 7444 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 7445 if (nis) { PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); } 7446 if (nvecs) { 7447 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7448 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 7449 } 7450 } 7451 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 7452 PetscCall(ISDestroy(&is_sends_internal)); 7453 7454 /* assemble new l2g map */ 7455 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 7456 ptr_idxs = recv_buffer_idxs; 7457 new_local_rows = 0; 7458 for (i = 0; i < n_recvs; i++) { 7459 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7460 ptr_idxs += olengths_idxs[i]; 7461 } 7462 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 7463 ptr_idxs = recv_buffer_idxs; 7464 new_local_rows = 0; 7465 for (i = 0; i < n_recvs; i++) { 7466 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 7467 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7468 ptr_idxs += olengths_idxs[i]; 7469 } 7470 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 7471 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 7472 PetscCall(PetscFree(l2gmap_indices)); 7473 7474 /* infer new local matrix type from received local matrices type */ 7475 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7476 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7477 if (n_recvs) { 7478 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7479 ptr_idxs = recv_buffer_idxs; 7480 for (i = 0; i < n_recvs; i++) { 7481 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7482 new_local_type_private = MATAIJ_PRIVATE; 7483 break; 7484 } 7485 ptr_idxs += olengths_idxs[i]; 7486 } 7487 switch (new_local_type_private) { 7488 case MATDENSE_PRIVATE: 7489 new_local_type = MATSEQAIJ; 7490 bs = 1; 7491 break; 7492 case MATAIJ_PRIVATE: 7493 new_local_type = MATSEQAIJ; 7494 bs = 1; 7495 break; 7496 case MATBAIJ_PRIVATE: new_local_type = MATSEQBAIJ; break; 7497 case MATSBAIJ_PRIVATE: new_local_type = MATSEQSBAIJ; break; 7498 default: SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 7499 } 7500 } else { /* by default, new_local_type is seqaij */ 7501 new_local_type = MATSEQAIJ; 7502 bs = 1; 7503 } 7504 7505 /* create MATIS object if needed */ 7506 if (!reuse) { 7507 PetscCall(MatGetSize(mat, &rows, &cols)); 7508 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7509 } else { 7510 /* it also destroys the local matrices */ 7511 if (*mat_n) { 7512 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 7513 } else { /* this is a fake object */ 7514 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7515 } 7516 } 7517 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 7518 PetscCall(MatSetType(local_mat, new_local_type)); 7519 7520 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 7521 7522 /* Global to local map of received indices */ 7523 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 7524 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 7525 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7526 7527 /* restore attributes -> type of incoming data and its size */ 7528 buf_size_idxs = 0; 7529 for (i = 0; i < n_recvs; i++) { 7530 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7531 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 7532 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7533 } 7534 PetscCall(PetscFree(recv_buffer_idxs)); 7535 7536 /* set preallocation */ 7537 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 7538 if (!newisdense) { 7539 PetscInt *new_local_nnz = NULL; 7540 7541 ptr_idxs = recv_buffer_idxs_local; 7542 if (n_recvs) { PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); } 7543 for (i = 0; i < n_recvs; i++) { 7544 PetscInt j; 7545 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7546 for (j = 0; j < *(ptr_idxs + 1); j++) { new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); } 7547 } else { 7548 /* TODO */ 7549 } 7550 ptr_idxs += olengths_idxs[i]; 7551 } 7552 if (new_local_nnz) { 7553 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 7554 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 7555 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 7556 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7557 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 7558 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7559 } else { 7560 PetscCall(MatSetUp(local_mat)); 7561 } 7562 PetscCall(PetscFree(new_local_nnz)); 7563 } else { 7564 PetscCall(MatSetUp(local_mat)); 7565 } 7566 7567 /* set values */ 7568 ptr_vals = recv_buffer_vals; 7569 ptr_idxs = recv_buffer_idxs_local; 7570 for (i = 0; i < n_recvs; i++) { 7571 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7572 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 7573 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 7574 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 7575 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 7576 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 7577 } else { 7578 /* TODO */ 7579 } 7580 ptr_idxs += olengths_idxs[i]; 7581 ptr_vals += olengths_vals[i]; 7582 } 7583 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 7584 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 7585 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 7586 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 7587 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 7588 PetscCall(PetscFree(recv_buffer_vals)); 7589 7590 #if 0 7591 if (!restrict_comm) { /* check */ 7592 Vec lvec,rvec; 7593 PetscReal infty_error; 7594 7595 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7596 PetscCall(VecSetRandom(rvec,NULL)); 7597 PetscCall(MatMult(mat,rvec,lvec)); 7598 PetscCall(VecScale(lvec,-1.0)); 7599 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7600 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7601 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7602 PetscCall(VecDestroy(&rvec)); 7603 PetscCall(VecDestroy(&lvec)); 7604 } 7605 #endif 7606 7607 /* assemble new additional is (if any) */ 7608 if (nis) { 7609 PetscInt **temp_idxs, *count_is, j, psum; 7610 7611 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 7612 PetscCall(PetscCalloc1(nis, &count_is)); 7613 ptr_idxs = recv_buffer_idxs_is; 7614 psum = 0; 7615 for (i = 0; i < n_recvs; i++) { 7616 for (j = 0; j < nis; j++) { 7617 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7618 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7619 psum += plen; 7620 ptr_idxs += plen + 1; /* shift pointer to received data */ 7621 } 7622 } 7623 PetscCall(PetscMalloc1(nis, &temp_idxs)); 7624 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 7625 for (i = 1; i < nis; i++) { temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1]; } 7626 PetscCall(PetscArrayzero(count_is, nis)); 7627 ptr_idxs = recv_buffer_idxs_is; 7628 for (i = 0; i < n_recvs; i++) { 7629 for (j = 0; j < nis; j++) { 7630 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7631 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 7632 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7633 ptr_idxs += plen + 1; /* shift pointer to received data */ 7634 } 7635 } 7636 for (i = 0; i < nis; i++) { 7637 PetscCall(ISDestroy(&isarray[i])); 7638 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 7639 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 7640 } 7641 PetscCall(PetscFree(count_is)); 7642 PetscCall(PetscFree(temp_idxs[0])); 7643 PetscCall(PetscFree(temp_idxs)); 7644 } 7645 /* free workspace */ 7646 PetscCall(PetscFree(recv_buffer_idxs_is)); 7647 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 7648 PetscCall(PetscFree(send_buffer_idxs)); 7649 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 7650 if (isdense) { 7651 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7652 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 7653 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7654 } else { 7655 /* PetscCall(PetscFree(send_buffer_vals)); */ 7656 } 7657 if (nis) { 7658 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 7659 PetscCall(PetscFree(send_buffer_idxs_is)); 7660 } 7661 7662 if (nvecs) { 7663 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 7664 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 7665 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7666 PetscCall(VecDestroy(&nnsp_vec[0])); 7667 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 7668 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 7669 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 7670 /* set values */ 7671 ptr_vals = recv_buffer_vecs; 7672 ptr_idxs = recv_buffer_idxs_local; 7673 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7674 for (i = 0; i < n_recvs; i++) { 7675 PetscInt j; 7676 for (j = 0; j < *(ptr_idxs + 1); j++) { send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); } 7677 ptr_idxs += olengths_idxs[i]; 7678 ptr_vals += olengths_idxs[i] - 2; 7679 } 7680 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7681 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 7682 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 7683 } 7684 7685 PetscCall(PetscFree(recv_buffer_vecs)); 7686 PetscCall(PetscFree(recv_buffer_idxs_local)); 7687 PetscCall(PetscFree(recv_req_idxs)); 7688 PetscCall(PetscFree(recv_req_vals)); 7689 PetscCall(PetscFree(recv_req_vecs)); 7690 PetscCall(PetscFree(recv_req_idxs_is)); 7691 PetscCall(PetscFree(send_req_idxs)); 7692 PetscCall(PetscFree(send_req_vals)); 7693 PetscCall(PetscFree(send_req_vecs)); 7694 PetscCall(PetscFree(send_req_idxs_is)); 7695 PetscCall(PetscFree(ilengths_vals)); 7696 PetscCall(PetscFree(ilengths_idxs)); 7697 PetscCall(PetscFree(olengths_vals)); 7698 PetscCall(PetscFree(olengths_idxs)); 7699 PetscCall(PetscFree(onodes)); 7700 if (nis) { 7701 PetscCall(PetscFree(ilengths_idxs_is)); 7702 PetscCall(PetscFree(olengths_idxs_is)); 7703 PetscCall(PetscFree(onodes_is)); 7704 } 7705 PetscCall(PetscSubcommDestroy(&subcomm)); 7706 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 7707 PetscCall(MatDestroy(mat_n)); 7708 for (i = 0; i < nis; i++) { PetscCall(ISDestroy(&isarray[i])); } 7709 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7710 PetscCall(VecDestroy(&nnsp_vec[0])); 7711 } 7712 *mat_n = NULL; 7713 } 7714 PetscFunctionReturn(0); 7715 } 7716 7717 /* temporary hack into ksp private data structure */ 7718 #include <petsc/private/kspimpl.h> 7719 7720 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals) { 7721 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7722 PC_IS *pcis = (PC_IS *)pc->data; 7723 Mat coarse_mat, coarse_mat_is, coarse_submat_dense; 7724 Mat coarsedivudotp = NULL; 7725 Mat coarseG, t_coarse_mat_is; 7726 MatNullSpace CoarseNullSpace = NULL; 7727 ISLocalToGlobalMapping coarse_islg; 7728 IS coarse_is, *isarray, corners; 7729 PetscInt i, im_active = -1, active_procs = -1; 7730 PetscInt nis, nisdofs, nisneu, nisvert; 7731 PetscInt coarse_eqs_per_proc; 7732 PC pc_temp; 7733 PCType coarse_pc_type; 7734 KSPType coarse_ksp_type; 7735 PetscBool multilevel_requested, multilevel_allowed; 7736 PetscBool coarse_reuse; 7737 PetscInt ncoarse, nedcfield; 7738 PetscBool compute_vecs = PETSC_FALSE; 7739 PetscScalar *array; 7740 MatReuse coarse_mat_reuse; 7741 PetscBool restr, full_restr, have_void; 7742 PetscMPIInt size; 7743 7744 PetscFunctionBegin; 7745 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 7746 /* Assign global numbering to coarse dofs */ 7747 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7748 PetscInt ocoarse_size; 7749 compute_vecs = PETSC_TRUE; 7750 7751 pcbddc->new_primal_space = PETSC_TRUE; 7752 ocoarse_size = pcbddc->coarse_size; 7753 PetscCall(PetscFree(pcbddc->global_primal_indices)); 7754 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 7755 /* see if we can avoid some work */ 7756 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7757 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7758 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7759 PetscCall(KSPReset(pcbddc->coarse_ksp)); 7760 coarse_reuse = PETSC_FALSE; 7761 } else { /* we can safely reuse already computed coarse matrix */ 7762 coarse_reuse = PETSC_TRUE; 7763 } 7764 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7765 coarse_reuse = PETSC_FALSE; 7766 } 7767 /* reset any subassembling information */ 7768 if (!coarse_reuse || pcbddc->recompute_topography) { PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); } 7769 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7770 coarse_reuse = PETSC_TRUE; 7771 } 7772 if (coarse_reuse && pcbddc->coarse_ksp) { 7773 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 7774 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 7775 coarse_mat_reuse = MAT_REUSE_MATRIX; 7776 } else { 7777 coarse_mat = NULL; 7778 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7779 } 7780 7781 /* creates temporary l2gmap and IS for coarse indexes */ 7782 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 7783 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 7784 7785 /* creates temporary MATIS object for coarse matrix */ 7786 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense)); 7787 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is)); 7788 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense)); 7789 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7790 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7791 PetscCall(MatDestroy(&coarse_submat_dense)); 7792 7793 /* count "active" (i.e. with positive local size) and "void" processes */ 7794 im_active = !!(pcis->n); 7795 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7796 7797 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7798 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 7799 /* full_restr : just use the receivers from the subassembling pattern */ 7800 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 7801 coarse_mat_is = NULL; 7802 multilevel_allowed = PETSC_FALSE; 7803 multilevel_requested = PETSC_FALSE; 7804 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 7805 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 7806 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7807 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7808 if (multilevel_requested) { 7809 ncoarse = active_procs / pcbddc->coarsening_ratio; 7810 restr = PETSC_FALSE; 7811 full_restr = PETSC_FALSE; 7812 } else { 7813 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 7814 restr = PETSC_TRUE; 7815 full_restr = PETSC_TRUE; 7816 } 7817 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7818 ncoarse = PetscMax(1, ncoarse); 7819 if (!pcbddc->coarse_subassembling) { 7820 if (pcbddc->coarsening_ratio > 1) { 7821 if (multilevel_requested) { 7822 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7823 } else { 7824 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7825 } 7826 } else { 7827 PetscMPIInt rank; 7828 7829 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 7830 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7831 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 7832 } 7833 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7834 PetscInt psum; 7835 if (pcbddc->coarse_ksp) psum = 1; 7836 else psum = 0; 7837 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7838 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7839 } 7840 /* determine if we can go multilevel */ 7841 if (multilevel_requested) { 7842 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7843 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7844 } 7845 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7846 7847 /* dump subassembling pattern */ 7848 if (pcbddc->dbg_flag && multilevel_allowed) { PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); } 7849 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7850 nedcfield = -1; 7851 corners = NULL; 7852 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 7853 PetscInt *tidxs, *tidxs2, nout, tsize, i; 7854 const PetscInt *idxs; 7855 ISLocalToGlobalMapping tmap; 7856 7857 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7858 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 7859 /* allocate space for temporary storage */ 7860 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 7861 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 7862 /* allocate for IS array */ 7863 nisdofs = pcbddc->n_ISForDofsLocal; 7864 if (pcbddc->nedclocal) { 7865 if (pcbddc->nedfield > -1) { 7866 nedcfield = pcbddc->nedfield; 7867 } else { 7868 nedcfield = 0; 7869 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 7870 nisdofs = 1; 7871 } 7872 } 7873 nisneu = !!pcbddc->NeumannBoundariesLocal; 7874 nisvert = 0; /* nisvert is not used */ 7875 nis = nisdofs + nisneu + nisvert; 7876 PetscCall(PetscMalloc1(nis, &isarray)); 7877 /* dofs splitting */ 7878 for (i = 0; i < nisdofs; i++) { 7879 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 7880 if (nedcfield != i) { 7881 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 7882 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7883 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7884 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7885 } else { 7886 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 7887 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 7888 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7889 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7890 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 7891 } 7892 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7893 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 7894 /* PetscCall(ISView(isarray[i],0)); */ 7895 } 7896 /* neumann boundaries */ 7897 if (pcbddc->NeumannBoundariesLocal) { 7898 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 7899 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 7900 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7901 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7902 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7903 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7904 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 7905 /* PetscCall(ISView(isarray[nisdofs],0)); */ 7906 } 7907 /* coordinates */ 7908 if (pcbddc->corner_selected) { 7909 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7910 PetscCall(ISGetLocalSize(corners, &tsize)); 7911 PetscCall(ISGetIndices(corners, &idxs)); 7912 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7913 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7914 PetscCall(ISRestoreIndices(corners, &idxs)); 7915 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7916 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7917 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 7918 } 7919 PetscCall(PetscFree(tidxs)); 7920 PetscCall(PetscFree(tidxs2)); 7921 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 7922 } else { 7923 nis = 0; 7924 nisdofs = 0; 7925 nisneu = 0; 7926 nisvert = 0; 7927 isarray = NULL; 7928 } 7929 /* destroy no longer needed map */ 7930 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 7931 7932 /* subassemble */ 7933 if (multilevel_allowed) { 7934 Vec vp[1]; 7935 PetscInt nvecs = 0; 7936 PetscBool reuse, reuser; 7937 7938 if (coarse_mat) reuse = PETSC_TRUE; 7939 else reuse = PETSC_FALSE; 7940 PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7941 vp[0] = NULL; 7942 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7943 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 7944 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 7945 PetscCall(VecSetType(vp[0], VECSTANDARD)); 7946 nvecs = 1; 7947 7948 if (pcbddc->divudotp) { 7949 Mat B, loc_divudotp; 7950 Vec v, p; 7951 IS dummy; 7952 PetscInt np; 7953 7954 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 7955 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 7956 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 7957 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 7958 PetscCall(MatCreateVecs(B, &v, &p)); 7959 PetscCall(VecSet(p, 1.)); 7960 PetscCall(MatMultTranspose(B, p, v)); 7961 PetscCall(VecDestroy(&p)); 7962 PetscCall(MatDestroy(&B)); 7963 PetscCall(VecGetArray(vp[0], &array)); 7964 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 7965 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 7966 PetscCall(VecResetArray(pcbddc->vec1_P)); 7967 PetscCall(VecRestoreArray(vp[0], &array)); 7968 PetscCall(ISDestroy(&dummy)); 7969 PetscCall(VecDestroy(&v)); 7970 } 7971 } 7972 if (reuser) { 7973 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 7974 } else { 7975 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 7976 } 7977 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7978 PetscScalar *arraym; 7979 const PetscScalar *arrayv; 7980 PetscInt nl; 7981 PetscCall(VecGetLocalSize(vp[0], &nl)); 7982 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 7983 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 7984 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 7985 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 7986 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 7987 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 7988 PetscCall(VecDestroy(&vp[0])); 7989 } else { 7990 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 7991 } 7992 } else { 7993 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 7994 } 7995 if (coarse_mat_is || coarse_mat) { 7996 if (!multilevel_allowed) { 7997 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 7998 } else { 7999 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8000 if (coarse_mat_is) { 8001 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8002 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8003 coarse_mat = coarse_mat_is; 8004 } 8005 } 8006 } 8007 PetscCall(MatDestroy(&t_coarse_mat_is)); 8008 PetscCall(MatDestroy(&coarse_mat_is)); 8009 8010 /* create local to global scatters for coarse problem */ 8011 if (compute_vecs) { 8012 PetscInt lrows; 8013 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8014 if (coarse_mat) { 8015 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8016 } else { 8017 lrows = 0; 8018 } 8019 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8020 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8021 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8022 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8023 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8024 } 8025 PetscCall(ISDestroy(&coarse_is)); 8026 8027 /* set defaults for coarse KSP and PC */ 8028 if (multilevel_allowed) { 8029 coarse_ksp_type = KSPRICHARDSON; 8030 coarse_pc_type = PCBDDC; 8031 } else { 8032 coarse_ksp_type = KSPPREONLY; 8033 coarse_pc_type = PCREDUNDANT; 8034 } 8035 8036 /* print some info if requested */ 8037 if (pcbddc->dbg_flag) { 8038 if (!multilevel_allowed) { 8039 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8040 if (multilevel_requested) { 8041 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio)); 8042 } else if (pcbddc->max_levels) { 8043 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8044 } 8045 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8046 } 8047 } 8048 8049 /* communicate coarse discrete gradient */ 8050 coarseG = NULL; 8051 if (pcbddc->nedcG && multilevel_allowed) { 8052 MPI_Comm ccomm; 8053 if (coarse_mat) { 8054 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8055 } else { 8056 ccomm = MPI_COMM_NULL; 8057 } 8058 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8059 } 8060 8061 /* create the coarse KSP object only once with defaults */ 8062 if (coarse_mat) { 8063 PetscBool isredundant, isbddc, force, valid; 8064 PetscViewer dbg_viewer = NULL; 8065 PetscBool isset, issym, isher, isspd; 8066 8067 if (pcbddc->dbg_flag) { 8068 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8069 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8070 } 8071 if (!pcbddc->coarse_ksp) { 8072 char prefix[256], str_level[16]; 8073 size_t len; 8074 8075 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8076 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8077 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8078 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8079 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8080 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8081 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8082 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8083 /* TODO is this logic correct? should check for coarse_mat type */ 8084 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8085 /* prefix */ 8086 PetscCall(PetscStrcpy(prefix, "")); 8087 PetscCall(PetscStrcpy(str_level, "")); 8088 if (!pcbddc->current_level) { 8089 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8090 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8091 } else { 8092 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8093 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8094 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8095 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8096 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8097 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 8098 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8099 } 8100 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8101 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8102 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8103 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8104 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8105 /* allow user customization */ 8106 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8107 /* get some info after set from options */ 8108 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8109 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8110 force = PETSC_FALSE; 8111 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8112 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8113 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8114 if (multilevel_allowed && !force && !valid) { 8115 isbddc = PETSC_TRUE; 8116 PetscCall(PCSetType(pc_temp, PCBDDC)); 8117 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8118 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8119 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8120 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8121 PetscObjectOptionsBegin((PetscObject)pc_temp); 8122 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8123 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8124 PetscOptionsEnd(); 8125 pc_temp->setfromoptionscalled++; 8126 } 8127 } 8128 } 8129 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8130 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8131 if (nisdofs) { 8132 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8133 for (i = 0; i < nisdofs; i++) { PetscCall(ISDestroy(&isarray[i])); } 8134 } 8135 if (nisneu) { 8136 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8137 PetscCall(ISDestroy(&isarray[nisdofs])); 8138 } 8139 if (nisvert) { 8140 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8141 PetscCall(ISDestroy(&isarray[nis - 1])); 8142 } 8143 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8144 8145 /* get some info after set from options */ 8146 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8147 8148 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8149 if (isbddc && !multilevel_allowed) { PetscCall(PCSetType(pc_temp, coarse_pc_type)); } 8150 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8151 force = PETSC_FALSE; 8152 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8153 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8154 if (multilevel_requested && multilevel_allowed && !valid && !force) { PetscCall(PCSetType(pc_temp, PCBDDC)); } 8155 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8156 if (isredundant) { 8157 KSP inner_ksp; 8158 PC inner_pc; 8159 8160 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8161 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8162 } 8163 8164 /* parameters which miss an API */ 8165 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8166 if (isbddc) { 8167 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8168 8169 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8170 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8171 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8172 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8173 if (pcbddc_coarse->benign_saddle_point) { 8174 Mat coarsedivudotp_is; 8175 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8176 IS row, col; 8177 const PetscInt *gidxs; 8178 PetscInt n, st, M, N; 8179 8180 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8181 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8182 st = st - n; 8183 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8184 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8185 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8186 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8187 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8188 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8189 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8190 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8191 PetscCall(ISGetSize(row, &M)); 8192 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8193 PetscCall(ISDestroy(&row)); 8194 PetscCall(ISDestroy(&col)); 8195 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8196 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8197 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8198 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8199 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8200 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8201 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8202 PetscCall(MatDestroy(&coarsedivudotp)); 8203 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8204 PetscCall(MatDestroy(&coarsedivudotp_is)); 8205 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8206 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8207 } 8208 } 8209 8210 /* propagate symmetry info of coarse matrix */ 8211 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8212 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8213 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8214 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8215 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8216 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8217 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8218 8219 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); } 8220 /* set operators */ 8221 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8222 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8223 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8224 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8225 } 8226 PetscCall(MatDestroy(&coarseG)); 8227 PetscCall(PetscFree(isarray)); 8228 #if 0 8229 { 8230 PetscViewer viewer; 8231 char filename[256]; 8232 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8233 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8234 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8235 PetscCall(MatView(coarse_mat,viewer)); 8236 PetscCall(PetscViewerPopFormat(viewer)); 8237 PetscCall(PetscViewerDestroy(&viewer)); 8238 } 8239 #endif 8240 8241 if (corners) { 8242 Vec gv; 8243 IS is; 8244 const PetscInt *idxs; 8245 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8246 PetscScalar *coords; 8247 8248 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8249 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8250 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8251 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8252 PetscCall(VecSetBlockSize(gv, cdim)); 8253 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8254 PetscCall(VecSetType(gv, VECSTANDARD)); 8255 PetscCall(VecSetFromOptions(gv)); 8256 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8257 8258 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8259 PetscCall(ISGetLocalSize(is, &n)); 8260 PetscCall(ISGetIndices(is, &idxs)); 8261 PetscCall(PetscMalloc1(n * cdim, &coords)); 8262 for (i = 0; i < n; i++) { 8263 for (d = 0; d < cdim; d++) { coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; } 8264 } 8265 PetscCall(ISRestoreIndices(is, &idxs)); 8266 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8267 8268 PetscCall(ISGetLocalSize(corners, &n)); 8269 PetscCall(ISGetIndices(corners, &idxs)); 8270 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8271 PetscCall(ISRestoreIndices(corners, &idxs)); 8272 PetscCall(PetscFree(coords)); 8273 PetscCall(VecAssemblyBegin(gv)); 8274 PetscCall(VecAssemblyEnd(gv)); 8275 PetscCall(VecGetArray(gv, &coords)); 8276 if (pcbddc->coarse_ksp) { 8277 PC coarse_pc; 8278 PetscBool isbddc; 8279 8280 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8281 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8282 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8283 PetscReal *realcoords; 8284 8285 PetscCall(VecGetLocalSize(gv, &n)); 8286 #if defined(PETSC_USE_COMPLEX) 8287 PetscCall(PetscMalloc1(n, &realcoords)); 8288 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8289 #else 8290 realcoords = coords; 8291 #endif 8292 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8293 #if defined(PETSC_USE_COMPLEX) 8294 PetscCall(PetscFree(realcoords)); 8295 #endif 8296 } 8297 } 8298 PetscCall(VecRestoreArray(gv, &coords)); 8299 PetscCall(VecDestroy(&gv)); 8300 } 8301 PetscCall(ISDestroy(&corners)); 8302 8303 if (pcbddc->coarse_ksp) { 8304 Vec crhs, csol; 8305 8306 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8307 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8308 if (!csol) { PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL)); } 8309 if (!crhs) { PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs))); } 8310 } 8311 PetscCall(MatDestroy(&coarsedivudotp)); 8312 8313 /* compute null space for coarse solver if the benign trick has been requested */ 8314 if (pcbddc->benign_null) { 8315 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8316 for (i = 0; i < pcbddc->benign_n; i++) { PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); } 8317 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8318 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8319 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8320 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8321 if (coarse_mat) { 8322 Vec nullv; 8323 PetscScalar *array, *array2; 8324 PetscInt nl; 8325 8326 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8327 PetscCall(VecGetLocalSize(nullv, &nl)); 8328 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8329 PetscCall(VecGetArray(nullv, &array2)); 8330 PetscCall(PetscArraycpy(array2, array, nl)); 8331 PetscCall(VecRestoreArray(nullv, &array2)); 8332 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8333 PetscCall(VecNormalize(nullv, NULL)); 8334 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8335 PetscCall(VecDestroy(&nullv)); 8336 } 8337 } 8338 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8339 8340 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8341 if (pcbddc->coarse_ksp) { 8342 PetscBool ispreonly; 8343 8344 if (CoarseNullSpace) { 8345 PetscBool isnull; 8346 8347 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8348 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8349 /* TODO: add local nullspaces (if any) */ 8350 } 8351 /* setup coarse ksp */ 8352 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8353 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8354 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8355 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8356 KSP check_ksp; 8357 KSPType check_ksp_type; 8358 PC check_pc; 8359 Vec check_vec, coarse_vec; 8360 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8361 PetscInt its; 8362 PetscBool compute_eigs; 8363 PetscReal *eigs_r, *eigs_c; 8364 PetscInt neigs; 8365 const char *prefix; 8366 8367 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8368 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 8369 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 8370 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 8371 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 8372 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 8373 /* prevent from setup unneeded object */ 8374 PetscCall(KSPGetPC(check_ksp, &check_pc)); 8375 PetscCall(PCSetType(check_pc, PCNONE)); 8376 if (ispreonly) { 8377 check_ksp_type = KSPPREONLY; 8378 compute_eigs = PETSC_FALSE; 8379 } else { 8380 check_ksp_type = KSPGMRES; 8381 compute_eigs = PETSC_TRUE; 8382 } 8383 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 8384 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 8385 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 8386 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 8387 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 8388 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 8389 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 8390 PetscCall(KSPSetFromOptions(check_ksp)); 8391 PetscCall(KSPSetUp(check_ksp)); 8392 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 8393 PetscCall(KSPSetPC(check_ksp, check_pc)); 8394 /* create random vec */ 8395 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 8396 PetscCall(VecSetRandom(check_vec, NULL)); 8397 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8398 /* solve coarse problem */ 8399 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 8400 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 8401 /* set eigenvalue estimation if preonly has not been requested */ 8402 if (compute_eigs) { 8403 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 8404 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 8405 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 8406 if (neigs) { 8407 lambda_max = eigs_r[neigs - 1]; 8408 lambda_min = eigs_r[0]; 8409 if (pcbddc->use_coarse_estimates) { 8410 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8411 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 8412 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 8413 } 8414 } 8415 } 8416 } 8417 8418 /* check coarse problem residual error */ 8419 if (pcbddc->dbg_flag) { 8420 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8421 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8422 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 8423 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 8424 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8425 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 8426 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 8427 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer)); 8428 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 8429 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 8430 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 8431 if (CoarseNullSpace) { PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); } 8432 if (compute_eigs) { 8433 PetscReal lambda_max_s, lambda_min_s; 8434 KSPConvergedReason reason; 8435 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 8436 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 8437 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 8438 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 8439 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 8440 for (i = 0; i < neigs; i++) { PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); } 8441 } 8442 PetscCall(PetscViewerFlush(dbg_viewer)); 8443 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8444 } 8445 PetscCall(VecDestroy(&check_vec)); 8446 PetscCall(VecDestroy(&coarse_vec)); 8447 PetscCall(KSPDestroy(&check_ksp)); 8448 if (compute_eigs) { 8449 PetscCall(PetscFree(eigs_r)); 8450 PetscCall(PetscFree(eigs_c)); 8451 } 8452 } 8453 } 8454 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8455 /* print additional info */ 8456 if (pcbddc->dbg_flag) { 8457 /* waits until all processes reaches this point */ 8458 PetscCall(PetscBarrier((PetscObject)pc)); 8459 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 8460 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8461 } 8462 8463 /* free memory */ 8464 PetscCall(MatDestroy(&coarse_mat)); 8465 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8466 PetscFunctionReturn(0); 8467 } 8468 8469 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) { 8470 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8471 PC_IS *pcis = (PC_IS *)pc->data; 8472 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 8473 IS subset, subset_mult, subset_n; 8474 PetscInt local_size, coarse_size = 0; 8475 PetscInt *local_primal_indices = NULL; 8476 const PetscInt *t_local_primal_indices; 8477 8478 PetscFunctionBegin; 8479 /* Compute global number of coarse dofs */ 8480 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 8481 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 8482 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 8483 PetscCall(ISDestroy(&subset_n)); 8484 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 8485 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 8486 PetscCall(ISDestroy(&subset)); 8487 PetscCall(ISDestroy(&subset_mult)); 8488 PetscCall(ISGetLocalSize(subset_n, &local_size)); 8489 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 8490 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 8491 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 8492 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 8493 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 8494 PetscCall(ISDestroy(&subset_n)); 8495 8496 /* check numbering */ 8497 if (pcbddc->dbg_flag) { 8498 PetscScalar coarsesum, *array, *array2; 8499 PetscInt i; 8500 PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE; 8501 8502 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8503 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8504 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n")); 8505 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8506 /* counter */ 8507 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8508 PetscCall(VecSet(pcis->vec1_N, 1.0)); 8509 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8510 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8511 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8512 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8513 PetscCall(VecSet(pcis->vec1_N, 0.0)); 8514 for (i = 0; i < pcbddc->local_primal_size; i++) { PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES)); } 8515 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8516 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8517 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8518 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8519 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8520 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8521 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8522 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8523 PetscCall(VecGetArray(pcis->vec2_N, &array2)); 8524 for (i = 0; i < pcis->n; i++) { 8525 if (array[i] != 0.0 && array[i] != array2[i]) { 8526 PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi; 8527 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8528 set_error = PETSC_TRUE; 8529 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi)); 8530 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh)); 8531 } 8532 } 8533 PetscCall(VecRestoreArray(pcis->vec2_N, &array2)); 8534 PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8535 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8536 for (i = 0; i < pcis->n; i++) { 8537 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]); 8538 } 8539 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8540 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8541 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8542 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8543 PetscCall(VecSum(pcis->vec1_global, &coarsesum)); 8544 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum))); 8545 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8546 PetscInt *gidxs; 8547 8548 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs)); 8549 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs)); 8550 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n")); 8551 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8552 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank)); 8553 for (i = 0; i < pcbddc->local_primal_size; i++) { 8554 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i])); 8555 } 8556 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8557 PetscCall(PetscFree(gidxs)); 8558 } 8559 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8560 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8561 PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed"); 8562 } 8563 8564 /* get back data */ 8565 *coarse_size_n = coarse_size; 8566 *local_primal_indices_n = local_primal_indices; 8567 PetscFunctionReturn(0); 8568 } 8569 8570 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) { 8571 IS localis_t; 8572 PetscInt i, lsize, *idxs, n; 8573 PetscScalar *vals; 8574 8575 PetscFunctionBegin; 8576 /* get indices in local ordering exploiting local to global map */ 8577 PetscCall(ISGetLocalSize(globalis, &lsize)); 8578 PetscCall(PetscMalloc1(lsize, &vals)); 8579 for (i = 0; i < lsize; i++) vals[i] = 1.0; 8580 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 8581 PetscCall(VecSet(gwork, 0.0)); 8582 PetscCall(VecSet(lwork, 0.0)); 8583 if (idxs) { /* multilevel guard */ 8584 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 8585 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 8586 } 8587 PetscCall(VecAssemblyBegin(gwork)); 8588 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 8589 PetscCall(PetscFree(vals)); 8590 PetscCall(VecAssemblyEnd(gwork)); 8591 /* now compute set in local ordering */ 8592 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8593 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8594 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 8595 PetscCall(VecGetSize(lwork, &n)); 8596 for (i = 0, lsize = 0; i < n; i++) { 8597 if (PetscRealPart(vals[i]) > 0.5) { lsize++; } 8598 } 8599 PetscCall(PetscMalloc1(lsize, &idxs)); 8600 for (i = 0, lsize = 0; i < n; i++) { 8601 if (PetscRealPart(vals[i]) > 0.5) { idxs[lsize++] = i; } 8602 } 8603 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 8604 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 8605 *localis = localis_t; 8606 PetscFunctionReturn(0); 8607 } 8608 8609 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) { 8610 PC_IS *pcis = (PC_IS *)pc->data; 8611 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8612 PC_IS *pcisf; 8613 PC_BDDC *pcbddcf; 8614 PC pcf; 8615 8616 PetscFunctionBegin; 8617 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 8618 PetscCall(PetscLogObjectParent((PetscObject)pc, (PetscObject)pcf)); 8619 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 8620 PetscCall(PCSetType(pcf, PCBDDC)); 8621 8622 pcisf = (PC_IS *)pcf->data; 8623 pcbddcf = (PC_BDDC *)pcf->data; 8624 8625 pcisf->is_B_local = pcis->is_B_local; 8626 pcisf->vec1_N = pcis->vec1_N; 8627 pcisf->BtoNmap = pcis->BtoNmap; 8628 pcisf->n = pcis->n; 8629 pcisf->n_B = pcis->n_B; 8630 8631 PetscCall(PetscFree(pcbddcf->mat_graph)); 8632 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8633 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8634 pcbddcf->sub_schurs = schurs; 8635 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8636 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8637 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8638 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8639 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 8640 pcbddcf->use_faces = PETSC_TRUE; 8641 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 8642 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 8643 pcbddcf->use_qr_single = (PetscBool)!constraints; 8644 pcbddcf->fake_change = PETSC_TRUE; 8645 pcbddcf->dbg_flag = pcbddc->dbg_flag; 8646 8647 PetscCall(PCBDDCAdaptiveSelection(pcf)); 8648 PetscCall(PCBDDCConstraintsSetUp(pcf)); 8649 8650 *change = pcbddcf->ConstraintMatrix; 8651 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 8652 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 8653 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 8654 8655 if (schurs) pcbddcf->sub_schurs = NULL; 8656 pcbddcf->ConstraintMatrix = NULL; 8657 pcbddcf->mat_graph = NULL; 8658 pcisf->is_B_local = NULL; 8659 pcisf->vec1_N = NULL; 8660 pcisf->BtoNmap = NULL; 8661 PetscCall(PCDestroy(&pcf)); 8662 PetscFunctionReturn(0); 8663 } 8664 8665 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) { 8666 PC_IS *pcis = (PC_IS *)pc->data; 8667 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8668 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 8669 Mat S_j; 8670 PetscInt *used_xadj, *used_adjncy; 8671 PetscBool free_used_adj; 8672 8673 PetscFunctionBegin; 8674 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8675 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8676 free_used_adj = PETSC_FALSE; 8677 if (pcbddc->sub_schurs_layers == -1) { 8678 used_xadj = NULL; 8679 used_adjncy = NULL; 8680 } else { 8681 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8682 used_xadj = pcbddc->mat_graph->xadj; 8683 used_adjncy = pcbddc->mat_graph->adjncy; 8684 } else if (pcbddc->computed_rowadj) { 8685 used_xadj = pcbddc->mat_graph->xadj; 8686 used_adjncy = pcbddc->mat_graph->adjncy; 8687 } else { 8688 PetscBool flg_row = PETSC_FALSE; 8689 const PetscInt *xadj, *adjncy; 8690 PetscInt nvtxs; 8691 8692 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8693 if (flg_row) { 8694 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 8695 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 8696 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 8697 free_used_adj = PETSC_TRUE; 8698 } else { 8699 pcbddc->sub_schurs_layers = -1; 8700 used_xadj = NULL; 8701 used_adjncy = NULL; 8702 } 8703 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8704 } 8705 } 8706 8707 /* setup sub_schurs data */ 8708 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8709 if (!sub_schurs->schur_explicit) { 8710 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8711 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8712 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 8713 } else { 8714 Mat change = NULL; 8715 Vec scaling = NULL; 8716 IS change_primal = NULL, iP; 8717 PetscInt benign_n; 8718 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8719 PetscBool need_change = PETSC_FALSE; 8720 PetscBool discrete_harmonic = PETSC_FALSE; 8721 8722 if (!pcbddc->use_vertices && reuse_solvers) { 8723 PetscInt n_vertices; 8724 8725 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 8726 reuse_solvers = (PetscBool)!n_vertices; 8727 } 8728 if (!pcbddc->benign_change_explicit) { 8729 benign_n = pcbddc->benign_n; 8730 } else { 8731 benign_n = 0; 8732 } 8733 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8734 We need a global reduction to avoid possible deadlocks. 8735 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8736 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8737 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8738 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8739 need_change = (PetscBool)(!need_change); 8740 } 8741 /* If the user defines additional constraints, we import them here */ 8742 if (need_change) { 8743 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 8744 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 8745 } 8746 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8747 8748 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 8749 if (iP) { 8750 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 8751 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 8752 PetscOptionsEnd(); 8753 } 8754 if (discrete_harmonic) { 8755 Mat A; 8756 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 8757 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 8758 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 8759 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 8760 pcbddc->benign_zerodiag_subs, change, change_primal)); 8761 PetscCall(MatDestroy(&A)); 8762 } else { 8763 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 8764 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 8765 } 8766 PetscCall(MatDestroy(&change)); 8767 PetscCall(ISDestroy(&change_primal)); 8768 } 8769 PetscCall(MatDestroy(&S_j)); 8770 8771 /* free adjacency */ 8772 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 8773 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8774 PetscFunctionReturn(0); 8775 } 8776 8777 PetscErrorCode PCBDDCInitSubSchurs(PC pc) { 8778 PC_IS *pcis = (PC_IS *)pc->data; 8779 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8780 PCBDDCGraph graph; 8781 8782 PetscFunctionBegin; 8783 /* attach interface graph for determining subsets */ 8784 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8785 IS verticesIS, verticescomm; 8786 PetscInt vsize, *idxs; 8787 8788 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8789 PetscCall(ISGetSize(verticesIS, &vsize)); 8790 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 8791 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 8792 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 8793 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8794 PetscCall(PCBDDCGraphCreate(&graph)); 8795 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 8796 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 8797 PetscCall(ISDestroy(&verticescomm)); 8798 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 8799 } else { 8800 graph = pcbddc->mat_graph; 8801 } 8802 /* print some info */ 8803 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8804 IS vertices; 8805 PetscInt nv, nedges, nfaces; 8806 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 8807 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8808 PetscCall(ISGetSize(vertices, &nv)); 8809 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8810 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 8811 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 8812 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 8813 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 8814 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8815 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 8816 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8817 } 8818 8819 /* sub_schurs init */ 8820 if (!pcbddc->sub_schurs) { PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); } 8821 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 8822 8823 /* free graph struct */ 8824 if (pcbddc->sub_schurs_rebuild) { PetscCall(PCBDDCGraphDestroy(&graph)); } 8825 PetscFunctionReturn(0); 8826 } 8827 8828 PetscErrorCode PCBDDCCheckOperator(PC pc) { 8829 PC_IS *pcis = (PC_IS *)pc->data; 8830 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8831 8832 PetscFunctionBegin; 8833 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8834 IS zerodiag = NULL; 8835 Mat S_j, B0_B = NULL; 8836 Vec dummy_vec = NULL, vec_check_B, vec_scale_P; 8837 PetscScalar *p0_check, *array, *array2; 8838 PetscReal norm; 8839 PetscInt i; 8840 8841 /* B0 and B0_B */ 8842 if (zerodiag) { 8843 IS dummy; 8844 8845 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy)); 8846 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 8847 PetscCall(MatCreateVecs(B0_B, NULL, &dummy_vec)); 8848 PetscCall(ISDestroy(&dummy)); 8849 } 8850 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8851 PetscCall(VecDuplicate(pcbddc->vec1_P, &vec_scale_P)); 8852 PetscCall(VecSet(pcbddc->vec1_P, 1.0)); 8853 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8854 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8855 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE)); 8856 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE)); 8857 PetscCall(VecReciprocal(vec_scale_P)); 8858 /* S_j */ 8859 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8860 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8861 8862 /* mimic vector in \widetilde{W}_\Gamma */ 8863 PetscCall(VecSetRandom(pcis->vec1_N, NULL)); 8864 /* continuous in primal space */ 8865 PetscCall(VecSetRandom(pcbddc->coarse_vec, NULL)); 8866 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8867 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8868 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 8869 PetscCall(PetscCalloc1(pcbddc->benign_n, &p0_check)); 8870 for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i]; 8871 PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES)); 8872 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 8873 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8874 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8875 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD)); 8876 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD)); 8877 PetscCall(VecDuplicate(pcis->vec2_B, &vec_check_B)); 8878 PetscCall(VecCopy(pcis->vec2_B, vec_check_B)); 8879 8880 /* assemble rhs for coarse problem */ 8881 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8882 /* local with Schur */ 8883 PetscCall(MatMult(S_j, pcis->vec2_B, pcis->vec1_B)); 8884 if (zerodiag) { 8885 PetscCall(VecGetArray(dummy_vec, &array)); 8886 for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i]; 8887 PetscCall(VecRestoreArray(dummy_vec, &array)); 8888 PetscCall(MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B)); 8889 } 8890 /* sum on primal nodes the local contributions */ 8891 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE)); 8892 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE)); 8893 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8894 PetscCall(VecGetArray(pcbddc->vec1_P, &array2)); 8895 for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8896 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array2)); 8897 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8898 PetscCall(VecSet(pcbddc->coarse_vec, 0.)); 8899 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8900 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8901 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8902 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8903 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 8904 /* scale primal nodes (BDDC sums contibutions) */ 8905 PetscCall(VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P)); 8906 PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES)); 8907 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 8908 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8909 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8910 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 8911 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 8912 /* global: \widetilde{B0}_B w_\Gamma */ 8913 if (zerodiag) { 8914 PetscCall(MatMult(B0_B, pcis->vec2_B, dummy_vec)); 8915 PetscCall(VecGetArray(dummy_vec, &array)); 8916 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i]; 8917 PetscCall(VecRestoreArray(dummy_vec, &array)); 8918 } 8919 /* BDDC */ 8920 PetscCall(VecSet(pcis->vec1_D, 0.)); 8921 PetscCall(PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE)); 8922 8923 PetscCall(VecCopy(pcis->vec1_B, pcis->vec2_B)); 8924 PetscCall(VecAXPY(pcis->vec1_B, -1.0, vec_check_B)); 8925 PetscCall(VecNorm(pcis->vec1_B, NORM_INFINITY, &norm)); 8926 PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm)); 8927 for (i = 0; i < pcbddc->benign_n; i++) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i]))); } 8928 PetscCall(PetscFree(p0_check)); 8929 PetscCall(VecDestroy(&vec_scale_P)); 8930 PetscCall(VecDestroy(&vec_check_B)); 8931 PetscCall(VecDestroy(&dummy_vec)); 8932 PetscCall(MatDestroy(&S_j)); 8933 PetscCall(MatDestroy(&B0_B)); 8934 } 8935 PetscFunctionReturn(0); 8936 } 8937 8938 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8939 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) { 8940 Mat At; 8941 IS rows; 8942 PetscInt rst, ren; 8943 PetscLayout rmap; 8944 8945 PetscFunctionBegin; 8946 rst = ren = 0; 8947 if (ccomm != MPI_COMM_NULL) { 8948 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 8949 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 8950 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 8951 PetscCall(PetscLayoutSetUp(rmap)); 8952 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 8953 } 8954 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 8955 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 8956 PetscCall(ISDestroy(&rows)); 8957 8958 if (ccomm != MPI_COMM_NULL) { 8959 Mat_MPIAIJ *a, *b; 8960 IS from, to; 8961 Vec gvec; 8962 PetscInt lsize; 8963 8964 PetscCall(MatCreate(ccomm, B)); 8965 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 8966 PetscCall(MatSetType(*B, MATAIJ)); 8967 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 8968 PetscCall(PetscLayoutSetUp((*B)->cmap)); 8969 a = (Mat_MPIAIJ *)At->data; 8970 b = (Mat_MPIAIJ *)(*B)->data; 8971 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 8972 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 8973 PetscCall(PetscObjectReference((PetscObject)a->A)); 8974 PetscCall(PetscObjectReference((PetscObject)a->B)); 8975 b->A = a->A; 8976 b->B = a->B; 8977 8978 b->donotstash = a->donotstash; 8979 b->roworiented = a->roworiented; 8980 b->rowindices = NULL; 8981 b->rowvalues = NULL; 8982 b->getrowactive = PETSC_FALSE; 8983 8984 (*B)->rmap = rmap; 8985 (*B)->factortype = A->factortype; 8986 (*B)->assembled = PETSC_TRUE; 8987 (*B)->insertmode = NOT_SET_VALUES; 8988 (*B)->preallocated = PETSC_TRUE; 8989 8990 if (a->colmap) { 8991 #if defined(PETSC_USE_CTABLE) 8992 PetscCall(PetscTableCreateCopy(a->colmap, &b->colmap)); 8993 #else 8994 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 8995 PetscCall(PetscLogObjectMemory((PetscObject)*B, At->cmap->N * sizeof(PetscInt))); 8996 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 8997 #endif 8998 } else b->colmap = NULL; 8999 if (a->garray) { 9000 PetscInt len; 9001 len = a->B->cmap->n; 9002 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9003 PetscCall(PetscLogObjectMemory((PetscObject)(*B), len * sizeof(PetscInt))); 9004 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9005 } else b->garray = NULL; 9006 9007 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9008 b->lvec = a->lvec; 9009 PetscCall(PetscLogObjectParent((PetscObject)*B, (PetscObject)b->lvec)); 9010 9011 /* cannot use VecScatterCopy */ 9012 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9013 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9014 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9015 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9016 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9017 PetscCall(PetscLogObjectParent((PetscObject)*B, (PetscObject)b->Mvctx)); 9018 PetscCall(ISDestroy(&from)); 9019 PetscCall(ISDestroy(&to)); 9020 PetscCall(VecDestroy(&gvec)); 9021 } 9022 PetscCall(MatDestroy(&At)); 9023 PetscFunctionReturn(0); 9024 } 9025