1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork, *data, *U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM, bN, lwork, lierr, di = 1; 20 PetscInt ulw, i, nr, nc, n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A, &nr, &nc)); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 32 PetscCall(PetscMalloc1(ulw, &uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr, nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n, &sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr * nr, &U)); 46 PetscCall(PetscBLASIntCast(nr, &bM)); 47 PetscCall(PetscBLASIntCast(nc, &bN)); 48 PetscCall(PetscBLASIntCast(ulw, &lwork)); 49 PetscCall(MatDenseGetArray(A, &data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 53 #else 54 PetscCall(PetscMalloc1(5 * n, &rwork2)); 55 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 60 PetscCall(MatDenseRestoreArray(A, &data)); 61 for (i = 0; i < n; i++) 62 if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) PetscCall(PetscFree(sing)); 64 if (!work) PetscCall(PetscFree(uwork)); 65 /* create B */ 66 if (!range) { 67 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 68 PetscCall(MatDenseGetArray(*B, &data)); 69 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 70 } else { 71 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 72 PetscCall(MatDenseGetArray(*B, &data)); 73 PetscCall(PetscArraycpy(data, U, i * nr)); 74 } 75 PetscCall(MatDenseRestoreArray(*B, &data)); 76 PetscCall(PetscFree(U)); 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 Mat GE, GEd; 89 PetscInt rsize, csize, esize; 90 PetscScalar *ptr; 91 92 PetscFunctionBegin; 93 PetscCall(ISGetSize(edge, &esize)); 94 if (!esize) PetscFunctionReturn(0); 95 PetscCall(ISGetSize(extrow, &rsize)); 96 PetscCall(ISGetSize(extcol, &csize)); 97 98 /* gradients */ 99 ptr = work + 5 * esize; 100 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 101 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 102 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 103 PetscCall(MatDestroy(&GE)); 104 105 /* constants */ 106 ptr += rsize * csize; 107 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 108 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 109 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 110 PetscCall(MatDestroy(&GE)); 111 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 112 PetscCall(MatDestroy(&GEd)); 113 114 if (corners) { 115 Mat GEc; 116 const PetscScalar *vals; 117 PetscScalar v; 118 119 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 120 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 121 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 122 /* v = PetscAbsScalar(vals[0]) */; 123 v = 1.; 124 cvals[0] = vals[0] / v; 125 cvals[1] = vals[1] / v; 126 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 127 PetscCall(MatScale(*GKins, 1. / v)); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 sprintf(filename, "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++); 133 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 134 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 135 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 136 PetscCall(MatView(GEc, viewer)); 137 PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK")); 138 PetscCall(MatView(*GKins, viewer)); 139 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 140 PetscCall(MatView(GEd, viewer)); 141 PetscCall(PetscViewerDestroy(&viewer)); 142 } 143 #endif 144 PetscCall(MatDestroy(&GEd)); 145 PetscCall(MatDestroy(&GEc)); 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 154 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 155 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 159 MPI_Comm comm; 160 IS lned, primals, allprimals, nedfieldlocal; 161 IS *eedges, *extrows, *extcols, *alleedges; 162 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 163 PetscScalar *vals, *work; 164 PetscReal *rwork; 165 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 166 PetscInt ne, nv, Lv, order, n, field; 167 PetscInt n_neigh, *neigh, *n_shared, **shared; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 singular = PETSC_FALSE; 187 188 /* Command line customization */ 189 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 190 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 191 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 192 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 193 /* print debug info TODO: to be removed */ 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 195 PetscOptionsEnd(); 196 197 /* Return if there are no edges in the decomposition and the problem is not singular */ 198 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 199 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 200 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 201 if (!singular) { 202 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 203 lrc[0] = PETSC_FALSE; 204 for (i = 0; i < n; i++) { 205 if (PetscRealPart(vals[i]) > 2.) { 206 lrc[0] = PETSC_TRUE; 207 break; 208 } 209 } 210 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 211 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 212 if (!lrc[1]) PetscFunctionReturn(0); 213 } 214 215 /* Get Nedelec field */ 216 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 217 if (pcbddc->n_ISForDofsLocal && field >= 0) { 218 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 219 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 220 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 221 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 222 ne = n; 223 nedfieldlocal = NULL; 224 global = PETSC_TRUE; 225 } else if (field == PETSC_DECIDE) { 226 PetscInt rst, ren, *idx; 227 228 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 229 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 230 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 231 for (i = rst; i < ren; i++) { 232 PetscInt nc; 233 234 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 235 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 236 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 237 } 238 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 239 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 240 PetscCall(PetscMalloc1(n, &idx)); 241 for (i = 0, ne = 0; i < n; i++) 242 if (matis->sf_leafdata[i]) idx[ne++] = i; 243 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 244 } else { 245 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 /* Sanity checks */ 249 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 250 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 251 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 252 253 /* Just set primal dofs and return */ 254 if (setprimal) { 255 IS enedfieldlocal; 256 PetscInt *eidxs; 257 258 PetscCall(PetscMalloc1(ne, &eidxs)); 259 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 260 if (nedfieldlocal) { 261 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 262 for (i = 0, cum = 0; i < ne; i++) { 263 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 264 } 265 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 266 } else { 267 for (i = 0, cum = 0; i < ne; i++) { 268 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 269 } 270 } 271 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 272 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 273 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 274 PetscCall(PetscFree(eidxs)); 275 PetscCall(ISDestroy(&nedfieldlocal)); 276 PetscCall(ISDestroy(&enedfieldlocal)); 277 PetscFunctionReturn(0); 278 } 279 280 /* Compute some l2g maps */ 281 if (nedfieldlocal) { 282 IS is; 283 284 /* need to map from the local Nedelec field to local numbering */ 285 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 286 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 287 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 288 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 289 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 290 if (global) { 291 PetscCall(PetscObjectReference((PetscObject)al2g)); 292 el2g = al2g; 293 } else { 294 IS gis; 295 296 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 297 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 298 PetscCall(ISDestroy(&gis)); 299 } 300 PetscCall(ISDestroy(&is)); 301 } else { 302 /* restore default */ 303 pcbddc->nedfield = -1; 304 /* one ref for the destruction of al2g, one for el2g */ 305 PetscCall(PetscObjectReference((PetscObject)al2g)); 306 PetscCall(PetscObjectReference((PetscObject)al2g)); 307 el2g = al2g; 308 fl2g = NULL; 309 } 310 311 /* Start communication to drop connections for interior edges (for cc analysis only) */ 312 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 313 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 314 if (nedfieldlocal) { 315 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 316 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 317 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 318 } else { 319 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 320 } 321 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 322 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 323 324 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 325 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 326 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 327 if (global) { 328 PetscInt rst; 329 330 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 331 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 332 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 333 } 334 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 335 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 336 } else { 337 PetscInt *tbz; 338 339 PetscCall(PetscMalloc1(ne, &tbz)); 340 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 342 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 343 for (i = 0, cum = 0; i < ne; i++) 344 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 345 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 346 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 347 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 348 PetscCall(PetscFree(tbz)); 349 } 350 } else { /* we need the entire G to infer the nullspace */ 351 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 352 G = pcbddc->discretegradient; 353 } 354 355 /* Extract subdomain relevant rows of G */ 356 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 357 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 358 PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); 359 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 360 PetscCall(ISDestroy(&lned)); 361 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 362 PetscCall(MatDestroy(&lGall)); 363 PetscCall(MatISGetLocalMat(lGis, &lG)); 364 365 /* SF for nodal dofs communications */ 366 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 367 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 368 PetscCall(PetscObjectReference((PetscObject)vl2g)); 369 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 370 PetscCall(PetscSFCreate(comm, &sfv)); 371 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 372 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 373 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 374 i = singular ? 2 : 1; 375 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 376 377 /* Destroy temporary G created in MATIS format and modified G */ 378 PetscCall(PetscObjectReference((PetscObject)lG)); 379 PetscCall(MatDestroy(&lGis)); 380 PetscCall(MatDestroy(&G)); 381 382 if (print) { 383 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 384 PetscCall(MatView(lG, NULL)); 385 } 386 387 /* Save lG for values insertion in change of basis */ 388 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 389 390 /* Analyze the edge-nodes connections (duplicate lG) */ 391 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 392 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 393 PetscCall(PetscBTCreate(nv, &btv)); 394 PetscCall(PetscBTCreate(ne, &bte)); 395 PetscCall(PetscBTCreate(ne, &btb)); 396 PetscCall(PetscBTCreate(ne, &btbd)); 397 PetscCall(PetscBTCreate(nv, &btvcand)); 398 /* need to import the boundary specification to ensure the 399 proper detection of coarse edges' endpoints */ 400 if (pcbddc->DirichletBoundariesLocal) { 401 IS is; 402 403 if (fl2g) { 404 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 405 } else { 406 is = pcbddc->DirichletBoundariesLocal; 407 } 408 PetscCall(ISGetLocalSize(is, &cum)); 409 PetscCall(ISGetIndices(is, &idxs)); 410 for (i = 0; i < cum; i++) { 411 if (idxs[i] >= 0) { 412 PetscCall(PetscBTSet(btb, idxs[i])); 413 PetscCall(PetscBTSet(btbd, idxs[i])); 414 } 415 } 416 PetscCall(ISRestoreIndices(is, &idxs)); 417 if (fl2g) PetscCall(ISDestroy(&is)); 418 } 419 if (pcbddc->NeumannBoundariesLocal) { 420 IS is; 421 422 if (fl2g) { 423 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 424 } else { 425 is = pcbddc->NeumannBoundariesLocal; 426 } 427 PetscCall(ISGetLocalSize(is, &cum)); 428 PetscCall(ISGetIndices(is, &idxs)); 429 for (i = 0; i < cum; i++) { 430 if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i])); 431 } 432 PetscCall(ISRestoreIndices(is, &idxs)); 433 if (fl2g) PetscCall(ISDestroy(&is)); 434 } 435 436 /* Count neighs per dof */ 437 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs)); 438 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs)); 439 440 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 441 for proper detection of coarse edges' endpoints */ 442 PetscCall(PetscBTCreate(ne, &btee)); 443 for (i = 0; i < ne; i++) { 444 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 445 } 446 PetscCall(PetscMalloc1(ne, &marks)); 447 if (!conforming) { 448 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 449 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 450 } 451 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 452 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 453 cum = 0; 454 for (i = 0; i < ne; i++) { 455 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 456 if (!PetscBTLookup(btee, i)) { 457 marks[cum++] = i; 458 continue; 459 } 460 /* set badly connected edge dofs as primal */ 461 if (!conforming) { 462 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 463 marks[cum++] = i; 464 PetscCall(PetscBTSet(bte, i)); 465 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 466 } else { 467 /* every edge dofs should be connected trough a certain number of nodal dofs 468 to other edge dofs belonging to coarse edges 469 - at most 2 endpoints 470 - order-1 interior nodal dofs 471 - no undefined nodal dofs (nconn < order) 472 */ 473 PetscInt ends = 0, ints = 0, undef = 0; 474 for (j = ii[i]; j < ii[i + 1]; j++) { 475 PetscInt v = jj[j], k; 476 PetscInt nconn = iit[v + 1] - iit[v]; 477 for (k = iit[v]; k < iit[v + 1]; k++) 478 if (!PetscBTLookup(btee, jjt[k])) nconn--; 479 if (nconn > order) ends++; 480 else if (nconn == order) ints++; 481 else undef++; 482 } 483 if (undef || ends > 2 || ints != order - 1) { 484 marks[cum++] = i; 485 PetscCall(PetscBTSet(bte, i)); 486 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 487 } 488 } 489 } 490 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 491 if (!order && ii[i + 1] != ii[i]) { 492 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 493 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 494 } 495 } 496 PetscCall(PetscBTDestroy(&btee)); 497 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 498 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 499 if (!conforming) { 500 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 501 PetscCall(MatDestroy(&lGt)); 502 } 503 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 504 505 /* identify splitpoints and corner candidates */ 506 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 507 if (print) { 508 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 509 PetscCall(MatView(lGe, NULL)); 510 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 511 PetscCall(MatView(lGt, NULL)); 512 } 513 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 514 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 515 for (i = 0; i < nv; i++) { 516 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 517 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 518 if (!order) { /* variable order */ 519 PetscReal vorder = 0.; 520 521 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 522 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 523 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 524 ord = 1; 525 } 526 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 527 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 528 if (PetscBTLookup(btbd, jj[j])) { 529 bdir = PETSC_TRUE; 530 break; 531 } 532 if (vc != ecount[jj[j]]) { 533 sneighs = PETSC_FALSE; 534 } else { 535 PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]]; 536 for (k = 0; k < vc; k++) { 537 if (vn[k] != en[k]) { 538 sneighs = PETSC_FALSE; 539 break; 540 } 541 } 542 } 543 } 544 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 545 if (print) PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]); 546 PetscCall(PetscBTSet(btv, i)); 547 } else if (test == ord) { 548 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 549 if (print) PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i); 550 PetscCall(PetscBTSet(btv, i)); 551 } else { 552 if (print) PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i); 553 PetscCall(PetscBTSet(btvcand, i)); 554 } 555 } 556 } 557 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 558 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 559 PetscCall(PetscBTDestroy(&btbd)); 560 561 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 562 if (order != 1) { 563 if (print) PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"); 564 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 565 for (i = 0; i < nv; i++) { 566 if (PetscBTLookup(btvcand, i)) { 567 PetscBool found = PETSC_FALSE; 568 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 569 PetscInt k, e = jj[j]; 570 if (PetscBTLookup(bte, e)) continue; 571 for (k = iit[e]; k < iit[e + 1]; k++) { 572 PetscInt v = jjt[k]; 573 if (v != i && PetscBTLookup(btvcand, v)) { 574 found = PETSC_TRUE; 575 break; 576 } 577 } 578 } 579 if (!found) { 580 if (print) PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i); 581 PetscCall(PetscBTClear(btvcand, i)); 582 } else { 583 if (print) PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i); 584 } 585 } 586 } 587 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 588 } 589 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 590 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 591 PetscCall(MatDestroy(&lGe)); 592 593 /* Get the local G^T explicitly */ 594 PetscCall(MatDestroy(&lGt)); 595 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 596 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 597 598 /* Mark interior nodal dofs */ 599 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 600 PetscCall(PetscBTCreate(nv, &btvi)); 601 for (i = 1; i < n_neigh; i++) { 602 for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j])); 603 } 604 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 605 606 /* communicate corners and splitpoints */ 607 PetscCall(PetscMalloc1(nv, &vmarks)); 608 PetscCall(PetscArrayzero(sfvleaves, nv)); 609 PetscCall(PetscArrayzero(sfvroots, Lv)); 610 for (i = 0; i < nv; i++) 611 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 612 613 if (print) { 614 IS tbz; 615 616 cum = 0; 617 for (i = 0; i < nv; i++) 618 if (sfvleaves[i]) vmarks[cum++] = i; 619 620 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 621 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 622 PetscCall(ISView(tbz, NULL)); 623 PetscCall(ISDestroy(&tbz)); 624 } 625 626 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 627 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 628 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 629 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 630 631 /* Zero rows of lGt corresponding to identified corners 632 and interior nodal dofs */ 633 cum = 0; 634 for (i = 0; i < nv; i++) { 635 if (sfvleaves[i]) { 636 vmarks[cum++] = i; 637 PetscCall(PetscBTSet(btv, i)); 638 } 639 if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 640 } 641 PetscCall(PetscBTDestroy(&btvi)); 642 if (print) { 643 IS tbz; 644 645 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 646 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 647 PetscCall(ISView(tbz, NULL)); 648 PetscCall(ISDestroy(&tbz)); 649 } 650 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 651 PetscCall(PetscFree(vmarks)); 652 PetscCall(PetscSFDestroy(&sfv)); 653 PetscCall(PetscFree2(sfvleaves, sfvroots)); 654 655 /* Recompute G */ 656 PetscCall(MatDestroy(&lG)); 657 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 658 if (print) { 659 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 660 PetscCall(MatView(lG, NULL)); 661 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 662 PetscCall(MatView(lGt, NULL)); 663 } 664 665 /* Get primal dofs (if any) */ 666 cum = 0; 667 for (i = 0; i < ne; i++) { 668 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 669 } 670 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 671 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 672 if (print) { 673 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 674 PetscCall(ISView(primals, NULL)); 675 } 676 PetscCall(PetscBTDestroy(&bte)); 677 /* TODO: what if the user passed in some of them ? */ 678 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 679 PetscCall(ISDestroy(&primals)); 680 681 /* Compute edge connectivity */ 682 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 683 684 /* Symbolic conn = lG*lGt */ 685 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 686 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 687 PetscCall(MatProductSetAlgorithm(conn, "default")); 688 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 689 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 690 PetscCall(MatProductSetFromOptions(conn)); 691 PetscCall(MatProductSymbolic(conn)); 692 693 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 694 if (fl2g) { 695 PetscBT btf; 696 PetscInt *iia, *jja, *iiu, *jju; 697 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 698 699 /* create CSR for all local dofs */ 700 PetscCall(PetscMalloc1(n + 1, &iia)); 701 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 702 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 703 iiu = pcbddc->mat_graph->xadj; 704 jju = pcbddc->mat_graph->adjncy; 705 } else if (pcbddc->use_local_adj) { 706 rest = PETSC_TRUE; 707 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 708 } else { 709 free = PETSC_TRUE; 710 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 711 iiu[0] = 0; 712 for (i = 0; i < n; i++) { 713 iiu[i + 1] = i + 1; 714 jju[i] = -1; 715 } 716 } 717 718 /* import sizes of CSR */ 719 iia[0] = 0; 720 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 721 722 /* overwrite entries corresponding to the Nedelec field */ 723 PetscCall(PetscBTCreate(n, &btf)); 724 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 725 for (i = 0; i < ne; i++) { 726 PetscCall(PetscBTSet(btf, idxs[i])); 727 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 728 } 729 730 /* iia in CSR */ 731 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 732 733 /* jja in CSR */ 734 PetscCall(PetscMalloc1(iia[n], &jja)); 735 for (i = 0; i < n; i++) 736 if (!PetscBTLookup(btf, i)) 737 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 738 739 /* map edge dofs connectivity */ 740 if (jj) { 741 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 742 for (i = 0; i < ne; i++) { 743 PetscInt e = idxs[i]; 744 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 745 } 746 } 747 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 748 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 749 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 750 if (free) PetscCall(PetscFree2(iiu, jju)); 751 PetscCall(PetscBTDestroy(&btf)); 752 } else { 753 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 754 } 755 756 /* Analyze interface for edge dofs */ 757 PetscCall(PCBDDCAnalyzeInterface(pc)); 758 pcbddc->mat_graph->twodim = PETSC_FALSE; 759 760 /* Get coarse edges in the edge space */ 761 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 762 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 763 764 if (fl2g) { 765 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 766 PetscCall(PetscMalloc1(nee, &eedges)); 767 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 768 } else { 769 eedges = alleedges; 770 primals = allprimals; 771 } 772 773 /* Mark fine edge dofs with their coarse edge id */ 774 PetscCall(PetscArrayzero(marks, ne)); 775 PetscCall(ISGetLocalSize(primals, &cum)); 776 PetscCall(ISGetIndices(primals, &idxs)); 777 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 778 PetscCall(ISRestoreIndices(primals, &idxs)); 779 if (print) { 780 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 781 PetscCall(ISView(primals, NULL)); 782 } 783 784 maxsize = 0; 785 for (i = 0; i < nee; i++) { 786 PetscInt size, mark = i + 1; 787 788 PetscCall(ISGetLocalSize(eedges[i], &size)); 789 PetscCall(ISGetIndices(eedges[i], &idxs)); 790 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 791 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 792 maxsize = PetscMax(maxsize, size); 793 } 794 795 /* Find coarse edge endpoints */ 796 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 797 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 798 for (i = 0; i < nee; i++) { 799 PetscInt mark = i + 1, size; 800 801 PetscCall(ISGetLocalSize(eedges[i], &size)); 802 if (!size && nedfieldlocal) continue; 803 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 804 PetscCall(ISGetIndices(eedges[i], &idxs)); 805 if (print) { 806 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 807 PetscCall(ISView(eedges[i], NULL)); 808 } 809 for (j = 0; j < size; j++) { 810 PetscInt k, ee = idxs[j]; 811 if (print) PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee); 812 for (k = ii[ee]; k < ii[ee + 1]; k++) { 813 if (print) PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k]); 814 if (PetscBTLookup(btv, jj[k])) { 815 if (print) PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k]); 816 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 817 PetscInt k2; 818 PetscBool corner = PETSC_FALSE; 819 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 820 if (print) PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])); 821 /* it's a corner if either is connected with an edge dof belonging to a different cc or 822 if the edge dof lie on the natural part of the boundary */ 823 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 824 corner = PETSC_TRUE; 825 break; 826 } 827 } 828 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 829 if (print) PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k]); 830 PetscCall(PetscBTSet(btv, jj[k])); 831 } else { 832 if (print) PetscPrintf(PETSC_COMM_SELF, " no corners found\n"); 833 } 834 } 835 } 836 } 837 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 838 } 839 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 840 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 841 PetscCall(PetscBTDestroy(&btb)); 842 843 /* Reset marked primal dofs */ 844 PetscCall(ISGetLocalSize(primals, &cum)); 845 PetscCall(ISGetIndices(primals, &idxs)); 846 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 847 PetscCall(ISRestoreIndices(primals, &idxs)); 848 849 /* Now use the initial lG */ 850 PetscCall(MatDestroy(&lG)); 851 PetscCall(MatDestroy(&lGt)); 852 lG = lGinit; 853 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 854 855 /* Compute extended cols indices */ 856 PetscCall(PetscBTCreate(nv, &btvc)); 857 PetscCall(PetscBTCreate(nee, &bter)); 858 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 859 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 860 i *= maxsize; 861 PetscCall(PetscCalloc1(nee, &extcols)); 862 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 863 eerr = PETSC_FALSE; 864 for (i = 0; i < nee; i++) { 865 PetscInt size, found = 0; 866 867 cum = 0; 868 PetscCall(ISGetLocalSize(eedges[i], &size)); 869 if (!size && nedfieldlocal) continue; 870 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 871 PetscCall(ISGetIndices(eedges[i], &idxs)); 872 PetscCall(PetscBTMemzero(nv, btvc)); 873 for (j = 0; j < size; j++) { 874 PetscInt k, ee = idxs[j]; 875 for (k = ii[ee]; k < ii[ee + 1]; k++) { 876 PetscInt vv = jj[k]; 877 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 878 else if (!PetscBTLookupSet(btvc, vv)) found++; 879 } 880 } 881 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 882 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 883 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 884 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 885 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 886 /* it may happen that endpoints are not defined at this point 887 if it is the case, mark this edge for a second pass */ 888 if (cum != size - 1 || found != 2) { 889 PetscCall(PetscBTSet(bter, i)); 890 if (print) { 891 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 892 PetscCall(ISView(eedges[i], NULL)); 893 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 894 PetscCall(ISView(extcols[i], NULL)); 895 } 896 eerr = PETSC_TRUE; 897 } 898 } 899 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 900 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 901 if (done) { 902 PetscInt *newprimals; 903 904 PetscCall(PetscMalloc1(ne, &newprimals)); 905 PetscCall(ISGetLocalSize(primals, &cum)); 906 PetscCall(ISGetIndices(primals, &idxs)); 907 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 908 PetscCall(ISRestoreIndices(primals, &idxs)); 909 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 910 if (print) PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]); 911 for (i = 0; i < nee; i++) { 912 PetscBool has_candidates = PETSC_FALSE; 913 if (PetscBTLookup(bter, i)) { 914 PetscInt size, mark = i + 1; 915 916 PetscCall(ISGetLocalSize(eedges[i], &size)); 917 PetscCall(ISGetIndices(eedges[i], &idxs)); 918 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 919 for (j = 0; j < size; j++) { 920 PetscInt k, ee = idxs[j]; 921 if (print) PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]); 922 for (k = ii[ee]; k < ii[ee + 1]; k++) { 923 /* set all candidates located on the edge as corners */ 924 if (PetscBTLookup(btvcand, jj[k])) { 925 PetscInt k2, vv = jj[k]; 926 has_candidates = PETSC_TRUE; 927 if (print) PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv); 928 PetscCall(PetscBTSet(btv, vv)); 929 /* set all edge dofs connected to candidate as primals */ 930 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 931 if (marks[jjt[k2]] == mark) { 932 PetscInt k3, ee2 = jjt[k2]; 933 if (print) PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2); 934 newprimals[cum++] = ee2; 935 /* finally set the new corners */ 936 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 937 if (print) PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]); 938 PetscCall(PetscBTSet(btv, jj[k3])); 939 } 940 } 941 } 942 } else { 943 if (print) PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k]); 944 } 945 } 946 } 947 if (!has_candidates) { /* circular edge */ 948 PetscInt k, ee = idxs[0], *tmarks; 949 950 PetscCall(PetscCalloc1(ne, &tmarks)); 951 if (print) PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i); 952 for (k = ii[ee]; k < ii[ee + 1]; k++) { 953 PetscInt k2; 954 if (print) PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k]); 955 PetscCall(PetscBTSet(btv, jj[k])); 956 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 957 } 958 for (j = 0; j < size; j++) { 959 if (tmarks[idxs[j]] > 1) { 960 if (print) PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]); 961 newprimals[cum++] = idxs[j]; 962 } 963 } 964 PetscCall(PetscFree(tmarks)); 965 } 966 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 967 } 968 PetscCall(ISDestroy(&extcols[i])); 969 } 970 PetscCall(PetscFree(extcols)); 971 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 972 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 973 if (fl2g) { 974 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 975 PetscCall(ISDestroy(&primals)); 976 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 977 PetscCall(PetscFree(eedges)); 978 } 979 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 980 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 981 PetscCall(PetscFree(newprimals)); 982 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 983 PetscCall(ISDestroy(&primals)); 984 PetscCall(PCBDDCAnalyzeInterface(pc)); 985 pcbddc->mat_graph->twodim = PETSC_FALSE; 986 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 987 if (fl2g) { 988 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 989 PetscCall(PetscMalloc1(nee, &eedges)); 990 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 991 } else { 992 eedges = alleedges; 993 primals = allprimals; 994 } 995 PetscCall(PetscCalloc1(nee, &extcols)); 996 997 /* Mark again */ 998 PetscCall(PetscArrayzero(marks, ne)); 999 for (i = 0; i < nee; i++) { 1000 PetscInt size, mark = i + 1; 1001 1002 PetscCall(ISGetLocalSize(eedges[i], &size)); 1003 PetscCall(ISGetIndices(eedges[i], &idxs)); 1004 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1005 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1006 } 1007 if (print) { 1008 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1009 PetscCall(ISView(primals, NULL)); 1010 } 1011 1012 /* Recompute extended cols */ 1013 eerr = PETSC_FALSE; 1014 for (i = 0; i < nee; i++) { 1015 PetscInt size; 1016 1017 cum = 0; 1018 PetscCall(ISGetLocalSize(eedges[i], &size)); 1019 if (!size && nedfieldlocal) continue; 1020 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1021 PetscCall(ISGetIndices(eedges[i], &idxs)); 1022 for (j = 0; j < size; j++) { 1023 PetscInt k, ee = idxs[j]; 1024 for (k = ii[ee]; k < ii[ee + 1]; k++) 1025 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1026 } 1027 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1028 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1029 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1030 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1031 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1032 if (cum != size - 1) { 1033 if (print) { 1034 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1035 PetscCall(ISView(eedges[i], NULL)); 1036 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1037 PetscCall(ISView(extcols[i], NULL)); 1038 } 1039 eerr = PETSC_TRUE; 1040 } 1041 } 1042 } 1043 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1044 PetscCall(PetscFree2(extrow, gidxs)); 1045 PetscCall(PetscBTDestroy(&bter)); 1046 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1047 /* an error should not occur at this point */ 1048 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1049 1050 /* Check the number of endpoints */ 1051 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1052 PetscCall(PetscMalloc1(2 * nee, &corners)); 1053 PetscCall(PetscMalloc1(nee, &cedges)); 1054 for (i = 0; i < nee; i++) { 1055 PetscInt size, found = 0, gc[2]; 1056 1057 /* init with defaults */ 1058 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1059 PetscCall(ISGetLocalSize(eedges[i], &size)); 1060 if (!size && nedfieldlocal) continue; 1061 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1062 PetscCall(ISGetIndices(eedges[i], &idxs)); 1063 PetscCall(PetscBTMemzero(nv, btvc)); 1064 for (j = 0; j < size; j++) { 1065 PetscInt k, ee = idxs[j]; 1066 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1067 PetscInt vv = jj[k]; 1068 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1069 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more then two corners for edge %" PetscInt_FMT, i); 1070 corners[i * 2 + found++] = vv; 1071 } 1072 } 1073 } 1074 if (found != 2) { 1075 PetscInt e; 1076 if (fl2g) { 1077 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1078 } else { 1079 e = idxs[0]; 1080 } 1081 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1082 } 1083 1084 /* get primal dof index on this coarse edge */ 1085 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1086 if (gc[0] > gc[1]) { 1087 PetscInt swap = corners[2 * i]; 1088 corners[2 * i] = corners[2 * i + 1]; 1089 corners[2 * i + 1] = swap; 1090 } 1091 cedges[i] = idxs[size - 1]; 1092 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1093 if (print) PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]); 1094 } 1095 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1096 PetscCall(PetscBTDestroy(&btvc)); 1097 1098 if (PetscDefined(USE_DEBUG)) { 1099 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1100 not interfere with neighbouring coarse edges */ 1101 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1102 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1103 for (i = 0; i < nv; i++) { 1104 PetscInt emax = 0, eemax = 0; 1105 1106 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1107 PetscCall(PetscArrayzero(emarks, nee + 1)); 1108 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1109 for (j = 1; j < nee + 1; j++) { 1110 if (emax < emarks[j]) { 1111 emax = emarks[j]; 1112 eemax = j; 1113 } 1114 } 1115 /* not relevant for edges */ 1116 if (!eemax) continue; 1117 1118 for (j = ii[i]; j < ii[i + 1]; j++) { 1119 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1120 } 1121 } 1122 PetscCall(PetscFree(emarks)); 1123 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1124 } 1125 1126 /* Compute extended rows indices for edge blocks of the change of basis */ 1127 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1128 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1129 extmem *= maxsize; 1130 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1131 PetscCall(PetscMalloc1(nee, &extrows)); 1132 PetscCall(PetscCalloc1(nee, &extrowcum)); 1133 for (i = 0; i < nv; i++) { 1134 PetscInt mark = 0, size, start; 1135 1136 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1137 for (j = ii[i]; j < ii[i + 1]; j++) 1138 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1139 1140 /* not relevant */ 1141 if (!mark) continue; 1142 1143 /* import extended row */ 1144 mark--; 1145 start = mark * extmem + extrowcum[mark]; 1146 size = ii[i + 1] - ii[i]; 1147 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1148 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1149 extrowcum[mark] += size; 1150 } 1151 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1152 PetscCall(MatDestroy(&lGt)); 1153 PetscCall(PetscFree(marks)); 1154 1155 /* Compress extrows */ 1156 cum = 0; 1157 for (i = 0; i < nee; i++) { 1158 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1159 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1161 cum = PetscMax(cum, size); 1162 } 1163 PetscCall(PetscFree(extrowcum)); 1164 PetscCall(PetscBTDestroy(&btv)); 1165 PetscCall(PetscBTDestroy(&btvcand)); 1166 1167 /* Workspace for lapack inner calls and VecSetValues */ 1168 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1169 1170 /* Create change of basis matrix (preallocation can be improved) */ 1171 PetscCall(MatCreate(comm, &T)); 1172 PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N)); 1173 PetscCall(MatSetType(T, MATAIJ)); 1174 PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL)); 1175 PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL)); 1176 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1177 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1178 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1179 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1180 1181 /* Defaults to identity */ 1182 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 1183 PetscCall(VecSet(tvec, 1.0)); 1184 PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES)); 1185 PetscCall(VecDestroy(&tvec)); 1186 1187 /* Create discrete gradient for the coarser level if needed */ 1188 PetscCall(MatDestroy(&pcbddc->nedcG)); 1189 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1190 if (pcbddc->current_level < pcbddc->max_levels) { 1191 ISLocalToGlobalMapping cel2g, cvl2g; 1192 IS wis, gwis; 1193 PetscInt cnv, cne; 1194 1195 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1196 if (fl2g) { 1197 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1198 } else { 1199 PetscCall(PetscObjectReference((PetscObject)wis)); 1200 pcbddc->nedclocal = wis; 1201 } 1202 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1203 PetscCall(ISDestroy(&wis)); 1204 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1205 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1206 PetscCall(ISDestroy(&wis)); 1207 PetscCall(ISDestroy(&gwis)); 1208 1209 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1210 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1211 PetscCall(ISDestroy(&wis)); 1212 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1213 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1214 PetscCall(ISDestroy(&wis)); 1215 PetscCall(ISDestroy(&gwis)); 1216 1217 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1218 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1219 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1220 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1221 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1222 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1223 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1224 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1225 } 1226 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1227 1228 #if defined(PRINT_GDET) 1229 inc = 0; 1230 lev = pcbddc->current_level; 1231 #endif 1232 1233 /* Insert values in the change of basis matrix */ 1234 for (i = 0; i < nee; i++) { 1235 Mat Gins = NULL, GKins = NULL; 1236 IS cornersis = NULL; 1237 PetscScalar cvals[2]; 1238 1239 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1240 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1241 if (Gins && GKins) { 1242 const PetscScalar *data; 1243 const PetscInt *rows, *cols; 1244 PetscInt nrh, nch, nrc, ncc; 1245 1246 PetscCall(ISGetIndices(eedges[i], &cols)); 1247 /* H1 */ 1248 PetscCall(ISGetIndices(extrows[i], &rows)); 1249 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1250 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1251 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1252 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1253 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1254 /* complement */ 1255 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1256 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1257 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1258 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1259 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1260 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1261 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1262 1263 /* coarse discrete gradient */ 1264 if (pcbddc->nedcG) { 1265 PetscInt cols[2]; 1266 1267 cols[0] = 2 * i; 1268 cols[1] = 2 * i + 1; 1269 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1270 } 1271 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1272 } 1273 PetscCall(ISDestroy(&extrows[i])); 1274 PetscCall(ISDestroy(&extcols[i])); 1275 PetscCall(ISDestroy(&cornersis)); 1276 PetscCall(MatDestroy(&Gins)); 1277 PetscCall(MatDestroy(&GKins)); 1278 } 1279 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1280 1281 /* Start assembling */ 1282 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1283 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1284 1285 /* Free */ 1286 if (fl2g) { 1287 PetscCall(ISDestroy(&primals)); 1288 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1289 PetscCall(PetscFree(eedges)); 1290 } 1291 1292 /* hack mat_graph with primal dofs on the coarse edges */ 1293 { 1294 PCBDDCGraph graph = pcbddc->mat_graph; 1295 PetscInt *oqueue = graph->queue; 1296 PetscInt *ocptr = graph->cptr; 1297 PetscInt ncc, *idxs; 1298 1299 /* find first primal edge */ 1300 if (pcbddc->nedclocal) { 1301 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1302 } else { 1303 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1304 idxs = cedges; 1305 } 1306 cum = 0; 1307 while (cum < nee && cedges[cum] < 0) cum++; 1308 1309 /* adapt connected components */ 1310 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1311 graph->cptr[0] = 0; 1312 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1313 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1314 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1315 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1316 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1317 ncc++; 1318 lc--; 1319 cum++; 1320 while (cum < nee && cedges[cum] < 0) cum++; 1321 } 1322 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1323 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1324 ncc++; 1325 } 1326 graph->ncc = ncc; 1327 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1328 PetscCall(PetscFree2(ocptr, oqueue)); 1329 } 1330 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1331 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1332 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1333 PetscCall(MatDestroy(&conn)); 1334 1335 PetscCall(ISDestroy(&nedfieldlocal)); 1336 PetscCall(PetscFree(extrow)); 1337 PetscCall(PetscFree2(work, rwork)); 1338 PetscCall(PetscFree(corners)); 1339 PetscCall(PetscFree(cedges)); 1340 PetscCall(PetscFree(extrows)); 1341 PetscCall(PetscFree(extcols)); 1342 PetscCall(MatDestroy(&lG)); 1343 1344 /* Complete assembling */ 1345 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1346 if (pcbddc->nedcG) { 1347 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1348 #if 0 1349 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1350 PetscCall(MatView(pcbddc->nedcG,NULL)); 1351 #endif 1352 } 1353 1354 /* set change of basis */ 1355 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1356 PetscCall(MatDestroy(&T)); 1357 1358 PetscFunctionReturn(0); 1359 } 1360 1361 /* the near-null space of BDDC carries information on quadrature weights, 1362 and these can be collinear -> so cheat with MatNullSpaceCreate 1363 and create a suitable set of basis vectors first */ 1364 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1365 { 1366 PetscInt i; 1367 1368 PetscFunctionBegin; 1369 for (i = 0; i < nvecs; i++) { 1370 PetscInt first, last; 1371 1372 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1373 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1374 if (i >= first && i < last) { 1375 PetscScalar *data; 1376 PetscCall(VecGetArray(quad_vecs[i], &data)); 1377 if (!has_const) { 1378 data[i - first] = 1.; 1379 } else { 1380 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1381 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1382 } 1383 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1384 } 1385 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1386 } 1387 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1388 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1389 PetscInt first, last; 1390 PetscCall(VecLockReadPop(quad_vecs[i])); 1391 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1392 if (i >= first && i < last) { 1393 PetscScalar *data; 1394 PetscCall(VecGetArray(quad_vecs[i], &data)); 1395 if (!has_const) { 1396 data[i - first] = 0.; 1397 } else { 1398 data[2 * i - first] = 0.; 1399 data[2 * i - first + 1] = 0.; 1400 } 1401 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1402 } 1403 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1404 PetscCall(VecLockReadPush(quad_vecs[i])); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1410 { 1411 Mat loc_divudotp; 1412 Vec p, v, vins, quad_vec, *quad_vecs; 1413 ISLocalToGlobalMapping map; 1414 PetscScalar *vals; 1415 const PetscScalar *array; 1416 PetscInt i, maxneighs = 0, maxsize, *gidxs; 1417 PetscInt n_neigh, *neigh, *n_shared, **shared; 1418 PetscMPIInt rank; 1419 1420 PetscFunctionBegin; 1421 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1422 for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs); 1423 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 1424 if (!maxneighs) { 1425 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1426 *nnsp = NULL; 1427 PetscFunctionReturn(0); 1428 } 1429 maxsize = 0; 1430 for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize); 1431 PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals)); 1432 /* create vectors to hold quadrature weights */ 1433 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1434 if (!transpose) { 1435 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1436 } else { 1437 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1438 } 1439 PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs)); 1440 PetscCall(VecDestroy(&quad_vec)); 1441 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp)); 1442 for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i])); 1443 1444 /* compute local quad vec */ 1445 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1446 if (!transpose) { 1447 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1448 } else { 1449 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1450 } 1451 PetscCall(VecSet(p, 1.)); 1452 if (!transpose) { 1453 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1454 } else { 1455 PetscCall(MatMult(loc_divudotp, p, v)); 1456 } 1457 if (vl2l) { 1458 Mat lA; 1459 VecScatter sc; 1460 1461 PetscCall(MatISGetLocalMat(A, &lA)); 1462 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1463 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1464 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1466 PetscCall(VecScatterDestroy(&sc)); 1467 } else { 1468 vins = v; 1469 } 1470 PetscCall(VecGetArrayRead(vins, &array)); 1471 PetscCall(VecDestroy(&p)); 1472 1473 /* insert in global quadrature vecs */ 1474 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1475 for (i = 1; i < n_neigh; i++) { 1476 const PetscInt *idxs; 1477 PetscInt idx, nn, j; 1478 1479 idxs = shared[i]; 1480 nn = n_shared[i]; 1481 for (j = 0; j < nn; j++) vals[j] = array[idxs[j]]; 1482 PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx)); 1483 idx = -(idx + 1); 1484 PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs); 1485 PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs)); 1486 PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES)); 1487 } 1488 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1489 PetscCall(VecRestoreArrayRead(vins, &array)); 1490 if (vl2l) PetscCall(VecDestroy(&vins)); 1491 PetscCall(VecDestroy(&v)); 1492 PetscCall(PetscFree2(gidxs, vals)); 1493 1494 /* assemble near null space */ 1495 for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i])); 1496 for (i = 0; i < maxneighs; i++) { 1497 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1498 PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view")); 1499 PetscCall(VecLockReadPush(quad_vecs[i])); 1500 } 1501 PetscCall(VecDestroyVecs(maxneighs, &quad_vecs)); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1506 { 1507 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1508 1509 PetscFunctionBegin; 1510 if (primalv) { 1511 if (pcbddc->user_primal_vertices_local) { 1512 IS list[2], newp; 1513 1514 list[0] = primalv; 1515 list[1] = pcbddc->user_primal_vertices_local; 1516 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1517 PetscCall(ISSortRemoveDups(newp)); 1518 PetscCall(ISDestroy(&list[1])); 1519 pcbddc->user_primal_vertices_local = newp; 1520 } else { 1521 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1522 } 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1528 { 1529 PetscInt f, *comp = (PetscInt *)ctx; 1530 1531 PetscFunctionBegin; 1532 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1533 PetscFunctionReturn(0); 1534 } 1535 1536 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1537 { 1538 Vec local, global; 1539 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1540 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1541 PetscBool monolithic = PETSC_FALSE; 1542 1543 PetscFunctionBegin; 1544 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1545 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1546 PetscOptionsEnd(); 1547 /* need to convert from global to local topology information and remove references to information in global ordering */ 1548 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1549 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1550 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1551 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1552 if (monolithic) { /* just get block size to properly compute vertices */ 1553 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1554 goto boundary; 1555 } 1556 1557 if (pcbddc->user_provided_isfordofs) { 1558 if (pcbddc->n_ISForDofs) { 1559 PetscInt i; 1560 1561 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1562 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1563 PetscInt bs; 1564 1565 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1566 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1567 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1568 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1569 } 1570 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1571 pcbddc->n_ISForDofs = 0; 1572 PetscCall(PetscFree(pcbddc->ISForDofs)); 1573 } 1574 } else { 1575 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1576 DM dm; 1577 1578 PetscCall(MatGetDM(pc->pmat, &dm)); 1579 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1580 if (dm) { 1581 IS *fields; 1582 PetscInt nf, i; 1583 1584 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1585 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1586 for (i = 0; i < nf; i++) { 1587 PetscInt bs; 1588 1589 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1590 PetscCall(ISGetBlockSize(fields[i], &bs)); 1591 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1592 PetscCall(ISDestroy(&fields[i])); 1593 } 1594 PetscCall(PetscFree(fields)); 1595 pcbddc->n_ISForDofsLocal = nf; 1596 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1597 PetscContainer c; 1598 1599 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1600 if (c) { 1601 MatISLocalFields lf; 1602 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1603 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1604 } else { /* fallback, create the default fields if bs > 1 */ 1605 PetscInt i, n = matis->A->rmap->n; 1606 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1607 if (i > 1) { 1608 pcbddc->n_ISForDofsLocal = i; 1609 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1610 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1611 } 1612 } 1613 } 1614 } else { 1615 PetscInt i; 1616 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1617 } 1618 } 1619 1620 boundary: 1621 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1622 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1623 } else if (pcbddc->DirichletBoundariesLocal) { 1624 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1625 } 1626 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1627 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1628 } else if (pcbddc->NeumannBoundariesLocal) { 1629 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1630 } 1631 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1632 PetscCall(VecDestroy(&global)); 1633 PetscCall(VecDestroy(&local)); 1634 /* detect local disconnected subdomains if requested (use matis->A) */ 1635 if (pcbddc->detect_disconnected) { 1636 IS primalv = NULL; 1637 PetscInt i; 1638 PetscBool filter = pcbddc->detect_disconnected_filter; 1639 1640 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1641 PetscCall(PetscFree(pcbddc->local_subs)); 1642 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1643 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1644 PetscCall(ISDestroy(&primalv)); 1645 } 1646 /* early stage corner detection */ 1647 { 1648 DM dm; 1649 1650 PetscCall(MatGetDM(pc->pmat, &dm)); 1651 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1652 if (dm) { 1653 PetscBool isda; 1654 1655 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1656 if (isda) { 1657 ISLocalToGlobalMapping l2l; 1658 IS corners; 1659 Mat lA; 1660 PetscBool gl, lo; 1661 1662 { 1663 Vec cvec; 1664 const PetscScalar *coords; 1665 PetscInt dof, n, cdim; 1666 PetscBool memc = PETSC_TRUE; 1667 1668 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1669 PetscCall(DMGetCoordinates(dm, &cvec)); 1670 PetscCall(VecGetLocalSize(cvec, &n)); 1671 PetscCall(VecGetBlockSize(cvec, &cdim)); 1672 n /= cdim; 1673 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1674 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1675 PetscCall(VecGetArrayRead(cvec, &coords)); 1676 #if defined(PETSC_USE_COMPLEX) 1677 memc = PETSC_FALSE; 1678 #endif 1679 if (dof != 1) memc = PETSC_FALSE; 1680 if (memc) { 1681 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1682 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1683 PetscReal *bcoords = pcbddc->mat_graph->coords; 1684 PetscInt i, b, d; 1685 1686 for (i = 0; i < n; i++) { 1687 for (b = 0; b < dof; b++) { 1688 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1689 } 1690 } 1691 } 1692 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1693 pcbddc->mat_graph->cdim = cdim; 1694 pcbddc->mat_graph->cnloc = dof * n; 1695 pcbddc->mat_graph->cloc = PETSC_FALSE; 1696 } 1697 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1698 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1699 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1700 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1701 lo = (PetscBool)(l2l && corners); 1702 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1703 if (gl) { /* From PETSc's DMDA */ 1704 const PetscInt *idx; 1705 PetscInt dof, bs, *idxout, n; 1706 1707 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1708 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1709 PetscCall(ISGetLocalSize(corners, &n)); 1710 PetscCall(ISGetIndices(corners, &idx)); 1711 if (bs == dof) { 1712 PetscCall(PetscMalloc1(n, &idxout)); 1713 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1714 } else { /* the original DMDA local-to-local map have been modified */ 1715 PetscInt i, d; 1716 1717 PetscCall(PetscMalloc1(dof * n, &idxout)); 1718 for (i = 0; i < n; i++) 1719 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1720 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1721 1722 bs = 1; 1723 n *= dof; 1724 } 1725 PetscCall(ISRestoreIndices(corners, &idx)); 1726 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1727 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1728 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1729 PetscCall(ISDestroy(&corners)); 1730 pcbddc->corner_selected = PETSC_TRUE; 1731 pcbddc->corner_selection = PETSC_TRUE; 1732 } 1733 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1734 } 1735 } 1736 } 1737 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1738 DM dm; 1739 1740 PetscCall(MatGetDM(pc->pmat, &dm)); 1741 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1742 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1743 Vec vcoords; 1744 PetscSection section; 1745 PetscReal *coords; 1746 PetscInt d, cdim, nl, nf, **ctxs; 1747 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1748 /* debug coordinates */ 1749 PetscViewer viewer; 1750 PetscBool flg; 1751 PetscViewerFormat format; 1752 const char *prefix; 1753 1754 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1755 PetscCall(DMGetLocalSection(dm, §ion)); 1756 PetscCall(PetscSectionGetNumFields(section, &nf)); 1757 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1758 PetscCall(VecGetLocalSize(vcoords, &nl)); 1759 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1760 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1761 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1762 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1763 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1764 1765 /* debug coordinates */ 1766 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1767 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1768 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1769 for (d = 0; d < cdim; d++) { 1770 PetscInt i; 1771 const PetscScalar *v; 1772 char name[16]; 1773 1774 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1775 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1776 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1777 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1778 if (flg) PetscCall(VecView(vcoords, viewer)); 1779 PetscCall(VecGetArrayRead(vcoords, &v)); 1780 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1781 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1782 } 1783 PetscCall(VecDestroy(&vcoords)); 1784 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1785 PetscCall(PetscFree(coords)); 1786 PetscCall(PetscFree(ctxs[0])); 1787 PetscCall(PetscFree2(funcs, ctxs)); 1788 if (flg) { 1789 PetscCall(PetscViewerPopFormat(viewer)); 1790 PetscCall(PetscViewerDestroy(&viewer)); 1791 } 1792 } 1793 } 1794 PetscFunctionReturn(0); 1795 } 1796 1797 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1798 { 1799 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 1800 IS nis; 1801 const PetscInt *idxs; 1802 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1803 1804 PetscFunctionBegin; 1805 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1806 if (mop == MPI_LAND) { 1807 /* init rootdata with true */ 1808 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1809 } else { 1810 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1811 } 1812 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1813 PetscCall(ISGetLocalSize(*is, &nd)); 1814 PetscCall(ISGetIndices(*is, &idxs)); 1815 for (i = 0; i < nd; i++) 1816 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1817 PetscCall(ISRestoreIndices(*is, &idxs)); 1818 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1819 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1820 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1821 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1822 if (mop == MPI_LAND) { 1823 PetscCall(PetscMalloc1(nd, &nidxs)); 1824 } else { 1825 PetscCall(PetscMalloc1(n, &nidxs)); 1826 } 1827 for (i = 0, nnd = 0; i < n; i++) 1828 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1829 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1830 PetscCall(ISDestroy(is)); 1831 *is = nis; 1832 PetscFunctionReturn(0); 1833 } 1834 1835 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 1836 { 1837 PC_IS *pcis = (PC_IS *)(pc->data); 1838 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 1839 1840 PetscFunctionBegin; 1841 if (!pcbddc->benign_have_null) PetscFunctionReturn(0); 1842 if (pcbddc->ChangeOfBasisMatrix) { 1843 Vec swap; 1844 1845 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1846 swap = pcbddc->work_change; 1847 pcbddc->work_change = r; 1848 r = swap; 1849 } 1850 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1851 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1852 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1853 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1854 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1855 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1856 PetscCall(VecSet(z, 0.)); 1857 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1858 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1859 if (pcbddc->ChangeOfBasisMatrix) { 1860 pcbddc->work_change = r; 1861 PetscCall(VecCopy(z, pcbddc->work_change)); 1862 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1863 } 1864 PetscFunctionReturn(0); 1865 } 1866 1867 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1868 { 1869 PCBDDCBenignMatMult_ctx ctx; 1870 PetscBool apply_right, apply_left, reset_x; 1871 1872 PetscFunctionBegin; 1873 PetscCall(MatShellGetContext(A, &ctx)); 1874 if (transpose) { 1875 apply_right = ctx->apply_left; 1876 apply_left = ctx->apply_right; 1877 } else { 1878 apply_right = ctx->apply_right; 1879 apply_left = ctx->apply_left; 1880 } 1881 reset_x = PETSC_FALSE; 1882 if (apply_right) { 1883 const PetscScalar *ax; 1884 PetscInt nl, i; 1885 1886 PetscCall(VecGetLocalSize(x, &nl)); 1887 PetscCall(VecGetArrayRead(x, &ax)); 1888 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 1889 PetscCall(VecRestoreArrayRead(x, &ax)); 1890 for (i = 0; i < ctx->benign_n; i++) { 1891 PetscScalar sum, val; 1892 const PetscInt *idxs; 1893 PetscInt nz, j; 1894 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1895 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1896 sum = 0.; 1897 if (ctx->apply_p0) { 1898 val = ctx->work[idxs[nz - 1]]; 1899 for (j = 0; j < nz - 1; j++) { 1900 sum += ctx->work[idxs[j]]; 1901 ctx->work[idxs[j]] += val; 1902 } 1903 } else { 1904 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 1905 } 1906 ctx->work[idxs[nz - 1]] -= sum; 1907 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1908 } 1909 PetscCall(VecPlaceArray(x, ctx->work)); 1910 reset_x = PETSC_TRUE; 1911 } 1912 if (transpose) { 1913 PetscCall(MatMultTranspose(ctx->A, x, y)); 1914 } else { 1915 PetscCall(MatMult(ctx->A, x, y)); 1916 } 1917 if (reset_x) PetscCall(VecResetArray(x)); 1918 if (apply_left) { 1919 PetscScalar *ay; 1920 PetscInt i; 1921 1922 PetscCall(VecGetArray(y, &ay)); 1923 for (i = 0; i < ctx->benign_n; i++) { 1924 PetscScalar sum, val; 1925 const PetscInt *idxs; 1926 PetscInt nz, j; 1927 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1928 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1929 val = -ay[idxs[nz - 1]]; 1930 if (ctx->apply_p0) { 1931 sum = 0.; 1932 for (j = 0; j < nz - 1; j++) { 1933 sum += ay[idxs[j]]; 1934 ay[idxs[j]] += val; 1935 } 1936 ay[idxs[nz - 1]] += sum; 1937 } else { 1938 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 1939 ay[idxs[nz - 1]] = 0.; 1940 } 1941 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1942 } 1943 PetscCall(VecRestoreArray(y, &ay)); 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1949 { 1950 PetscFunctionBegin; 1951 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1956 { 1957 PetscFunctionBegin; 1958 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1963 { 1964 PC_IS *pcis = (PC_IS *)pc->data; 1965 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1966 PCBDDCBenignMatMult_ctx ctx; 1967 1968 PetscFunctionBegin; 1969 if (!restore) { 1970 Mat A_IB, A_BI; 1971 PetscScalar *work; 1972 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1973 1974 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 1975 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1976 PetscCall(PetscMalloc1(pcis->n, &work)); 1977 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 1978 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 1979 PetscCall(MatSetType(A_IB, MATSHELL)); 1980 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 1981 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 1982 PetscCall(PetscNew(&ctx)); 1983 PetscCall(MatShellSetContext(A_IB, ctx)); 1984 ctx->apply_left = PETSC_TRUE; 1985 ctx->apply_right = PETSC_FALSE; 1986 ctx->apply_p0 = PETSC_FALSE; 1987 ctx->benign_n = pcbddc->benign_n; 1988 if (reuse) { 1989 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1990 ctx->free = PETSC_FALSE; 1991 } else { /* TODO: could be optimized for successive solves */ 1992 ISLocalToGlobalMapping N_to_D; 1993 PetscInt i; 1994 1995 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 1996 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 1997 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 1998 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 1999 ctx->free = PETSC_TRUE; 2000 } 2001 ctx->A = pcis->A_IB; 2002 ctx->work = work; 2003 PetscCall(MatSetUp(A_IB)); 2004 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2005 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2006 pcis->A_IB = A_IB; 2007 2008 /* A_BI as A_IB^T */ 2009 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2010 pcbddc->benign_original_mat = pcis->A_BI; 2011 pcis->A_BI = A_BI; 2012 } else { 2013 if (!pcbddc->benign_original_mat) PetscFunctionReturn(0); 2014 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2015 PetscCall(MatDestroy(&pcis->A_IB)); 2016 pcis->A_IB = ctx->A; 2017 ctx->A = NULL; 2018 PetscCall(MatDestroy(&pcis->A_BI)); 2019 pcis->A_BI = pcbddc->benign_original_mat; 2020 pcbddc->benign_original_mat = NULL; 2021 if (ctx->free) { 2022 PetscInt i; 2023 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2024 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2025 } 2026 PetscCall(PetscFree(ctx->work)); 2027 PetscCall(PetscFree(ctx)); 2028 } 2029 PetscFunctionReturn(0); 2030 } 2031 2032 /* used just in bddc debug mode */ 2033 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2034 { 2035 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2036 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2037 Mat An; 2038 2039 PetscFunctionBegin; 2040 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2041 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2042 if (is1) { 2043 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2044 PetscCall(MatDestroy(&An)); 2045 } else { 2046 *B = An; 2047 } 2048 PetscFunctionReturn(0); 2049 } 2050 2051 /* TODO: add reuse flag */ 2052 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2053 { 2054 Mat Bt; 2055 PetscScalar *a, *bdata; 2056 const PetscInt *ii, *ij; 2057 PetscInt m, n, i, nnz, *bii, *bij; 2058 PetscBool flg_row; 2059 2060 PetscFunctionBegin; 2061 PetscCall(MatGetSize(A, &n, &m)); 2062 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2063 PetscCall(MatSeqAIJGetArray(A, &a)); 2064 nnz = n; 2065 for (i = 0; i < ii[n]; i++) { 2066 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2067 } 2068 PetscCall(PetscMalloc1(n + 1, &bii)); 2069 PetscCall(PetscMalloc1(nnz, &bij)); 2070 PetscCall(PetscMalloc1(nnz, &bdata)); 2071 nnz = 0; 2072 bii[0] = 0; 2073 for (i = 0; i < n; i++) { 2074 PetscInt j; 2075 for (j = ii[i]; j < ii[i + 1]; j++) { 2076 PetscScalar entry = a[j]; 2077 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2078 bij[nnz] = ij[j]; 2079 bdata[nnz] = entry; 2080 nnz++; 2081 } 2082 } 2083 bii[i + 1] = nnz; 2084 } 2085 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2086 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2087 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2088 { 2089 Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data); 2090 b->free_a = PETSC_TRUE; 2091 b->free_ij = PETSC_TRUE; 2092 } 2093 if (*B == A) PetscCall(MatDestroy(&A)); 2094 *B = Bt; 2095 PetscFunctionReturn(0); 2096 } 2097 2098 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2099 { 2100 Mat B = NULL; 2101 DM dm; 2102 IS is_dummy, *cc_n; 2103 ISLocalToGlobalMapping l2gmap_dummy; 2104 PCBDDCGraph graph; 2105 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2106 PetscInt i, n; 2107 PetscInt *xadj, *adjncy; 2108 PetscBool isplex = PETSC_FALSE; 2109 2110 PetscFunctionBegin; 2111 if (ncc) *ncc = 0; 2112 if (cc) *cc = NULL; 2113 if (primalv) *primalv = NULL; 2114 PetscCall(PCBDDCGraphCreate(&graph)); 2115 PetscCall(MatGetDM(pc->pmat, &dm)); 2116 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2117 if (dm) PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex)); 2118 if (filter) isplex = PETSC_FALSE; 2119 2120 if (isplex) { /* this code has been modified from plexpartition.c */ 2121 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2122 PetscInt *adj = NULL; 2123 IS cellNumbering; 2124 const PetscInt *cellNum; 2125 PetscBool useCone, useClosure; 2126 PetscSection section; 2127 PetscSegBuffer adjBuffer; 2128 PetscSF sfPoint; 2129 2130 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2131 PetscCall(DMGetPointSF(dm, &sfPoint)); 2132 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2133 /* Build adjacency graph via a section/segbuffer */ 2134 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2135 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2136 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2137 /* Always use FVM adjacency to create partitioner graph */ 2138 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2139 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2140 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2141 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2142 for (n = 0, p = pStart; p < pEnd; p++) { 2143 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2144 if (nroots > 0) { 2145 if (cellNum[p] < 0) continue; 2146 } 2147 adjSize = PETSC_DETERMINE; 2148 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2149 for (a = 0; a < adjSize; ++a) { 2150 const PetscInt point = adj[a]; 2151 if (pStart <= point && point < pEnd) { 2152 PetscInt *PETSC_RESTRICT pBuf; 2153 PetscCall(PetscSectionAddDof(section, p, 1)); 2154 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2155 *pBuf = point; 2156 } 2157 } 2158 n++; 2159 } 2160 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2161 /* Derive CSR graph from section/segbuffer */ 2162 PetscCall(PetscSectionSetUp(section)); 2163 PetscCall(PetscSectionGetStorageSize(section, &size)); 2164 PetscCall(PetscMalloc1(n + 1, &xadj)); 2165 for (idx = 0, p = pStart; p < pEnd; p++) { 2166 if (nroots > 0) { 2167 if (cellNum[p] < 0) continue; 2168 } 2169 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2170 } 2171 xadj[n] = size; 2172 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2173 /* Clean up */ 2174 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2175 PetscCall(PetscSectionDestroy(§ion)); 2176 PetscCall(PetscFree(adj)); 2177 graph->xadj = xadj; 2178 graph->adjncy = adjncy; 2179 } else { 2180 Mat A; 2181 PetscBool isseqaij, flg_row; 2182 2183 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2184 if (!A->rmap->N || !A->cmap->N) { 2185 PetscCall(PCBDDCGraphDestroy(&graph)); 2186 PetscFunctionReturn(0); 2187 } 2188 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2189 if (!isseqaij && filter) { 2190 PetscBool isseqdense; 2191 2192 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2193 if (!isseqdense) { 2194 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2195 } else { /* TODO: rectangular case and LDA */ 2196 PetscScalar *array; 2197 PetscReal chop = 1.e-6; 2198 2199 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2200 PetscCall(MatDenseGetArray(B, &array)); 2201 PetscCall(MatGetSize(B, &n, NULL)); 2202 for (i = 0; i < n; i++) { 2203 PetscInt j; 2204 for (j = i + 1; j < n; j++) { 2205 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2206 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2207 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2208 } 2209 } 2210 PetscCall(MatDenseRestoreArray(B, &array)); 2211 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2212 } 2213 } else { 2214 PetscCall(PetscObjectReference((PetscObject)A)); 2215 B = A; 2216 } 2217 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2218 2219 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2220 if (filter) { 2221 PetscScalar *data; 2222 PetscInt j, cum; 2223 2224 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2225 PetscCall(MatSeqAIJGetArray(B, &data)); 2226 cum = 0; 2227 for (i = 0; i < n; i++) { 2228 PetscInt t; 2229 2230 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2231 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2232 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2233 } 2234 t = xadj_filtered[i]; 2235 xadj_filtered[i] = cum; 2236 cum += t; 2237 } 2238 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2239 graph->xadj = xadj_filtered; 2240 graph->adjncy = adjncy_filtered; 2241 } else { 2242 graph->xadj = xadj; 2243 graph->adjncy = adjncy; 2244 } 2245 } 2246 /* compute local connected components using PCBDDCGraph */ 2247 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2248 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2249 PetscCall(ISDestroy(&is_dummy)); 2250 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2251 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2252 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2253 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2254 2255 /* partial clean up */ 2256 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2257 if (B) { 2258 PetscBool flg_row; 2259 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2260 PetscCall(MatDestroy(&B)); 2261 } 2262 if (isplex) { 2263 PetscCall(PetscFree(xadj)); 2264 PetscCall(PetscFree(adjncy)); 2265 } 2266 2267 /* get back data */ 2268 if (isplex) { 2269 if (ncc) *ncc = graph->ncc; 2270 if (cc || primalv) { 2271 Mat A; 2272 PetscBT btv, btvt; 2273 PetscSection subSection; 2274 PetscInt *ids, cum, cump, *cids, *pids; 2275 2276 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2277 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2278 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2279 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2280 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2281 2282 cids[0] = 0; 2283 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2284 PetscInt j; 2285 2286 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2287 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2288 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2289 2290 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2291 for (k = 0; k < 2 * size; k += 2) { 2292 PetscInt s, pp, p = closure[k], off, dof, cdof; 2293 2294 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2295 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2296 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2297 for (s = 0; s < dof - cdof; s++) { 2298 if (PetscBTLookupSet(btvt, off + s)) continue; 2299 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2300 else pids[cump++] = off + s; /* cross-vertex */ 2301 } 2302 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2303 if (pp != p) { 2304 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2305 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2306 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2307 for (s = 0; s < dof - cdof; s++) { 2308 if (PetscBTLookupSet(btvt, off + s)) continue; 2309 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2310 else pids[cump++] = off + s; /* cross-vertex */ 2311 } 2312 } 2313 } 2314 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2315 } 2316 cids[i + 1] = cum; 2317 /* mark dofs as already assigned */ 2318 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2319 } 2320 if (cc) { 2321 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2322 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2323 *cc = cc_n; 2324 } 2325 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2326 PetscCall(PetscFree3(ids, cids, pids)); 2327 PetscCall(PetscBTDestroy(&btv)); 2328 PetscCall(PetscBTDestroy(&btvt)); 2329 } 2330 } else { 2331 if (ncc) *ncc = graph->ncc; 2332 if (cc) { 2333 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2334 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2335 *cc = cc_n; 2336 } 2337 } 2338 /* clean up graph */ 2339 graph->xadj = NULL; 2340 graph->adjncy = NULL; 2341 PetscCall(PCBDDCGraphDestroy(&graph)); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2346 { 2347 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2348 PC_IS *pcis = (PC_IS *)(pc->data); 2349 IS dirIS = NULL; 2350 PetscInt i; 2351 2352 PetscFunctionBegin; 2353 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2354 if (zerodiag) { 2355 Mat A; 2356 Vec vec3_N; 2357 PetscScalar *vals; 2358 const PetscInt *idxs; 2359 PetscInt nz, *count; 2360 2361 /* p0 */ 2362 PetscCall(VecSet(pcis->vec1_N, 0.)); 2363 PetscCall(PetscMalloc1(pcis->n, &vals)); 2364 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2365 PetscCall(ISGetIndices(zerodiag, &idxs)); 2366 for (i = 0; i < nz; i++) vals[i] = 1.; 2367 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2368 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2369 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2370 /* v_I */ 2371 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2372 for (i = 0; i < nz; i++) vals[i] = 0.; 2373 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2374 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2375 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2376 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2377 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2378 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2379 if (dirIS) { 2380 PetscInt n; 2381 2382 PetscCall(ISGetLocalSize(dirIS, &n)); 2383 PetscCall(ISGetIndices(dirIS, &idxs)); 2384 for (i = 0; i < n; i++) vals[i] = 0.; 2385 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2386 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2387 } 2388 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2389 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2390 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2391 PetscCall(VecSet(vec3_N, 0.)); 2392 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2393 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2394 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2395 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2396 PetscCall(PetscFree(vals)); 2397 PetscCall(VecDestroy(&vec3_N)); 2398 2399 /* there should not be any pressure dofs lying on the interface */ 2400 PetscCall(PetscCalloc1(pcis->n, &count)); 2401 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2402 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2403 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2404 PetscCall(ISGetIndices(zerodiag, &idxs)); 2405 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2406 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2407 PetscCall(PetscFree(count)); 2408 } 2409 PetscCall(ISDestroy(&dirIS)); 2410 2411 /* check PCBDDCBenignGetOrSetP0 */ 2412 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2413 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2414 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2415 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2416 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2417 for (i = 0; i < pcbddc->benign_n; i++) { 2418 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2419 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2425 { 2426 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2427 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 2428 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2429 PetscInt nz, n, benign_n, bsp = 1; 2430 PetscInt *interior_dofs, n_interior_dofs, nneu; 2431 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2432 2433 PetscFunctionBegin; 2434 if (reuse) goto project_b0; 2435 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2436 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2437 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2438 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2439 has_null_pressures = PETSC_TRUE; 2440 have_null = PETSC_TRUE; 2441 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2442 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2443 Checks if all the pressure dofs in each subdomain have a zero diagonal 2444 If not, a change of basis on pressures is not needed 2445 since the local Schur complements are already SPD 2446 */ 2447 if (pcbddc->n_ISForDofsLocal) { 2448 IS iP = NULL; 2449 PetscInt p, *pp; 2450 PetscBool flg; 2451 2452 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2453 n = pcbddc->n_ISForDofsLocal; 2454 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2455 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2456 PetscOptionsEnd(); 2457 if (!flg) { 2458 n = 1; 2459 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2460 } 2461 2462 bsp = 0; 2463 for (p = 0; p < n; p++) { 2464 PetscInt bs; 2465 2466 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2467 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2468 bsp += bs; 2469 } 2470 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2471 bsp = 0; 2472 for (p = 0; p < n; p++) { 2473 const PetscInt *idxs; 2474 PetscInt b, bs, npl, *bidxs; 2475 2476 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2477 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2478 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2479 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2480 for (b = 0; b < bs; b++) { 2481 PetscInt i; 2482 2483 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2484 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2485 bsp++; 2486 } 2487 PetscCall(PetscFree(bidxs)); 2488 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2489 } 2490 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2491 2492 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2493 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2494 if (iP) { 2495 IS newpressures; 2496 2497 PetscCall(ISDifference(pressures, iP, &newpressures)); 2498 PetscCall(ISDestroy(&pressures)); 2499 pressures = newpressures; 2500 } 2501 PetscCall(ISSorted(pressures, &sorted)); 2502 if (!sorted) PetscCall(ISSort(pressures)); 2503 PetscCall(PetscFree(pp)); 2504 } 2505 2506 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2507 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2508 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2509 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2510 PetscCall(ISSorted(zerodiag, &sorted)); 2511 if (!sorted) PetscCall(ISSort(zerodiag)); 2512 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2513 zerodiag_save = zerodiag; 2514 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2515 if (!nz) { 2516 if (n) have_null = PETSC_FALSE; 2517 has_null_pressures = PETSC_FALSE; 2518 PetscCall(ISDestroy(&zerodiag)); 2519 } 2520 recompute_zerodiag = PETSC_FALSE; 2521 2522 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2523 zerodiag_subs = NULL; 2524 benign_n = 0; 2525 n_interior_dofs = 0; 2526 interior_dofs = NULL; 2527 nneu = 0; 2528 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2529 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2530 if (checkb) { /* need to compute interior nodes */ 2531 PetscInt n, i, j; 2532 PetscInt n_neigh, *neigh, *n_shared, **shared; 2533 PetscInt *iwork; 2534 2535 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n)); 2536 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2537 PetscCall(PetscCalloc1(n, &iwork)); 2538 PetscCall(PetscMalloc1(n, &interior_dofs)); 2539 for (i = 1; i < n_neigh; i++) 2540 for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1; 2541 for (i = 0; i < n; i++) 2542 if (!iwork[i]) interior_dofs[n_interior_dofs++] = i; 2543 PetscCall(PetscFree(iwork)); 2544 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2545 } 2546 if (has_null_pressures) { 2547 IS *subs; 2548 PetscInt nsubs, i, j, nl; 2549 const PetscInt *idxs; 2550 PetscScalar *array; 2551 Vec *work; 2552 2553 subs = pcbddc->local_subs; 2554 nsubs = pcbddc->n_local_subs; 2555 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2556 if (checkb) { 2557 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2558 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2559 PetscCall(ISGetIndices(zerodiag, &idxs)); 2560 /* work[0] = 1_p */ 2561 PetscCall(VecSet(work[0], 0.)); 2562 PetscCall(VecGetArray(work[0], &array)); 2563 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2564 PetscCall(VecRestoreArray(work[0], &array)); 2565 /* work[0] = 1_v */ 2566 PetscCall(VecSet(work[1], 1.)); 2567 PetscCall(VecGetArray(work[1], &array)); 2568 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2569 PetscCall(VecRestoreArray(work[1], &array)); 2570 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2571 } 2572 2573 if (nsubs > 1 || bsp > 1) { 2574 IS *is; 2575 PetscInt b, totb; 2576 2577 totb = bsp; 2578 is = bsp > 1 ? bzerodiag : &zerodiag; 2579 nsubs = PetscMax(nsubs, 1); 2580 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2581 for (b = 0; b < totb; b++) { 2582 for (i = 0; i < nsubs; i++) { 2583 ISLocalToGlobalMapping l2g; 2584 IS t_zerodiag_subs; 2585 PetscInt nl; 2586 2587 if (subs) { 2588 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2589 } else { 2590 IS tis; 2591 2592 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2593 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2594 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2595 PetscCall(ISDestroy(&tis)); 2596 } 2597 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2598 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2599 if (nl) { 2600 PetscBool valid = PETSC_TRUE; 2601 2602 if (checkb) { 2603 PetscCall(VecSet(matis->x, 0)); 2604 PetscCall(ISGetLocalSize(subs[i], &nl)); 2605 PetscCall(ISGetIndices(subs[i], &idxs)); 2606 PetscCall(VecGetArray(matis->x, &array)); 2607 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2608 PetscCall(VecRestoreArray(matis->x, &array)); 2609 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2610 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2611 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2612 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2613 PetscCall(VecGetArray(matis->y, &array)); 2614 for (j = 0; j < n_interior_dofs; j++) { 2615 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2616 valid = PETSC_FALSE; 2617 break; 2618 } 2619 } 2620 PetscCall(VecRestoreArray(matis->y, &array)); 2621 } 2622 if (valid && nneu) { 2623 const PetscInt *idxs; 2624 PetscInt nzb; 2625 2626 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2627 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2628 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2629 if (nzb) valid = PETSC_FALSE; 2630 } 2631 if (valid && pressures) { 2632 IS t_pressure_subs, tmp; 2633 PetscInt i1, i2; 2634 2635 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2636 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2637 PetscCall(ISGetLocalSize(tmp, &i1)); 2638 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2639 if (i2 != i1) valid = PETSC_FALSE; 2640 PetscCall(ISDestroy(&t_pressure_subs)); 2641 PetscCall(ISDestroy(&tmp)); 2642 } 2643 if (valid) { 2644 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2645 benign_n++; 2646 } else recompute_zerodiag = PETSC_TRUE; 2647 } 2648 PetscCall(ISDestroy(&t_zerodiag_subs)); 2649 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2650 } 2651 } 2652 } else { /* there's just one subdomain (or zero if they have not been detected */ 2653 PetscBool valid = PETSC_TRUE; 2654 2655 if (nneu) valid = PETSC_FALSE; 2656 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2657 if (valid && checkb) { 2658 PetscCall(MatMult(matis->A, work[0], matis->x)); 2659 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2660 PetscCall(VecGetArray(matis->x, &array)); 2661 for (j = 0; j < n_interior_dofs; j++) { 2662 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2663 valid = PETSC_FALSE; 2664 break; 2665 } 2666 } 2667 PetscCall(VecRestoreArray(matis->x, &array)); 2668 } 2669 if (valid) { 2670 benign_n = 1; 2671 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2672 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2673 zerodiag_subs[0] = zerodiag; 2674 } 2675 } 2676 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2677 } 2678 PetscCall(PetscFree(interior_dofs)); 2679 2680 if (!benign_n) { 2681 PetscInt n; 2682 2683 PetscCall(ISDestroy(&zerodiag)); 2684 recompute_zerodiag = PETSC_FALSE; 2685 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2686 if (n) have_null = PETSC_FALSE; 2687 } 2688 2689 /* final check for null pressures */ 2690 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2691 2692 if (recompute_zerodiag) { 2693 PetscCall(ISDestroy(&zerodiag)); 2694 if (benign_n == 1) { 2695 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2696 zerodiag = zerodiag_subs[0]; 2697 } else { 2698 PetscInt i, nzn, *new_idxs; 2699 2700 nzn = 0; 2701 for (i = 0; i < benign_n; i++) { 2702 PetscInt ns; 2703 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2704 nzn += ns; 2705 } 2706 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2707 nzn = 0; 2708 for (i = 0; i < benign_n; i++) { 2709 PetscInt ns, *idxs; 2710 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2711 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2712 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2713 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2714 nzn += ns; 2715 } 2716 PetscCall(PetscSortInt(nzn, new_idxs)); 2717 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2718 } 2719 have_null = PETSC_FALSE; 2720 } 2721 2722 /* determines if the coarse solver will be singular or not */ 2723 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2724 2725 /* Prepare matrix to compute no-net-flux */ 2726 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2727 Mat A, loc_divudotp; 2728 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2729 IS row, col, isused = NULL; 2730 PetscInt M, N, n, st, n_isused; 2731 2732 if (pressures) { 2733 isused = pressures; 2734 } else { 2735 isused = zerodiag_save; 2736 } 2737 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2738 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2739 PetscCall(MatGetLocalSize(A, &n, NULL)); 2740 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2741 n_isused = 0; 2742 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2743 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2744 st = st - n_isused; 2745 if (n) { 2746 const PetscInt *gidxs; 2747 2748 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2749 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2750 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2751 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2752 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2753 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2754 } else { 2755 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2756 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2757 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2758 } 2759 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2760 PetscCall(ISGetSize(row, &M)); 2761 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2762 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2763 PetscCall(ISDestroy(&row)); 2764 PetscCall(ISDestroy(&col)); 2765 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2766 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2767 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2768 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2769 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2770 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2771 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2772 PetscCall(MatDestroy(&loc_divudotp)); 2773 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2774 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2775 } 2776 PetscCall(ISDestroy(&zerodiag_save)); 2777 PetscCall(ISDestroy(&pressures)); 2778 if (bzerodiag) { 2779 PetscInt i; 2780 2781 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 2782 PetscCall(PetscFree(bzerodiag)); 2783 } 2784 pcbddc->benign_n = benign_n; 2785 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2786 2787 /* determines if the problem has subdomains with 0 pressure block */ 2788 have_null = (PetscBool)(!!pcbddc->benign_n); 2789 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2790 2791 project_b0: 2792 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2793 /* change of basis and p0 dofs */ 2794 if (pcbddc->benign_n) { 2795 PetscInt i, s, *nnz; 2796 2797 /* local change of basis for pressures */ 2798 PetscCall(MatDestroy(&pcbddc->benign_change)); 2799 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2800 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2801 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2802 PetscCall(PetscMalloc1(n, &nnz)); 2803 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2804 for (i = 0; i < pcbddc->benign_n; i++) { 2805 const PetscInt *idxs; 2806 PetscInt nzs, j; 2807 2808 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2809 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2810 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2811 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2812 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2813 } 2814 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2815 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2816 PetscCall(PetscFree(nnz)); 2817 /* set identity by default */ 2818 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 2819 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 2820 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 2821 /* set change on pressures */ 2822 for (s = 0; s < pcbddc->benign_n; s++) { 2823 PetscScalar *array; 2824 const PetscInt *idxs; 2825 PetscInt nzs; 2826 2827 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 2828 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2829 for (i = 0; i < nzs - 1; i++) { 2830 PetscScalar vals[2]; 2831 PetscInt cols[2]; 2832 2833 cols[0] = idxs[i]; 2834 cols[1] = idxs[nzs - 1]; 2835 vals[0] = 1.; 2836 vals[1] = 1.; 2837 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 2838 } 2839 PetscCall(PetscMalloc1(nzs, &array)); 2840 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 2841 array[nzs - 1] = 1.; 2842 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 2843 /* store local idxs for p0 */ 2844 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 2845 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2846 PetscCall(PetscFree(array)); 2847 } 2848 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2849 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2850 2851 /* project if needed */ 2852 if (pcbddc->benign_change_explicit) { 2853 Mat M; 2854 2855 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 2856 PetscCall(MatDestroy(&pcbddc->local_mat)); 2857 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 2858 PetscCall(MatDestroy(&M)); 2859 } 2860 /* store global idxs for p0 */ 2861 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 2862 } 2863 *zerodiaglocal = zerodiag; 2864 PetscFunctionReturn(0); 2865 } 2866 2867 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2868 { 2869 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2870 PetscScalar *array; 2871 2872 PetscFunctionBegin; 2873 if (!pcbddc->benign_sf) { 2874 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 2875 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 2876 } 2877 if (get) { 2878 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 2879 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2880 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2881 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 2882 } else { 2883 PetscCall(VecGetArray(v, &array)); 2884 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2885 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2886 PetscCall(VecRestoreArray(v, &array)); 2887 } 2888 PetscFunctionReturn(0); 2889 } 2890 2891 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2892 { 2893 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2894 2895 PetscFunctionBegin; 2896 /* TODO: add error checking 2897 - avoid nested pop (or push) calls. 2898 - cannot push before pop. 2899 - cannot call this if pcbddc->local_mat is NULL 2900 */ 2901 if (!pcbddc->benign_n) PetscFunctionReturn(0); 2902 if (pop) { 2903 if (pcbddc->benign_change_explicit) { 2904 IS is_p0; 2905 MatReuse reuse; 2906 2907 /* extract B_0 */ 2908 reuse = MAT_INITIAL_MATRIX; 2909 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 2910 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 2911 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 2912 /* remove rows and cols from local problem */ 2913 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 2914 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 2915 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 2916 PetscCall(ISDestroy(&is_p0)); 2917 } else { 2918 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2919 PetscScalar *vals; 2920 PetscInt i, n, *idxs_ins; 2921 2922 PetscCall(VecGetLocalSize(matis->y, &n)); 2923 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 2924 if (!pcbddc->benign_B0) { 2925 PetscInt *nnz; 2926 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 2927 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 2928 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 2929 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 2930 for (i = 0; i < pcbddc->benign_n; i++) { 2931 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 2932 nnz[i] = n - nnz[i]; 2933 } 2934 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 2935 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2936 PetscCall(PetscFree(nnz)); 2937 } 2938 2939 for (i = 0; i < pcbddc->benign_n; i++) { 2940 PetscScalar *array; 2941 PetscInt *idxs, j, nz, cum; 2942 2943 PetscCall(VecSet(matis->x, 0.)); 2944 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 2945 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2946 for (j = 0; j < nz; j++) vals[j] = 1.; 2947 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 2948 PetscCall(VecAssemblyBegin(matis->x)); 2949 PetscCall(VecAssemblyEnd(matis->x)); 2950 PetscCall(VecSet(matis->y, 0.)); 2951 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2952 PetscCall(VecGetArray(matis->y, &array)); 2953 cum = 0; 2954 for (j = 0; j < n; j++) { 2955 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2956 vals[cum] = array[j]; 2957 idxs_ins[cum] = j; 2958 cum++; 2959 } 2960 } 2961 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 2962 PetscCall(VecRestoreArray(matis->y, &array)); 2963 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2964 } 2965 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2966 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2967 PetscCall(PetscFree2(idxs_ins, vals)); 2968 } 2969 } else { /* push */ 2970 if (pcbddc->benign_change_explicit) { 2971 PetscInt i; 2972 2973 for (i = 0; i < pcbddc->benign_n; i++) { 2974 PetscScalar *B0_vals; 2975 PetscInt *B0_cols, B0_ncol; 2976 2977 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2978 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 2979 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 2980 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 2981 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2982 } 2983 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2984 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2985 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 2986 } 2987 PetscFunctionReturn(0); 2988 } 2989 2990 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2991 { 2992 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2993 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2994 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 2995 PetscBLASInt *B_iwork, *B_ifail; 2996 PetscScalar *work, lwork; 2997 PetscScalar *St, *S, *eigv; 2998 PetscScalar *Sarray, *Starray; 2999 PetscReal *eigs, thresh, lthresh, uthresh; 3000 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3001 PetscBool allocated_S_St, upart; 3002 #if defined(PETSC_USE_COMPLEX) 3003 PetscReal *rwork; 3004 #endif 3005 3006 PetscFunctionBegin; 3007 if (!pcbddc->adaptive_selection) PetscFunctionReturn(0); 3008 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3009 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3010 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3011 sub_schurs->is_posdef); 3012 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3013 3014 if (pcbddc->dbg_flag) { 3015 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3016 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3017 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3018 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3019 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3020 } 3021 3022 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3023 3024 /* max size of subsets */ 3025 mss = 0; 3026 for (i = 0; i < sub_schurs->n_subs; i++) { 3027 PetscInt subset_size; 3028 3029 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3030 mss = PetscMax(mss, subset_size); 3031 } 3032 3033 /* min/max and threshold */ 3034 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3035 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3036 nmax = PetscMax(nmin, nmax); 3037 allocated_S_St = PETSC_FALSE; 3038 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3039 allocated_S_St = PETSC_TRUE; 3040 } 3041 3042 /* allocate lapack workspace */ 3043 cum = cum2 = 0; 3044 maxneigs = 0; 3045 for (i = 0; i < sub_schurs->n_subs; i++) { 3046 PetscInt n, subset_size; 3047 3048 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3049 n = PetscMin(subset_size, nmax); 3050 cum += subset_size; 3051 cum2 += subset_size * n; 3052 maxneigs = PetscMax(maxneigs, n); 3053 } 3054 lwork = 0; 3055 if (mss) { 3056 if (sub_schurs->is_symmetric) { 3057 PetscScalar sdummy = 0.; 3058 PetscBLASInt B_itype = 1; 3059 PetscBLASInt B_N = mss, idummy = 0; 3060 PetscReal rdummy = 0., zero = 0.0; 3061 PetscReal eps = 0.0; /* dlamch? */ 3062 3063 B_lwork = -1; 3064 /* some implementations may complain about NULL pointers, even if we are querying */ 3065 S = &sdummy; 3066 St = &sdummy; 3067 eigs = &rdummy; 3068 eigv = &sdummy; 3069 B_iwork = &idummy; 3070 B_ifail = &idummy; 3071 #if defined(PETSC_USE_COMPLEX) 3072 rwork = &rdummy; 3073 #endif 3074 thresh = 1.0; 3075 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3076 #if defined(PETSC_USE_COMPLEX) 3077 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3078 #else 3079 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3080 #endif 3081 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3082 PetscCall(PetscFPTrapPop()); 3083 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3084 } 3085 3086 nv = 0; 3087 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3088 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3089 } 3090 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3091 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3092 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3093 #if defined(PETSC_USE_COMPLEX) 3094 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3095 #endif 3096 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3097 &pcbddc->adaptive_constraints_data)); 3098 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3099 3100 maxneigs = 0; 3101 cum = cumarray = 0; 3102 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3103 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3104 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3105 const PetscInt *idxs; 3106 3107 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3108 for (cum = 0; cum < nv; cum++) { 3109 pcbddc->adaptive_constraints_n[cum] = 1; 3110 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3111 pcbddc->adaptive_constraints_data[cum] = 1.0; 3112 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3113 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3114 } 3115 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3116 } 3117 3118 if (mss) { /* multilevel */ 3119 if (sub_schurs->gdsw) { 3120 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3121 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3122 } else { 3123 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3124 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3125 } 3126 } 3127 3128 lthresh = pcbddc->adaptive_threshold[0]; 3129 uthresh = pcbddc->adaptive_threshold[1]; 3130 upart = pcbddc->use_deluxe_scaling; 3131 for (i = 0; i < sub_schurs->n_subs; i++) { 3132 const PetscInt *idxs; 3133 PetscReal upper, lower; 3134 PetscInt j, subset_size, eigs_start = 0; 3135 PetscBLASInt B_N; 3136 PetscBool same_data = PETSC_FALSE; 3137 PetscBool scal = PETSC_FALSE; 3138 3139 if (upart) { 3140 upper = PETSC_MAX_REAL; 3141 lower = uthresh; 3142 } else { 3143 if (sub_schurs->gdsw) { 3144 upper = uthresh; 3145 lower = PETSC_MIN_REAL; 3146 } else { 3147 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3148 upper = 1. / uthresh; 3149 lower = 0.; 3150 } 3151 } 3152 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3153 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3154 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3155 /* this is experimental: we assume the dofs have been properly grouped to have 3156 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3157 if (!sub_schurs->is_posdef) { 3158 Mat T; 3159 3160 for (j = 0; j < subset_size; j++) { 3161 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3162 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3163 PetscCall(MatScale(T, -1.0)); 3164 PetscCall(MatDestroy(&T)); 3165 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3166 PetscCall(MatScale(T, -1.0)); 3167 PetscCall(MatDestroy(&T)); 3168 if (sub_schurs->change_primal_sub) { 3169 PetscInt nz, k; 3170 const PetscInt *idxs; 3171 3172 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3173 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3174 for (k = 0; k < nz; k++) { 3175 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3176 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3177 } 3178 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3179 } 3180 scal = PETSC_TRUE; 3181 break; 3182 } 3183 } 3184 } 3185 3186 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3187 if (sub_schurs->is_symmetric) { 3188 PetscInt j, k; 3189 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3190 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3191 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3192 } 3193 for (j = 0; j < subset_size; j++) { 3194 for (k = j; k < subset_size; k++) { 3195 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3196 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3197 } 3198 } 3199 } else { 3200 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3201 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3202 } 3203 } else { 3204 S = Sarray + cumarray; 3205 St = Starray + cumarray; 3206 } 3207 /* see if we can save some work */ 3208 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3209 3210 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3211 B_neigs = 0; 3212 } else { 3213 if (sub_schurs->is_symmetric) { 3214 PetscBLASInt B_itype = 1; 3215 PetscBLASInt B_IL, B_IU; 3216 PetscReal eps = -1.0; /* dlamch? */ 3217 PetscInt nmin_s; 3218 PetscBool compute_range; 3219 3220 B_neigs = 0; 3221 compute_range = (PetscBool)!same_data; 3222 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3223 3224 if (pcbddc->dbg_flag) { 3225 PetscInt nc = 0; 3226 3227 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3228 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, 3229 sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc)); 3230 } 3231 3232 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3233 if (compute_range) { 3234 /* ask for eigenvalues larger than thresh */ 3235 if (sub_schurs->is_posdef) { 3236 #if defined(PETSC_USE_COMPLEX) 3237 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3238 #else 3239 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3240 #endif 3241 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3242 } else { /* no theory so far, but it works nicely */ 3243 PetscInt recipe = 0, recipe_m = 1; 3244 PetscReal bb[2]; 3245 3246 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3247 switch (recipe) { 3248 case 0: 3249 if (scal) { 3250 bb[0] = PETSC_MIN_REAL; 3251 bb[1] = lthresh; 3252 } else { 3253 bb[0] = uthresh; 3254 bb[1] = PETSC_MAX_REAL; 3255 } 3256 #if defined(PETSC_USE_COMPLEX) 3257 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3258 #else 3259 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3260 #endif 3261 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3262 break; 3263 case 1: 3264 bb[0] = PETSC_MIN_REAL; 3265 bb[1] = lthresh * lthresh; 3266 #if defined(PETSC_USE_COMPLEX) 3267 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3268 #else 3269 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3270 #endif 3271 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3272 if (!scal) { 3273 PetscBLASInt B_neigs2 = 0; 3274 3275 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3276 bb[1] = PETSC_MAX_REAL; 3277 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3278 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3279 #if defined(PETSC_USE_COMPLEX) 3280 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3281 #else 3282 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3283 #endif 3284 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3285 B_neigs += B_neigs2; 3286 } 3287 break; 3288 case 2: 3289 if (scal) { 3290 bb[0] = PETSC_MIN_REAL; 3291 bb[1] = 0; 3292 #if defined(PETSC_USE_COMPLEX) 3293 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3294 #else 3295 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3296 #endif 3297 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3298 } else { 3299 PetscBLASInt B_neigs2 = 0; 3300 PetscBool import = PETSC_FALSE; 3301 3302 lthresh = PetscMax(lthresh, 0.0); 3303 if (lthresh > 0.0) { 3304 bb[0] = PETSC_MIN_REAL; 3305 bb[1] = lthresh * lthresh; 3306 3307 import = PETSC_TRUE; 3308 #if defined(PETSC_USE_COMPLEX) 3309 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3310 #else 3311 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3312 #endif 3313 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3314 } 3315 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3316 bb[1] = PETSC_MAX_REAL; 3317 if (import) { 3318 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3319 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3320 } 3321 #if defined(PETSC_USE_COMPLEX) 3322 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3323 #else 3324 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3325 #endif 3326 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3327 B_neigs += B_neigs2; 3328 } 3329 break; 3330 case 3: 3331 if (scal) { 3332 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3333 } else { 3334 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3335 } 3336 if (!scal) { 3337 bb[0] = uthresh; 3338 bb[1] = PETSC_MAX_REAL; 3339 #if defined(PETSC_USE_COMPLEX) 3340 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3341 #else 3342 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3343 #endif 3344 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3345 } 3346 if (recipe_m > 0 && B_N - B_neigs > 0) { 3347 PetscBLASInt B_neigs2 = 0; 3348 3349 B_IL = 1; 3350 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3351 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3352 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3355 #else 3356 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3357 #endif 3358 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3359 B_neigs += B_neigs2; 3360 } 3361 break; 3362 case 4: 3363 bb[0] = PETSC_MIN_REAL; 3364 bb[1] = lthresh; 3365 #if defined(PETSC_USE_COMPLEX) 3366 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3367 #else 3368 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3369 #endif 3370 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3371 { 3372 PetscBLASInt B_neigs2 = 0; 3373 3374 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3375 bb[1] = PETSC_MAX_REAL; 3376 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3377 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3378 #if defined(PETSC_USE_COMPLEX) 3379 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3380 #else 3381 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3382 #endif 3383 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3384 B_neigs += B_neigs2; 3385 } 3386 break; 3387 case 5: /* same as before: first compute all eigenvalues, then filter */ 3388 #if defined(PETSC_USE_COMPLEX) 3389 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3390 #else 3391 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3392 #endif 3393 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3394 { 3395 PetscInt e, k, ne; 3396 for (e = 0, ne = 0; e < B_neigs; e++) { 3397 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3398 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3399 eigs[ne] = eigs[e]; 3400 ne++; 3401 } 3402 } 3403 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3404 B_neigs = ne; 3405 } 3406 break; 3407 default: 3408 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3409 } 3410 } 3411 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3412 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3413 B_IL = 1; 3414 #if defined(PETSC_USE_COMPLEX) 3415 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3416 #else 3417 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3418 #endif 3419 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3420 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3421 PetscInt k; 3422 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3423 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3424 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3425 nmin = nmax; 3426 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3427 for (k = 0; k < nmax; k++) { 3428 eigs[k] = 1. / PETSC_SMALL; 3429 eigv[k * (subset_size + 1)] = 1.0; 3430 } 3431 } 3432 PetscCall(PetscFPTrapPop()); 3433 if (B_ierr) { 3434 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3435 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3436 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3437 } 3438 3439 if (B_neigs > nmax) { 3440 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3441 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3442 B_neigs = nmax; 3443 } 3444 3445 nmin_s = PetscMin(nmin, B_N); 3446 if (B_neigs < nmin_s) { 3447 PetscBLASInt B_neigs2 = 0; 3448 3449 if (upart) { 3450 if (scal) { 3451 B_IU = nmin_s; 3452 B_IL = B_neigs + 1; 3453 } else { 3454 B_IL = B_N - nmin_s + 1; 3455 B_IU = B_N - B_neigs; 3456 } 3457 } else { 3458 B_IL = B_neigs + 1; 3459 B_IU = nmin_s; 3460 } 3461 if (pcbddc->dbg_flag) { 3462 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3463 } 3464 if (sub_schurs->is_symmetric) { 3465 PetscInt j, k; 3466 for (j = 0; j < subset_size; j++) { 3467 for (k = j; k < subset_size; k++) { 3468 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3469 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3470 } 3471 } 3472 } else { 3473 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3474 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3475 } 3476 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3477 #if defined(PETSC_USE_COMPLEX) 3478 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3479 #else 3480 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3481 #endif 3482 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3483 PetscCall(PetscFPTrapPop()); 3484 B_neigs += B_neigs2; 3485 } 3486 if (B_ierr) { 3487 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3488 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3489 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3490 } 3491 if (pcbddc->dbg_flag) { 3492 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3493 for (j = 0; j < B_neigs; j++) { 3494 if (!sub_schurs->gdsw) { 3495 if (eigs[j] == 0.0) { 3496 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3497 } else { 3498 if (upart) { 3499 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3500 } else { 3501 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3502 } 3503 } 3504 } else { 3505 double pg = (double)eigs[j + eigs_start]; 3506 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3507 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3508 } 3509 } 3510 } 3511 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3512 } 3513 /* change the basis back to the original one */ 3514 if (sub_schurs->change) { 3515 Mat change, phi, phit; 3516 3517 if (pcbddc->dbg_flag > 2) { 3518 PetscInt ii; 3519 for (ii = 0; ii < B_neigs; ii++) { 3520 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3521 for (j = 0; j < B_N; j++) { 3522 #if defined(PETSC_USE_COMPLEX) 3523 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3524 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3525 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3526 #else 3527 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3528 #endif 3529 } 3530 } 3531 } 3532 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3533 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3534 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3535 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3536 PetscCall(MatDestroy(&phit)); 3537 PetscCall(MatDestroy(&phi)); 3538 } 3539 maxneigs = PetscMax(B_neigs, maxneigs); 3540 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3541 if (B_neigs) { 3542 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3543 3544 if (pcbddc->dbg_flag > 1) { 3545 PetscInt ii; 3546 for (ii = 0; ii < B_neigs; ii++) { 3547 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3548 for (j = 0; j < B_N; j++) { 3549 #if defined(PETSC_USE_COMPLEX) 3550 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3551 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3552 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3553 #else 3554 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3555 #endif 3556 } 3557 } 3558 } 3559 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3560 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3561 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3562 cum++; 3563 } 3564 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3565 /* shift for next computation */ 3566 cumarray += subset_size * subset_size; 3567 } 3568 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3569 3570 if (mss) { 3571 if (sub_schurs->gdsw) { 3572 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3573 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3574 } else { 3575 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3576 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3577 /* destroy matrices (junk) */ 3578 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3579 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3580 } 3581 } 3582 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3583 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3584 #if defined(PETSC_USE_COMPLEX) 3585 PetscCall(PetscFree(rwork)); 3586 #endif 3587 if (pcbddc->dbg_flag) { 3588 PetscInt maxneigs_r; 3589 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3590 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3591 } 3592 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3593 PetscFunctionReturn(0); 3594 } 3595 3596 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3597 { 3598 PetscScalar *coarse_submat_vals; 3599 3600 PetscFunctionBegin; 3601 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3602 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3603 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3604 3605 /* Setup local neumann solver ksp_R */ 3606 /* PCBDDCSetUpLocalScatters should be called first! */ 3607 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3608 3609 /* 3610 Setup local correction and local part of coarse basis. 3611 Gives back the dense local part of the coarse matrix in column major ordering 3612 */ 3613 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals)); 3614 3615 /* Compute total number of coarse nodes and setup coarse solver */ 3616 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals)); 3617 3618 /* free */ 3619 PetscCall(PetscFree(coarse_submat_vals)); 3620 PetscFunctionReturn(0); 3621 } 3622 3623 PetscErrorCode PCBDDCResetCustomization(PC pc) 3624 { 3625 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3626 3627 PetscFunctionBegin; 3628 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3629 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3630 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3631 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3632 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3633 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3634 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3635 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3636 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3637 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3638 PetscFunctionReturn(0); 3639 } 3640 3641 PetscErrorCode PCBDDCResetTopography(PC pc) 3642 { 3643 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3644 PetscInt i; 3645 3646 PetscFunctionBegin; 3647 PetscCall(MatDestroy(&pcbddc->nedcG)); 3648 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3649 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3650 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3651 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3652 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3653 PetscCall(VecDestroy(&pcbddc->work_change)); 3654 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3655 PetscCall(MatDestroy(&pcbddc->divudotp)); 3656 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3657 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3658 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3659 pcbddc->n_local_subs = 0; 3660 PetscCall(PetscFree(pcbddc->local_subs)); 3661 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3662 pcbddc->graphanalyzed = PETSC_FALSE; 3663 pcbddc->recompute_topography = PETSC_TRUE; 3664 pcbddc->corner_selected = PETSC_FALSE; 3665 PetscFunctionReturn(0); 3666 } 3667 3668 PetscErrorCode PCBDDCResetSolvers(PC pc) 3669 { 3670 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3671 3672 PetscFunctionBegin; 3673 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3674 if (pcbddc->coarse_phi_B) { 3675 PetscScalar *array; 3676 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array)); 3677 PetscCall(PetscFree(array)); 3678 } 3679 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3680 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3681 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3682 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3683 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3684 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3685 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3686 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3687 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3688 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3689 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3690 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3691 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3692 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3693 PetscCall(KSPReset(pcbddc->ksp_D)); 3694 PetscCall(KSPReset(pcbddc->ksp_R)); 3695 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3696 PetscCall(MatDestroy(&pcbddc->local_mat)); 3697 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3698 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3699 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3700 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3701 PetscCall(MatDestroy(&pcbddc->benign_change)); 3702 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3703 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3704 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3705 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3706 if (pcbddc->benign_zerodiag_subs) { 3707 PetscInt i; 3708 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3709 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3710 } 3711 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3712 PetscFunctionReturn(0); 3713 } 3714 3715 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3716 { 3717 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3718 PC_IS *pcis = (PC_IS *)pc->data; 3719 VecType impVecType; 3720 PetscInt n_constraints, n_R, old_size; 3721 3722 PetscFunctionBegin; 3723 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3724 n_R = pcis->n - pcbddc->n_vertices; 3725 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3726 /* local work vectors (try to avoid unneeded work)*/ 3727 /* R nodes */ 3728 old_size = -1; 3729 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3730 if (n_R != old_size) { 3731 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3732 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3733 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3734 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3735 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3736 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3737 } 3738 /* local primal dofs */ 3739 old_size = -1; 3740 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3741 if (pcbddc->local_primal_size != old_size) { 3742 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3743 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3744 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3745 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3746 } 3747 /* local explicit constraints */ 3748 old_size = -1; 3749 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3750 if (n_constraints && n_constraints != old_size) { 3751 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3752 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3753 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3754 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3760 { 3761 /* pointers to pcis and pcbddc */ 3762 PC_IS *pcis = (PC_IS *)pc->data; 3763 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3764 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3765 /* submatrices of local problem */ 3766 Mat A_RV, A_VR, A_VV, local_auxmat2_R; 3767 /* submatrices of local coarse problem */ 3768 Mat S_VV, S_CV, S_VC, S_CC; 3769 /* working matrices */ 3770 Mat C_CR; 3771 /* additional working stuff */ 3772 PC pc_R; 3773 Mat F, Brhs = NULL; 3774 Vec dummy_vec; 3775 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 3776 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3777 PetscScalar *work; 3778 PetscInt *idx_V_B; 3779 PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I; 3780 PetscInt i, n_R, n_D, n_B; 3781 PetscScalar one = 1.0, m_one = -1.0; 3782 3783 PetscFunctionBegin; 3784 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 3785 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3786 3787 /* Set Non-overlapping dimensions */ 3788 n_vertices = pcbddc->n_vertices; 3789 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3790 n_B = pcis->n_B; 3791 n_D = pcis->n - n_B; 3792 n_R = pcis->n - n_vertices; 3793 3794 /* vertices in boundary numbering */ 3795 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 3796 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 3797 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 3798 3799 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3800 PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals)); 3801 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV)); 3802 PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size)); 3803 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV)); 3804 PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size)); 3805 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC)); 3806 PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size)); 3807 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC)); 3808 PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size)); 3809 3810 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3811 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 3812 PetscCall(PCSetUp(pc_R)); 3813 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 3814 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 3815 lda_rhs = n_R; 3816 need_benign_correction = PETSC_FALSE; 3817 if (isLU || isCHOL) { 3818 PetscCall(PCFactorGetMatrix(pc_R, &F)); 3819 } else if (sub_schurs && sub_schurs->reuse_solver) { 3820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3821 MatFactorType type; 3822 3823 F = reuse_solver->F; 3824 PetscCall(MatGetFactorType(F, &type)); 3825 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3826 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3827 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 3828 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3829 } else F = NULL; 3830 3831 /* determine if we can use a sparse right-hand side */ 3832 sparserhs = PETSC_FALSE; 3833 if (F) { 3834 MatSolverType solver; 3835 3836 PetscCall(MatFactorGetSolverType(F, &solver)); 3837 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 3838 } 3839 3840 /* allocate workspace */ 3841 n = 0; 3842 if (n_constraints) n += lda_rhs * n_constraints; 3843 if (n_vertices) { 3844 n = PetscMax(2 * lda_rhs * n_vertices, n); 3845 n = PetscMax((lda_rhs + n_B) * n_vertices, n); 3846 } 3847 if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n); 3848 PetscCall(PetscMalloc1(n, &work)); 3849 3850 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3851 dummy_vec = NULL; 3852 if (need_benign_correction && lda_rhs != n_R && F) { 3853 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 3854 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 3855 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 3856 } 3857 3858 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3859 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3860 3861 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3862 if (n_constraints) { 3863 Mat M3, C_B; 3864 IS is_aux; 3865 3866 /* Extract constraints on R nodes: C_{CR} */ 3867 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux)); 3868 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 3869 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 3870 3871 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3872 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3873 if (!sparserhs) { 3874 PetscCall(PetscArrayzero(work, lda_rhs * n_constraints)); 3875 for (i = 0; i < n_constraints; i++) { 3876 const PetscScalar *row_cmat_values; 3877 const PetscInt *row_cmat_indices; 3878 PetscInt size_of_constraint, j; 3879 3880 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3881 for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j]; 3882 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3883 } 3884 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs)); 3885 } else { 3886 Mat tC_CR; 3887 3888 PetscCall(MatScale(C_CR, -1.0)); 3889 if (lda_rhs != n_R) { 3890 PetscScalar *aa; 3891 PetscInt r, *ii, *jj; 3892 PetscBool done; 3893 3894 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3895 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 3896 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 3897 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 3898 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3899 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 3900 } else { 3901 PetscCall(PetscObjectReference((PetscObject)C_CR)); 3902 tC_CR = C_CR; 3903 } 3904 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 3905 PetscCall(MatDestroy(&tC_CR)); 3906 } 3907 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R)); 3908 if (F) { 3909 if (need_benign_correction) { 3910 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3911 3912 /* rhs is already zero on interior dofs, no need to change the rhs */ 3913 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 3914 } 3915 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 3916 if (need_benign_correction) { 3917 PetscScalar *marr; 3918 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3919 3920 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3921 if (lda_rhs != n_R) { 3922 for (i = 0; i < n_constraints; i++) { 3923 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 3924 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 3925 PetscCall(VecResetArray(dummy_vec)); 3926 } 3927 } else { 3928 for (i = 0; i < n_constraints; i++) { 3929 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 3930 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 3931 PetscCall(VecResetArray(pcbddc->vec1_R)); 3932 } 3933 } 3934 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3935 } 3936 } else { 3937 PetscScalar *marr; 3938 3939 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3940 for (i = 0; i < n_constraints; i++) { 3941 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 3942 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 3943 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 3944 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 3945 PetscCall(VecResetArray(pcbddc->vec1_R)); 3946 PetscCall(VecResetArray(pcbddc->vec2_R)); 3947 } 3948 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3949 } 3950 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 3951 PetscCall(MatDestroy(&Brhs)); 3952 if (!pcbddc->switch_static) { 3953 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2)); 3954 for (i = 0; i < n_constraints; i++) { 3955 Vec r, b; 3956 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 3957 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 3958 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3959 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3960 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 3961 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 3962 } 3963 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3964 } else { 3965 if (lda_rhs != n_R) { 3966 IS dummy; 3967 3968 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy)); 3969 PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 3970 PetscCall(ISDestroy(&dummy)); 3971 } else { 3972 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 3973 pcbddc->local_auxmat2 = local_auxmat2_R; 3974 } 3975 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3976 } 3977 PetscCall(ISDestroy(&is_aux)); 3978 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3979 PetscCall(MatScale(M3, m_one)); 3980 if (isCHOL) { 3981 PetscCall(MatCholeskyFactor(M3, NULL, NULL)); 3982 } else { 3983 PetscCall(MatLUFactor(M3, NULL, NULL, NULL)); 3984 } 3985 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 3986 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3987 PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 3988 PetscCall(MatDestroy(&C_B)); 3989 PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3990 PetscCall(MatDestroy(&M3)); 3991 } 3992 3993 /* Get submatrices from subdomain matrix */ 3994 if (n_vertices) { 3995 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 3996 PetscBool oldpin; 3997 #endif 3998 PetscBool isaij; 3999 IS is_aux; 4000 4001 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4002 IS tis; 4003 4004 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4005 PetscCall(ISSort(tis)); 4006 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4007 PetscCall(ISDestroy(&tis)); 4008 } else { 4009 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4010 } 4011 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4012 oldpin = pcbddc->local_mat->boundtocpu; 4013 #endif 4014 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4015 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4016 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4017 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij)); 4018 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4019 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4020 } 4021 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4022 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4023 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4024 #endif 4025 PetscCall(ISDestroy(&is_aux)); 4026 } 4027 4028 /* Matrix of coarse basis functions (local) */ 4029 if (pcbddc->coarse_phi_B) { 4030 PetscInt on_B, on_primal, on_D = n_D; 4031 if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL)); 4032 PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal)); 4033 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4034 PetscScalar *marray; 4035 4036 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray)); 4037 PetscCall(PetscFree(marray)); 4038 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4039 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4040 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4041 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4042 } 4043 } 4044 4045 if (!pcbddc->coarse_phi_B) { 4046 PetscScalar *marr; 4047 4048 /* memory size */ 4049 n = n_B * pcbddc->local_primal_size; 4050 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size; 4051 if (!pcbddc->symmetric_primal) n *= 2; 4052 PetscCall(PetscCalloc1(n, &marr)); 4053 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B)); 4054 marr += n_B * pcbddc->local_primal_size; 4055 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4056 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D)); 4057 marr += n_D * pcbddc->local_primal_size; 4058 } 4059 if (!pcbddc->symmetric_primal) { 4060 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B)); 4061 marr += n_B * pcbddc->local_primal_size; 4062 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D)); 4063 } else { 4064 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4065 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4066 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4067 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4068 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4069 } 4070 } 4071 } 4072 4073 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4074 p0_lidx_I = NULL; 4075 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4076 const PetscInt *idxs; 4077 4078 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4079 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4080 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4081 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4082 } 4083 4084 /* vertices */ 4085 if (n_vertices) { 4086 PetscBool restoreavr = PETSC_FALSE; 4087 4088 PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV)); 4089 4090 if (n_R) { 4091 Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */ 4092 PetscBLASInt B_N, B_one = 1; 4093 const PetscScalar *x; 4094 PetscScalar *y; 4095 4096 PetscCall(MatScale(A_RV, m_one)); 4097 if (need_benign_correction) { 4098 ISLocalToGlobalMapping RtoN; 4099 IS is_p0; 4100 PetscInt *idxs_p0, n; 4101 4102 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4103 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4104 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4105 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4106 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4107 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4108 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4109 PetscCall(ISDestroy(&is_p0)); 4110 } 4111 4112 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV)); 4113 if (!sparserhs || need_benign_correction) { 4114 if (lda_rhs == n_R) { 4115 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4116 } else { 4117 PetscScalar *av, *array; 4118 const PetscInt *xadj, *adjncy; 4119 PetscInt n; 4120 PetscBool flg_row; 4121 4122 array = work + lda_rhs * n_vertices; 4123 PetscCall(PetscArrayzero(array, lda_rhs * n_vertices)); 4124 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4125 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4126 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4127 for (i = 0; i < n; i++) { 4128 PetscInt j; 4129 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j]; 4130 } 4131 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4132 PetscCall(MatDestroy(&A_RV)); 4133 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV)); 4134 } 4135 if (need_benign_correction) { 4136 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4137 PetscScalar *marr; 4138 4139 PetscCall(MatDenseGetArray(A_RV, &marr)); 4140 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4141 4142 | 0 0 0 | (V) 4143 L = | 0 0 -1 | (P-p0) 4144 | 0 0 -1 | (p0) 4145 4146 */ 4147 for (i = 0; i < reuse_solver->benign_n; i++) { 4148 const PetscScalar *vals; 4149 const PetscInt *idxs, *idxs_zero; 4150 PetscInt n, j, nz; 4151 4152 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4153 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4154 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4155 for (j = 0; j < n; j++) { 4156 PetscScalar val = vals[j]; 4157 PetscInt k, col = idxs[j]; 4158 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4159 } 4160 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4161 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4162 } 4163 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4164 } 4165 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4166 Brhs = A_RV; 4167 } else { 4168 Mat tA_RVT, A_RVT; 4169 4170 if (!pcbddc->symmetric_primal) { 4171 /* A_RV already scaled by -1 */ 4172 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4173 } else { 4174 restoreavr = PETSC_TRUE; 4175 PetscCall(MatScale(A_VR, -1.0)); 4176 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4177 A_RVT = A_VR; 4178 } 4179 if (lda_rhs != n_R) { 4180 PetscScalar *aa; 4181 PetscInt r, *ii, *jj; 4182 PetscBool done; 4183 4184 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4185 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4186 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4187 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4188 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4189 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4190 } else { 4191 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4192 tA_RVT = A_RVT; 4193 } 4194 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4195 PetscCall(MatDestroy(&tA_RVT)); 4196 PetscCall(MatDestroy(&A_RVT)); 4197 } 4198 if (F) { 4199 /* need to correct the rhs */ 4200 if (need_benign_correction) { 4201 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4202 PetscScalar *marr; 4203 4204 PetscCall(MatDenseGetArray(Brhs, &marr)); 4205 if (lda_rhs != n_R) { 4206 for (i = 0; i < n_vertices; i++) { 4207 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4208 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4209 PetscCall(VecResetArray(dummy_vec)); 4210 } 4211 } else { 4212 for (i = 0; i < n_vertices; i++) { 4213 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4214 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4215 PetscCall(VecResetArray(pcbddc->vec1_R)); 4216 } 4217 } 4218 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4219 } 4220 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4221 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4222 /* need to correct the solution */ 4223 if (need_benign_correction) { 4224 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4225 PetscScalar *marr; 4226 4227 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4228 if (lda_rhs != n_R) { 4229 for (i = 0; i < n_vertices; i++) { 4230 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4231 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4232 PetscCall(VecResetArray(dummy_vec)); 4233 } 4234 } else { 4235 for (i = 0; i < n_vertices; i++) { 4236 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4237 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4238 PetscCall(VecResetArray(pcbddc->vec1_R)); 4239 } 4240 } 4241 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4242 } 4243 } else { 4244 PetscCall(MatDenseGetArray(Brhs, &y)); 4245 for (i = 0; i < n_vertices; i++) { 4246 PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs)); 4247 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs)); 4248 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4249 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4250 PetscCall(VecResetArray(pcbddc->vec1_R)); 4251 PetscCall(VecResetArray(pcbddc->vec2_R)); 4252 } 4253 PetscCall(MatDenseRestoreArray(Brhs, &y)); 4254 } 4255 PetscCall(MatDestroy(&A_RV)); 4256 PetscCall(MatDestroy(&Brhs)); 4257 /* S_VV and S_CV */ 4258 if (n_constraints) { 4259 Mat B; 4260 4261 PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices)); 4262 for (i = 0; i < n_vertices; i++) { 4263 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4264 PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B)); 4265 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4266 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4267 PetscCall(VecResetArray(pcis->vec1_B)); 4268 PetscCall(VecResetArray(pcbddc->vec1_R)); 4269 } 4270 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B)); 4271 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4272 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV)); 4273 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4274 PetscCall(MatProductSetFromOptions(S_CV)); 4275 PetscCall(MatProductSymbolic(S_CV)); 4276 PetscCall(MatProductNumeric(S_CV)); 4277 PetscCall(MatProductClear(S_CV)); 4278 4279 PetscCall(MatDestroy(&B)); 4280 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B)); 4281 /* Reuse B = local_auxmat2_R * S_CV */ 4282 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B)); 4283 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4284 PetscCall(MatProductSetFromOptions(B)); 4285 PetscCall(MatProductSymbolic(B)); 4286 PetscCall(MatProductNumeric(B)); 4287 4288 PetscCall(MatScale(S_CV, m_one)); 4289 PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N)); 4290 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one)); 4291 PetscCall(MatDestroy(&B)); 4292 } 4293 if (lda_rhs != n_R) { 4294 PetscCall(MatDestroy(&A_RRmA_RV)); 4295 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV)); 4296 PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs)); 4297 } 4298 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt)); 4299 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4300 if (need_benign_correction) { 4301 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4302 PetscScalar *marr, *sums; 4303 4304 PetscCall(PetscMalloc1(n_vertices, &sums)); 4305 PetscCall(MatDenseGetArray(S_VVt, &marr)); 4306 for (i = 0; i < reuse_solver->benign_n; i++) { 4307 const PetscScalar *vals; 4308 const PetscInt *idxs, *idxs_zero; 4309 PetscInt n, j, nz; 4310 4311 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4312 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4313 for (j = 0; j < n_vertices; j++) { 4314 PetscInt k; 4315 sums[j] = 0.; 4316 for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs]; 4317 } 4318 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4319 for (j = 0; j < n; j++) { 4320 PetscScalar val = vals[j]; 4321 PetscInt k; 4322 for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k]; 4323 } 4324 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4325 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4326 } 4327 PetscCall(PetscFree(sums)); 4328 PetscCall(MatDenseRestoreArray(S_VVt, &marr)); 4329 PetscCall(MatDestroy(&A_RV_bcorr)); 4330 } 4331 PetscCall(MatDestroy(&A_RRmA_RV)); 4332 PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N)); 4333 PetscCall(MatDenseGetArrayRead(A_VV, &x)); 4334 PetscCall(MatDenseGetArray(S_VVt, &y)); 4335 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one)); 4336 PetscCall(MatDenseRestoreArrayRead(A_VV, &x)); 4337 PetscCall(MatDenseRestoreArray(S_VVt, &y)); 4338 PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN)); 4339 PetscCall(MatDestroy(&S_VVt)); 4340 } else { 4341 PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN)); 4342 } 4343 PetscCall(MatDestroy(&A_VV)); 4344 4345 /* coarse basis functions */ 4346 for (i = 0; i < n_vertices; i++) { 4347 Vec v; 4348 PetscScalar one = 1.0, zero = 0.0; 4349 4350 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4351 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v)); 4352 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4353 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4354 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4355 PetscMPIInt rank; 4356 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank)); 4357 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4358 } 4359 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4360 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4361 PetscCall(VecAssemblyEnd(v)); 4362 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v)); 4363 4364 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4365 PetscInt j; 4366 4367 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v)); 4368 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4369 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4370 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4371 PetscMPIInt rank; 4372 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank)); 4373 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4374 } 4375 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4376 PetscCall(VecAssemblyBegin(v)); 4377 PetscCall(VecAssemblyEnd(v)); 4378 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v)); 4379 } 4380 PetscCall(VecResetArray(pcbddc->vec1_R)); 4381 } 4382 /* if n_R == 0 the object is not destroyed */ 4383 PetscCall(MatDestroy(&A_RV)); 4384 } 4385 PetscCall(VecDestroy(&dummy_vec)); 4386 4387 if (n_constraints) { 4388 Mat B; 4389 4390 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B)); 4391 PetscCall(MatScale(S_CC, m_one)); 4392 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B)); 4393 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4394 PetscCall(MatProductSetFromOptions(B)); 4395 PetscCall(MatProductSymbolic(B)); 4396 PetscCall(MatProductNumeric(B)); 4397 4398 PetscCall(MatScale(S_CC, m_one)); 4399 if (n_vertices) { 4400 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4401 PetscCall(MatTransposeSetPrecursor(S_CV, S_VC)); 4402 PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC)); 4403 } else { 4404 Mat S_VCt; 4405 4406 if (lda_rhs != n_R) { 4407 PetscCall(MatDestroy(&B)); 4408 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B)); 4409 PetscCall(MatDenseSetLDA(B, lda_rhs)); 4410 } 4411 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt)); 4412 PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN)); 4413 PetscCall(MatDestroy(&S_VCt)); 4414 } 4415 } 4416 PetscCall(MatDestroy(&B)); 4417 /* coarse basis functions */ 4418 for (i = 0; i < n_constraints; i++) { 4419 Vec v; 4420 4421 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4422 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4423 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4424 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4425 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4426 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4427 PetscInt j; 4428 PetscScalar zero = 0.0; 4429 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4430 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4431 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4432 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4433 PetscCall(VecAssemblyBegin(v)); 4434 PetscCall(VecAssemblyEnd(v)); 4435 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4436 } 4437 PetscCall(VecResetArray(pcbddc->vec1_R)); 4438 } 4439 } 4440 if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R)); 4441 PetscCall(PetscFree(p0_lidx_I)); 4442 4443 /* coarse matrix entries relative to B_0 */ 4444 if (pcbddc->benign_n) { 4445 Mat B0_B, B0_BPHI; 4446 IS is_dummy; 4447 const PetscScalar *data; 4448 PetscInt j; 4449 4450 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4451 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4452 PetscCall(ISDestroy(&is_dummy)); 4453 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4454 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4455 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4456 for (j = 0; j < pcbddc->benign_n; j++) { 4457 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4458 for (i = 0; i < pcbddc->local_primal_size; i++) { 4459 coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j]; 4460 coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j]; 4461 } 4462 } 4463 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4464 PetscCall(MatDestroy(&B0_B)); 4465 PetscCall(MatDestroy(&B0_BPHI)); 4466 } 4467 4468 /* compute other basis functions for non-symmetric problems */ 4469 if (!pcbddc->symmetric_primal) { 4470 Mat B_V = NULL, B_C = NULL; 4471 PetscScalar *marray; 4472 4473 if (n_constraints) { 4474 Mat S_CCT, C_CRT; 4475 4476 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4477 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4478 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4479 PetscCall(MatDestroy(&S_CCT)); 4480 if (n_vertices) { 4481 Mat S_VCT; 4482 4483 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4484 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4485 PetscCall(MatDestroy(&S_VCT)); 4486 } 4487 PetscCall(MatDestroy(&C_CRT)); 4488 } else { 4489 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4490 } 4491 if (n_vertices && n_R) { 4492 PetscScalar *av, *marray; 4493 const PetscInt *xadj, *adjncy; 4494 PetscInt n; 4495 PetscBool flg_row; 4496 4497 /* B_V = B_V - A_VR^T */ 4498 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4499 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4500 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4501 PetscCall(MatDenseGetArray(B_V, &marray)); 4502 for (i = 0; i < n; i++) { 4503 PetscInt j; 4504 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 4505 } 4506 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4507 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4508 PetscCall(MatDestroy(&A_VR)); 4509 } 4510 4511 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4512 if (n_vertices) { 4513 PetscCall(MatDenseGetArray(B_V, &marray)); 4514 for (i = 0; i < n_vertices; i++) { 4515 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 4516 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4517 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4518 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4519 PetscCall(VecResetArray(pcbddc->vec1_R)); 4520 PetscCall(VecResetArray(pcbddc->vec2_R)); 4521 } 4522 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4523 } 4524 if (B_C) { 4525 PetscCall(MatDenseGetArray(B_C, &marray)); 4526 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 4527 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 4528 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4529 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4530 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4531 PetscCall(VecResetArray(pcbddc->vec1_R)); 4532 PetscCall(VecResetArray(pcbddc->vec2_R)); 4533 } 4534 PetscCall(MatDenseRestoreArray(B_C, &marray)); 4535 } 4536 /* coarse basis functions */ 4537 for (i = 0; i < pcbddc->local_primal_size; i++) { 4538 Vec v; 4539 4540 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 4541 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 4542 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4543 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4544 if (i < n_vertices) { 4545 PetscScalar one = 1.0; 4546 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4547 PetscCall(VecAssemblyBegin(v)); 4548 PetscCall(VecAssemblyEnd(v)); 4549 } 4550 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 4551 4552 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4553 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 4554 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4555 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4556 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 4557 } 4558 PetscCall(VecResetArray(pcbddc->vec1_R)); 4559 } 4560 PetscCall(MatDestroy(&B_V)); 4561 PetscCall(MatDestroy(&B_C)); 4562 } 4563 4564 /* free memory */ 4565 PetscCall(PetscFree(idx_V_B)); 4566 PetscCall(MatDestroy(&S_VV)); 4567 PetscCall(MatDestroy(&S_CV)); 4568 PetscCall(MatDestroy(&S_VC)); 4569 PetscCall(MatDestroy(&S_CC)); 4570 PetscCall(PetscFree(work)); 4571 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 4572 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 4573 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4574 4575 /* Checking coarse_sub_mat and coarse basis functios */ 4576 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4577 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4578 if (pcbddc->dbg_flag) { 4579 Mat coarse_sub_mat; 4580 Mat AUXMAT, TM1, TM2, TM3, TM4; 4581 Mat coarse_phi_D, coarse_phi_B; 4582 Mat coarse_psi_D, coarse_psi_B; 4583 Mat A_II, A_BB, A_IB, A_BI; 4584 Mat C_B, CPHI; 4585 IS is_dummy; 4586 Vec mones; 4587 MatType checkmattype = MATSEQAIJ; 4588 PetscReal real_value; 4589 4590 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4591 Mat A; 4592 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 4593 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 4594 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 4595 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 4596 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 4597 PetscCall(MatDestroy(&A)); 4598 } else { 4599 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 4600 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 4601 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 4602 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 4603 } 4604 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 4605 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 4606 if (!pcbddc->symmetric_primal) { 4607 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 4608 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 4609 } 4610 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat)); 4611 4612 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 4613 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 4614 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4615 if (!pcbddc->symmetric_primal) { 4616 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4617 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4618 PetscCall(MatDestroy(&AUXMAT)); 4619 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4620 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4621 PetscCall(MatDestroy(&AUXMAT)); 4622 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4623 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4624 PetscCall(MatDestroy(&AUXMAT)); 4625 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4626 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4627 PetscCall(MatDestroy(&AUXMAT)); 4628 } else { 4629 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4630 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4631 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4632 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4633 PetscCall(MatDestroy(&AUXMAT)); 4634 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4635 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4636 PetscCall(MatDestroy(&AUXMAT)); 4637 } 4638 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 4639 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 4640 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 4641 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 4642 if (pcbddc->benign_n) { 4643 Mat B0_B, B0_BPHI; 4644 const PetscScalar *data2; 4645 PetscScalar *data; 4646 PetscInt j; 4647 4648 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4649 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4650 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4651 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4652 PetscCall(MatDenseGetArray(TM1, &data)); 4653 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 4654 for (j = 0; j < pcbddc->benign_n; j++) { 4655 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4656 for (i = 0; i < pcbddc->local_primal_size; i++) { 4657 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 4658 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 4659 } 4660 } 4661 PetscCall(MatDenseRestoreArray(TM1, &data)); 4662 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 4663 PetscCall(MatDestroy(&B0_B)); 4664 PetscCall(ISDestroy(&is_dummy)); 4665 PetscCall(MatDestroy(&B0_BPHI)); 4666 } 4667 #if 0 4668 { 4669 PetscViewer viewer; 4670 char filename[256]; 4671 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4672 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4673 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4674 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4675 PetscCall(MatView(coarse_sub_mat,viewer)); 4676 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4677 PetscCall(MatView(TM1,viewer)); 4678 if (pcbddc->coarse_phi_B) { 4679 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4680 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4681 } 4682 if (pcbddc->coarse_phi_D) { 4683 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4684 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4685 } 4686 if (pcbddc->coarse_psi_B) { 4687 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4688 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4689 } 4690 if (pcbddc->coarse_psi_D) { 4691 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4692 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4693 } 4694 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4695 PetscCall(MatView(pcbddc->local_mat,viewer)); 4696 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4697 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4698 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4699 PetscCall(ISView(pcis->is_I_local,viewer)); 4700 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4701 PetscCall(ISView(pcis->is_B_local,viewer)); 4702 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4703 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4704 PetscCall(PetscViewerDestroy(&viewer)); 4705 } 4706 #endif 4707 PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN)); 4708 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 4709 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4710 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4711 4712 /* check constraints */ 4713 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 4714 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4715 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4716 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4717 } else { 4718 PetscScalar *data; 4719 Mat tmat; 4720 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 4721 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 4722 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 4723 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4724 PetscCall(MatDestroy(&tmat)); 4725 } 4726 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 4727 PetscCall(VecSet(mones, -1.0)); 4728 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4729 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4730 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4731 if (!pcbddc->symmetric_primal) { 4732 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 4733 PetscCall(VecSet(mones, -1.0)); 4734 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4735 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4736 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4737 } 4738 PetscCall(MatDestroy(&C_B)); 4739 PetscCall(MatDestroy(&CPHI)); 4740 PetscCall(ISDestroy(&is_dummy)); 4741 PetscCall(VecDestroy(&mones)); 4742 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4743 PetscCall(MatDestroy(&A_II)); 4744 PetscCall(MatDestroy(&A_BB)); 4745 PetscCall(MatDestroy(&A_IB)); 4746 PetscCall(MatDestroy(&A_BI)); 4747 PetscCall(MatDestroy(&TM1)); 4748 PetscCall(MatDestroy(&TM2)); 4749 PetscCall(MatDestroy(&TM3)); 4750 PetscCall(MatDestroy(&TM4)); 4751 PetscCall(MatDestroy(&coarse_phi_D)); 4752 PetscCall(MatDestroy(&coarse_phi_B)); 4753 if (!pcbddc->symmetric_primal) { 4754 PetscCall(MatDestroy(&coarse_psi_D)); 4755 PetscCall(MatDestroy(&coarse_psi_B)); 4756 } 4757 PetscCall(MatDestroy(&coarse_sub_mat)); 4758 } 4759 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4760 { 4761 PetscBool gpu; 4762 4763 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu)); 4764 if (gpu) { 4765 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 4766 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 4767 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 4768 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 4769 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 4770 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 4771 } 4772 } 4773 /* get back data */ 4774 *coarse_submat_vals_n = coarse_submat_vals; 4775 PetscFunctionReturn(0); 4776 } 4777 4778 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 4779 { 4780 Mat *work_mat; 4781 IS isrow_s, iscol_s; 4782 PetscBool rsorted, csorted; 4783 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 4784 4785 PetscFunctionBegin; 4786 PetscCall(ISSorted(isrow, &rsorted)); 4787 PetscCall(ISSorted(iscol, &csorted)); 4788 PetscCall(ISGetLocalSize(isrow, &rsize)); 4789 PetscCall(ISGetLocalSize(iscol, &csize)); 4790 4791 if (!rsorted) { 4792 const PetscInt *idxs; 4793 PetscInt *idxs_sorted, i; 4794 4795 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 4796 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 4797 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 4798 PetscCall(ISGetIndices(isrow, &idxs)); 4799 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 4800 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4801 PetscCall(ISRestoreIndices(isrow, &idxs)); 4802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 4803 } else { 4804 PetscCall(PetscObjectReference((PetscObject)isrow)); 4805 isrow_s = isrow; 4806 } 4807 4808 if (!csorted) { 4809 if (isrow == iscol) { 4810 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4811 iscol_s = isrow_s; 4812 } else { 4813 const PetscInt *idxs; 4814 PetscInt *idxs_sorted, i; 4815 4816 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 4817 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 4818 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 4819 PetscCall(ISGetIndices(iscol, &idxs)); 4820 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 4821 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4822 PetscCall(ISRestoreIndices(iscol, &idxs)); 4823 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 4824 } 4825 } else { 4826 PetscCall(PetscObjectReference((PetscObject)iscol)); 4827 iscol_s = iscol; 4828 } 4829 4830 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 4831 4832 if (!rsorted || !csorted) { 4833 Mat new_mat; 4834 IS is_perm_r, is_perm_c; 4835 4836 if (!rsorted) { 4837 PetscInt *idxs_r, i; 4838 PetscCall(PetscMalloc1(rsize, &idxs_r)); 4839 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 4840 PetscCall(PetscFree(idxs_perm_r)); 4841 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 4842 } else { 4843 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 4844 } 4845 PetscCall(ISSetPermutation(is_perm_r)); 4846 4847 if (!csorted) { 4848 if (isrow_s == iscol_s) { 4849 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 4850 is_perm_c = is_perm_r; 4851 } else { 4852 PetscInt *idxs_c, i; 4853 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 4854 PetscCall(PetscMalloc1(csize, &idxs_c)); 4855 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 4856 PetscCall(PetscFree(idxs_perm_c)); 4857 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 4858 } 4859 } else { 4860 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 4861 } 4862 PetscCall(ISSetPermutation(is_perm_c)); 4863 4864 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 4865 PetscCall(MatDestroy(&work_mat[0])); 4866 work_mat[0] = new_mat; 4867 PetscCall(ISDestroy(&is_perm_r)); 4868 PetscCall(ISDestroy(&is_perm_c)); 4869 } 4870 4871 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 4872 *B = work_mat[0]; 4873 PetscCall(MatDestroyMatrices(1, &work_mat)); 4874 PetscCall(ISDestroy(&isrow_s)); 4875 PetscCall(ISDestroy(&iscol_s)); 4876 PetscFunctionReturn(0); 4877 } 4878 4879 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4880 { 4881 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 4882 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4883 Mat new_mat, lA; 4884 IS is_local, is_global; 4885 PetscInt local_size; 4886 PetscBool isseqaij, issym, isset; 4887 4888 PetscFunctionBegin; 4889 PetscCall(MatDestroy(&pcbddc->local_mat)); 4890 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 4891 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 4892 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 4893 PetscCall(ISDestroy(&is_local)); 4894 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 4895 PetscCall(ISDestroy(&is_global)); 4896 4897 if (pcbddc->dbg_flag) { 4898 Vec x, x_change; 4899 PetscReal error; 4900 4901 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 4902 PetscCall(VecSetRandom(x, NULL)); 4903 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 4904 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4905 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4906 PetscCall(MatMult(new_mat, matis->x, matis->y)); 4907 if (!pcbddc->change_interior) { 4908 const PetscScalar *x, *y, *v; 4909 PetscReal lerror = 0.; 4910 PetscInt i; 4911 4912 PetscCall(VecGetArrayRead(matis->x, &x)); 4913 PetscCall(VecGetArrayRead(matis->y, &y)); 4914 PetscCall(VecGetArrayRead(matis->counter, &v)); 4915 for (i = 0; i < local_size; i++) 4916 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 4917 PetscCall(VecRestoreArrayRead(matis->x, &x)); 4918 PetscCall(VecRestoreArrayRead(matis->y, &y)); 4919 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 4920 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 4921 if (error > PETSC_SMALL) { 4922 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4923 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 4924 } else { 4925 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 4926 } 4927 } 4928 } 4929 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4930 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4931 PetscCall(VecAXPY(x, -1.0, x_change)); 4932 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 4933 if (error > PETSC_SMALL) { 4934 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4935 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 4936 } else { 4937 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 4938 } 4939 } 4940 PetscCall(VecDestroy(&x)); 4941 PetscCall(VecDestroy(&x_change)); 4942 } 4943 4944 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4945 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 4946 4947 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4948 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 4949 if (isseqaij) { 4950 PetscCall(MatDestroy(&pcbddc->local_mat)); 4951 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4952 if (lA) { 4953 Mat work; 4954 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4955 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4956 PetscCall(MatDestroy(&work)); 4957 } 4958 } else { 4959 Mat work_mat; 4960 4961 PetscCall(MatDestroy(&pcbddc->local_mat)); 4962 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4963 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4964 PetscCall(MatDestroy(&work_mat)); 4965 if (lA) { 4966 Mat work; 4967 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4968 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4969 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4970 PetscCall(MatDestroy(&work)); 4971 } 4972 } 4973 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 4974 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 4975 PetscCall(MatDestroy(&new_mat)); 4976 PetscFunctionReturn(0); 4977 } 4978 4979 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4980 { 4981 PC_IS *pcis = (PC_IS *)(pc->data); 4982 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4983 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4984 PetscInt *idx_R_local = NULL; 4985 PetscInt n_vertices, i, j, n_R, n_D, n_B; 4986 PetscInt vbs, bs; 4987 PetscBT bitmask = NULL; 4988 4989 PetscFunctionBegin; 4990 /* 4991 No need to setup local scatters if 4992 - primal space is unchanged 4993 AND 4994 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4995 AND 4996 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4997 */ 4998 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(0); 4999 /* destroy old objects */ 5000 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5001 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5002 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5003 /* Set Non-overlapping dimensions */ 5004 n_B = pcis->n_B; 5005 n_D = pcis->n - n_B; 5006 n_vertices = pcbddc->n_vertices; 5007 5008 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5009 5010 /* create auxiliary bitmask and allocate workspace */ 5011 if (!sub_schurs || !sub_schurs->reuse_solver) { 5012 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5013 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5014 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5015 5016 for (i = 0, n_R = 0; i < pcis->n; i++) { 5017 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5018 } 5019 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5020 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5021 5022 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5023 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5024 } 5025 5026 /* Block code */ 5027 vbs = 1; 5028 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5029 if (bs > 1 && !(n_vertices % bs)) { 5030 PetscBool is_blocked = PETSC_TRUE; 5031 PetscInt *vary; 5032 if (!sub_schurs || !sub_schurs->reuse_solver) { 5033 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5034 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5035 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5036 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5037 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5038 for (i = 0; i < pcis->n / bs; i++) { 5039 if (vary[i] != 0 && vary[i] != bs) { 5040 is_blocked = PETSC_FALSE; 5041 break; 5042 } 5043 } 5044 PetscCall(PetscFree(vary)); 5045 } else { 5046 /* Verify directly the R set */ 5047 for (i = 0; i < n_R / bs; i++) { 5048 PetscInt j, node = idx_R_local[bs * i]; 5049 for (j = 1; j < bs; j++) { 5050 if (node != idx_R_local[bs * i + j] - j) { 5051 is_blocked = PETSC_FALSE; 5052 break; 5053 } 5054 } 5055 } 5056 } 5057 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5058 vbs = bs; 5059 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5060 } 5061 } 5062 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5063 if (sub_schurs && sub_schurs->reuse_solver) { 5064 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5065 5066 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5067 PetscCall(ISDestroy(&reuse_solver->is_R)); 5068 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5069 reuse_solver->is_R = pcbddc->is_R_local; 5070 } else { 5071 PetscCall(PetscFree(idx_R_local)); 5072 } 5073 5074 /* print some info if requested */ 5075 if (pcbddc->dbg_flag) { 5076 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5077 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5078 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5079 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5080 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5081 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5082 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5083 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5084 } 5085 5086 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5087 if (!sub_schurs || !sub_schurs->reuse_solver) { 5088 IS is_aux1, is_aux2; 5089 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5090 5091 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5092 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5093 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5094 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5095 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5096 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5097 for (i = 0, j = 0; i < n_R; i++) { 5098 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5099 } 5100 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5101 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5102 for (i = 0, j = 0; i < n_B; i++) { 5103 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5104 } 5105 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5106 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5107 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5108 PetscCall(ISDestroy(&is_aux1)); 5109 PetscCall(ISDestroy(&is_aux2)); 5110 5111 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5112 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5113 for (i = 0, j = 0; i < n_R; i++) { 5114 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5115 } 5116 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5117 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5118 PetscCall(ISDestroy(&is_aux1)); 5119 } 5120 PetscCall(PetscBTDestroy(&bitmask)); 5121 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5122 } else { 5123 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5124 IS tis; 5125 PetscInt schur_size; 5126 5127 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5128 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5129 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5130 PetscCall(ISDestroy(&tis)); 5131 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5132 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5133 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5134 PetscCall(ISDestroy(&tis)); 5135 } 5136 } 5137 PetscFunctionReturn(0); 5138 } 5139 5140 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5141 { 5142 MatNullSpace NullSpace; 5143 Mat dmat; 5144 const Vec *nullvecs; 5145 Vec v, v2, *nullvecs2; 5146 VecScatter sct = NULL; 5147 PetscContainer c; 5148 PetscScalar *ddata; 5149 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5150 PetscBool nnsp_has_cnst; 5151 5152 PetscFunctionBegin; 5153 if (!is && !B) { /* MATIS */ 5154 Mat_IS *matis = (Mat_IS *)A->data; 5155 5156 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5157 sct = matis->cctx; 5158 PetscCall(PetscObjectReference((PetscObject)sct)); 5159 } else { 5160 PetscCall(MatGetNullSpace(B, &NullSpace)); 5161 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5162 if (NullSpace) PetscFunctionReturn(0); 5163 } 5164 PetscCall(MatGetNullSpace(A, &NullSpace)); 5165 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5166 if (!NullSpace) PetscFunctionReturn(0); 5167 5168 PetscCall(MatCreateVecs(A, &v, NULL)); 5169 PetscCall(MatCreateVecs(B, &v2, NULL)); 5170 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5171 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5172 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5173 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5174 PetscCall(VecGetBlockSize(v2, &bs)); 5175 PetscCall(VecGetSize(v2, &N)); 5176 PetscCall(VecGetLocalSize(v2, &n)); 5177 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5178 for (k = 0; k < nnsp_size; k++) { 5179 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5180 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5181 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5182 } 5183 if (nnsp_has_cnst) { 5184 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5185 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5186 } 5187 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5188 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5189 5190 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5191 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5192 PetscCall(PetscContainerSetPointer(c, ddata)); 5193 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5194 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5195 PetscCall(PetscContainerDestroy(&c)); 5196 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5197 PetscCall(MatDestroy(&dmat)); 5198 5199 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5200 PetscCall(PetscFree(nullvecs2)); 5201 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5202 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5203 PetscCall(VecDestroy(&v)); 5204 PetscCall(VecDestroy(&v2)); 5205 PetscCall(VecScatterDestroy(&sct)); 5206 PetscFunctionReturn(0); 5207 } 5208 5209 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5210 { 5211 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5212 PC_IS *pcis = (PC_IS *)pc->data; 5213 PC pc_temp; 5214 Mat A_RR; 5215 MatNullSpace nnsp; 5216 MatReuse reuse; 5217 PetscScalar m_one = -1.0; 5218 PetscReal value; 5219 PetscInt n_D, n_R; 5220 PetscBool issbaij, opts, isset, issym; 5221 void (*f)(void) = NULL; 5222 char dir_prefix[256], neu_prefix[256], str_level[16]; 5223 size_t len; 5224 5225 PetscFunctionBegin; 5226 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5227 /* approximate solver, propagate NearNullSpace if needed */ 5228 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5229 MatNullSpace gnnsp1, gnnsp2; 5230 PetscBool lhas, ghas; 5231 5232 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5233 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5234 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5235 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5236 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5237 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5238 } 5239 5240 /* compute prefixes */ 5241 PetscCall(PetscStrcpy(dir_prefix, "")); 5242 PetscCall(PetscStrcpy(neu_prefix, "")); 5243 if (!pcbddc->current_level) { 5244 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5245 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5246 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5247 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5248 } else { 5249 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 5250 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5251 len -= 15; /* remove "pc_bddc_coarse_" */ 5252 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5253 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5254 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5255 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5256 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5257 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5258 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5259 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5260 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5261 } 5262 5263 /* DIRICHLET PROBLEM */ 5264 if (dirichlet) { 5265 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5266 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5267 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5268 if (pcbddc->dbg_flag) { 5269 Mat A_IIn; 5270 5271 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5272 PetscCall(MatDestroy(&pcis->A_II)); 5273 pcis->A_II = A_IIn; 5274 } 5275 } 5276 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5277 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5278 5279 /* Matrix for Dirichlet problem is pcis->A_II */ 5280 n_D = pcis->n - pcis->n_B; 5281 opts = PETSC_FALSE; 5282 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5283 opts = PETSC_TRUE; 5284 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5285 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5286 /* default */ 5287 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5288 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5289 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5290 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5291 if (issbaij) { 5292 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5293 } else { 5294 PetscCall(PCSetType(pc_temp, PCLU)); 5295 } 5296 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5297 } 5298 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5299 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5300 /* Allow user's customization */ 5301 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5302 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5303 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5304 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5305 } 5306 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5307 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5308 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5309 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5310 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5311 const PetscInt *idxs; 5312 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5313 5314 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5315 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5316 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5317 for (i = 0; i < nl; i++) { 5318 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5319 } 5320 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5321 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5322 PetscCall(PetscFree(scoords)); 5323 } 5324 if (sub_schurs && sub_schurs->reuse_solver) { 5325 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5326 5327 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5328 } 5329 5330 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5331 if (!n_D) { 5332 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5333 PetscCall(PCSetType(pc_temp, PCNONE)); 5334 } 5335 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5336 /* set ksp_D into pcis data */ 5337 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5338 PetscCall(KSPDestroy(&pcis->ksp_D)); 5339 pcis->ksp_D = pcbddc->ksp_D; 5340 } 5341 5342 /* NEUMANN PROBLEM */ 5343 A_RR = NULL; 5344 if (neumann) { 5345 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5346 PetscInt ibs, mbs; 5347 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5348 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5349 5350 reuse_neumann_solver = PETSC_FALSE; 5351 if (sub_schurs && sub_schurs->reuse_solver) { 5352 IS iP; 5353 5354 reuse_neumann_solver = PETSC_TRUE; 5355 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5356 if (iP) reuse_neumann_solver = PETSC_FALSE; 5357 } 5358 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5359 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5360 if (pcbddc->ksp_R) { /* already created ksp */ 5361 PetscInt nn_R; 5362 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5363 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5364 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5365 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5366 PetscCall(KSPReset(pcbddc->ksp_R)); 5367 PetscCall(MatDestroy(&A_RR)); 5368 reuse = MAT_INITIAL_MATRIX; 5369 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5370 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5371 PetscCall(MatDestroy(&A_RR)); 5372 reuse = MAT_INITIAL_MATRIX; 5373 } else { /* safe to reuse the matrix */ 5374 reuse = MAT_REUSE_MATRIX; 5375 } 5376 } 5377 /* last check */ 5378 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5379 PetscCall(MatDestroy(&A_RR)); 5380 reuse = MAT_INITIAL_MATRIX; 5381 } 5382 } else { /* first time, so we need to create the matrix */ 5383 reuse = MAT_INITIAL_MATRIX; 5384 } 5385 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5386 TODO: Get Rid of these conversions */ 5387 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5388 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5389 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5390 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5391 if (matis->A == pcbddc->local_mat) { 5392 PetscCall(MatDestroy(&pcbddc->local_mat)); 5393 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5394 } else { 5395 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5396 } 5397 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5398 if (matis->A == pcbddc->local_mat) { 5399 PetscCall(MatDestroy(&pcbddc->local_mat)); 5400 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5401 } else { 5402 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5403 } 5404 } 5405 /* extract A_RR */ 5406 if (reuse_neumann_solver) { 5407 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5408 5409 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5410 PetscCall(MatDestroy(&A_RR)); 5411 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5412 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5413 } else { 5414 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5415 } 5416 } else { 5417 PetscCall(MatDestroy(&A_RR)); 5418 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5419 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5420 } 5421 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5422 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5423 } 5424 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5425 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5426 opts = PETSC_FALSE; 5427 if (!pcbddc->ksp_R) { /* create object if not present */ 5428 opts = PETSC_TRUE; 5429 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5430 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5431 /* default */ 5432 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5433 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5434 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5435 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5436 if (issbaij) { 5437 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5438 } else { 5439 PetscCall(PCSetType(pc_temp, PCLU)); 5440 } 5441 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5442 } 5443 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5444 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5445 if (opts) { /* Allow user's customization once */ 5446 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5447 } 5448 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5449 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5450 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5451 } 5452 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5453 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5454 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5455 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5456 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5457 const PetscInt *idxs; 5458 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5459 5460 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 5461 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 5462 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5463 for (i = 0; i < nl; i++) { 5464 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5465 } 5466 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 5467 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5468 PetscCall(PetscFree(scoords)); 5469 } 5470 5471 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5472 if (!n_R) { 5473 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5474 PetscCall(PCSetType(pc_temp, PCNONE)); 5475 } 5476 /* Reuse solver if it is present */ 5477 if (reuse_neumann_solver) { 5478 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5479 5480 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 5481 } 5482 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5483 } 5484 5485 if (pcbddc->dbg_flag) { 5486 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5487 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5488 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5489 } 5490 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5491 5492 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5493 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 5494 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 5495 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 5496 /* check Dirichlet and Neumann solvers */ 5497 if (pcbddc->dbg_flag) { 5498 if (dirichlet) { /* Dirichlet */ 5499 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 5500 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 5501 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 5502 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 5503 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 5504 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 5505 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value)); 5506 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5507 } 5508 if (neumann) { /* Neumann */ 5509 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 5510 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 5511 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 5512 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5513 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 5514 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 5515 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value)); 5516 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5517 } 5518 } 5519 /* free Neumann problem's matrix */ 5520 PetscCall(MatDestroy(&A_RR)); 5521 PetscFunctionReturn(0); 5522 } 5523 5524 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5525 { 5526 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5527 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5528 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5529 5530 PetscFunctionBegin; 5531 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 5532 if (!pcbddc->switch_static) { 5533 if (applytranspose && pcbddc->local_auxmat1) { 5534 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 5535 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5536 } 5537 if (!reuse_solver) { 5538 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5539 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5540 } else { 5541 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5542 5543 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5544 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5545 } 5546 } else { 5547 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5548 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5549 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5550 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5551 if (applytranspose && pcbddc->local_auxmat1) { 5552 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 5553 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5554 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5555 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5556 } 5557 } 5558 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5559 if (!reuse_solver || pcbddc->switch_static) { 5560 if (applytranspose) { 5561 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5562 } else { 5563 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5564 } 5565 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 5566 } else { 5567 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5568 5569 if (applytranspose) { 5570 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5571 } else { 5572 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5573 } 5574 } 5575 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5576 PetscCall(VecSet(inout_B, 0.)); 5577 if (!pcbddc->switch_static) { 5578 if (!reuse_solver) { 5579 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5580 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5581 } else { 5582 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5583 5584 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5585 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5586 } 5587 if (!applytranspose && pcbddc->local_auxmat1) { 5588 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5589 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 5590 } 5591 } else { 5592 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5593 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5594 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5595 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5596 if (!applytranspose && pcbddc->local_auxmat1) { 5597 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5598 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 5599 } 5600 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5601 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5602 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5603 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5604 } 5605 PetscFunctionReturn(0); 5606 } 5607 5608 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5609 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5610 { 5611 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5612 PC_IS *pcis = (PC_IS *)(pc->data); 5613 const PetscScalar zero = 0.0; 5614 5615 PetscFunctionBegin; 5616 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5617 if (!pcbddc->benign_apply_coarse_only) { 5618 if (applytranspose) { 5619 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 5620 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5621 } else { 5622 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 5623 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5624 } 5625 } else { 5626 PetscCall(VecSet(pcbddc->vec1_P, zero)); 5627 } 5628 5629 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5630 if (pcbddc->benign_n) { 5631 PetscScalar *array; 5632 PetscInt j; 5633 5634 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5635 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 5636 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5637 } 5638 5639 /* start communications from local primal nodes to rhs of coarse solver */ 5640 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 5641 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 5642 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 5643 5644 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5645 if (pcbddc->coarse_ksp) { 5646 Mat coarse_mat; 5647 Vec rhs, sol; 5648 MatNullSpace nullsp; 5649 PetscBool isbddc = PETSC_FALSE; 5650 5651 if (pcbddc->benign_have_null) { 5652 PC coarse_pc; 5653 5654 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5655 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 5656 /* we need to propagate to coarser levels the need for a possible benign correction */ 5657 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5658 PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5659 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5660 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5661 } 5662 } 5663 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 5664 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 5665 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 5666 if (applytranspose) { 5667 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 5668 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5669 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 5670 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5671 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5672 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 5673 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5674 } else { 5675 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 5676 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5677 PC coarse_pc; 5678 5679 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 5680 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5681 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 5682 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 5683 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 5684 } else { 5685 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5686 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 5687 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5688 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5689 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5690 } 5691 } 5692 /* we don't need the benign correction at coarser levels anymore */ 5693 if (pcbddc->benign_have_null && isbddc) { 5694 PC coarse_pc; 5695 PC_BDDC *coarsepcbddc; 5696 5697 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5698 coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5699 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5700 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5701 } 5702 } 5703 5704 /* Local solution on R nodes */ 5705 if (pcis->n && !pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 5706 /* communications from coarse sol to local primal nodes */ 5707 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 5708 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 5709 5710 /* Sum contributions from the two levels */ 5711 if (!pcbddc->benign_apply_coarse_only) { 5712 if (applytranspose) { 5713 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5714 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5715 } else { 5716 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5717 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5718 } 5719 /* store p0 */ 5720 if (pcbddc->benign_n) { 5721 PetscScalar *array; 5722 PetscInt j; 5723 5724 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5725 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 5726 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5727 } 5728 } else { /* expand the coarse solution */ 5729 if (applytranspose) { 5730 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 5731 } else { 5732 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 5733 } 5734 } 5735 PetscFunctionReturn(0); 5736 } 5737 5738 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 5739 { 5740 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5741 Vec from, to; 5742 const PetscScalar *array; 5743 5744 PetscFunctionBegin; 5745 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5746 from = pcbddc->coarse_vec; 5747 to = pcbddc->vec1_P; 5748 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5749 Vec tvec; 5750 5751 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5752 PetscCall(VecResetArray(tvec)); 5753 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 5754 PetscCall(VecGetArrayRead(tvec, &array)); 5755 PetscCall(VecPlaceArray(from, array)); 5756 PetscCall(VecRestoreArrayRead(tvec, &array)); 5757 } 5758 } else { /* from local to global -> put data in coarse right hand side */ 5759 from = pcbddc->vec1_P; 5760 to = pcbddc->coarse_vec; 5761 } 5762 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5763 PetscFunctionReturn(0); 5764 } 5765 5766 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5767 { 5768 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5769 Vec from, to; 5770 const PetscScalar *array; 5771 5772 PetscFunctionBegin; 5773 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5774 from = pcbddc->coarse_vec; 5775 to = pcbddc->vec1_P; 5776 } else { /* from local to global -> put data in coarse right hand side */ 5777 from = pcbddc->vec1_P; 5778 to = pcbddc->coarse_vec; 5779 } 5780 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5781 if (smode == SCATTER_FORWARD) { 5782 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5783 Vec tvec; 5784 5785 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5786 PetscCall(VecGetArrayRead(to, &array)); 5787 PetscCall(VecPlaceArray(tvec, array)); 5788 PetscCall(VecRestoreArrayRead(to, &array)); 5789 } 5790 } else { 5791 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5792 PetscCall(VecResetArray(from)); 5793 } 5794 } 5795 PetscFunctionReturn(0); 5796 } 5797 5798 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5799 { 5800 PC_IS *pcis = (PC_IS *)(pc->data); 5801 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5802 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5803 /* one and zero */ 5804 PetscScalar one = 1.0, zero = 0.0; 5805 /* space to store constraints and their local indices */ 5806 PetscScalar *constraints_data; 5807 PetscInt *constraints_idxs, *constraints_idxs_B; 5808 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 5809 PetscInt *constraints_n; 5810 /* iterators */ 5811 PetscInt i, j, k, total_counts, total_counts_cc, cum; 5812 /* BLAS integers */ 5813 PetscBLASInt lwork, lierr; 5814 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 5815 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 5816 /* reuse */ 5817 PetscInt olocal_primal_size, olocal_primal_size_cc; 5818 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 5819 /* change of basis */ 5820 PetscBool qr_needed; 5821 PetscBT change_basis, qr_needed_idx; 5822 /* auxiliary stuff */ 5823 PetscInt *nnz, *is_indices; 5824 PetscInt ncc; 5825 /* some quantities */ 5826 PetscInt n_vertices, total_primal_vertices, valid_constraints; 5827 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 5828 PetscReal tol; /* tolerance for retaining eigenmodes */ 5829 5830 PetscFunctionBegin; 5831 tol = PetscSqrtReal(PETSC_SMALL); 5832 /* Destroy Mat objects computed previously */ 5833 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 5834 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 5835 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 5836 /* save info on constraints from previous setup (if any) */ 5837 olocal_primal_size = pcbddc->local_primal_size; 5838 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5839 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 5840 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 5841 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 5842 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 5843 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 5844 5845 if (!pcbddc->adaptive_selection) { 5846 IS ISForVertices, *ISForFaces, *ISForEdges; 5847 MatNullSpace nearnullsp; 5848 const Vec *nearnullvecs; 5849 Vec *localnearnullsp; 5850 PetscScalar *array; 5851 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 5852 PetscBool nnsp_has_cnst; 5853 /* LAPACK working arrays for SVD or POD */ 5854 PetscBool skip_lapack, boolforchange; 5855 PetscScalar *work; 5856 PetscReal *singular_vals; 5857 #if defined(PETSC_USE_COMPLEX) 5858 PetscReal *rwork; 5859 #endif 5860 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 5861 PetscBLASInt dummy_int = 1; 5862 PetscScalar dummy_scalar = 1.; 5863 PetscBool use_pod = PETSC_FALSE; 5864 5865 /* MKL SVD with same input gives different results on different processes! */ 5866 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 5867 use_pod = PETSC_TRUE; 5868 #endif 5869 /* Get index sets for faces, edges and vertices from graph */ 5870 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 5871 o_nf = n_ISForFaces; 5872 o_ne = n_ISForEdges; 5873 n_vertices = 0; 5874 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 5875 /* print some info */ 5876 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5877 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 5878 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 5879 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5880 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 5881 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 5882 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 5883 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 5884 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5885 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 5886 } 5887 5888 if (!pcbddc->use_vertices) n_vertices = 0; 5889 if (!pcbddc->use_edges) n_ISForEdges = 0; 5890 if (!pcbddc->use_faces) n_ISForFaces = 0; 5891 5892 /* check if near null space is attached to global mat */ 5893 if (pcbddc->use_nnsp) { 5894 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 5895 } else nearnullsp = NULL; 5896 5897 if (nearnullsp) { 5898 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 5899 /* remove any stored info */ 5900 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 5901 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 5902 /* store information for BDDC solver reuse */ 5903 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 5904 pcbddc->onearnullspace = nearnullsp; 5905 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 5906 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 5907 } else { /* if near null space is not provided BDDC uses constants by default */ 5908 nnsp_size = 0; 5909 nnsp_has_cnst = PETSC_TRUE; 5910 } 5911 /* get max number of constraints on a single cc */ 5912 max_constraints = nnsp_size; 5913 if (nnsp_has_cnst) max_constraints++; 5914 5915 /* 5916 Evaluate maximum storage size needed by the procedure 5917 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5918 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5919 There can be multiple constraints per connected component 5920 */ 5921 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 5922 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 5923 5924 total_counts = n_ISForFaces + n_ISForEdges; 5925 total_counts *= max_constraints; 5926 total_counts += n_vertices; 5927 PetscCall(PetscBTCreate(total_counts, &change_basis)); 5928 5929 total_counts = 0; 5930 max_size_of_constraint = 0; 5931 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 5932 IS used_is; 5933 if (i < n_ISForEdges) { 5934 used_is = ISForEdges[i]; 5935 } else { 5936 used_is = ISForFaces[i - n_ISForEdges]; 5937 } 5938 PetscCall(ISGetSize(used_is, &j)); 5939 total_counts += j; 5940 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 5941 } 5942 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 5943 5944 /* get local part of global near null space vectors */ 5945 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 5946 for (k = 0; k < nnsp_size; k++) { 5947 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 5948 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5949 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5950 } 5951 5952 /* whether or not to skip lapack calls */ 5953 skip_lapack = PETSC_TRUE; 5954 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5955 5956 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5957 if (!skip_lapack) { 5958 PetscScalar temp_work; 5959 5960 if (use_pod) { 5961 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5962 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 5963 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 5964 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 5965 #if defined(PETSC_USE_COMPLEX) 5966 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 5967 #endif 5968 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5969 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 5970 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 5971 lwork = -1; 5972 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5973 #if !defined(PETSC_USE_COMPLEX) 5974 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 5975 #else 5976 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 5977 #endif 5978 PetscCall(PetscFPTrapPop()); 5979 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 5980 } else { 5981 #if !defined(PETSC_MISSING_LAPACK_GESVD) 5982 /* SVD */ 5983 PetscInt max_n, min_n; 5984 max_n = max_size_of_constraint; 5985 min_n = max_constraints; 5986 if (max_size_of_constraint < max_constraints) { 5987 min_n = max_size_of_constraint; 5988 max_n = max_constraints; 5989 } 5990 PetscCall(PetscMalloc1(min_n, &singular_vals)); 5991 #if defined(PETSC_USE_COMPLEX) 5992 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 5993 #endif 5994 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5995 lwork = -1; 5996 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 5997 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 5998 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 5999 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6000 #if !defined(PETSC_USE_COMPLEX) 6001 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6002 #else 6003 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6004 #endif 6005 PetscCall(PetscFPTrapPop()); 6006 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6007 #else 6008 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6009 #endif /* on missing GESVD */ 6010 } 6011 /* Allocate optimal workspace */ 6012 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6013 PetscCall(PetscMalloc1(lwork, &work)); 6014 } 6015 /* Now we can loop on constraining sets */ 6016 total_counts = 0; 6017 constraints_idxs_ptr[0] = 0; 6018 constraints_data_ptr[0] = 0; 6019 /* vertices */ 6020 if (n_vertices) { 6021 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6022 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6023 for (i = 0; i < n_vertices; i++) { 6024 constraints_n[total_counts] = 1; 6025 constraints_data[total_counts] = 1.0; 6026 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6027 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6028 total_counts++; 6029 } 6030 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6031 } 6032 6033 /* edges and faces */ 6034 total_counts_cc = total_counts; 6035 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6036 IS used_is; 6037 PetscBool idxs_copied = PETSC_FALSE; 6038 6039 if (ncc < n_ISForEdges) { 6040 used_is = ISForEdges[ncc]; 6041 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6042 } else { 6043 used_is = ISForFaces[ncc - n_ISForEdges]; 6044 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6045 } 6046 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6047 6048 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6049 if (!size_of_constraint) continue; 6050 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6051 /* change of basis should not be performed on local periodic nodes */ 6052 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6053 if (nnsp_has_cnst) { 6054 PetscScalar quad_value; 6055 6056 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6057 idxs_copied = PETSC_TRUE; 6058 6059 if (!pcbddc->use_nnsp_true) { 6060 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6061 } else { 6062 quad_value = 1.0; 6063 } 6064 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6065 temp_constraints++; 6066 total_counts++; 6067 } 6068 for (k = 0; k < nnsp_size; k++) { 6069 PetscReal real_value; 6070 PetscScalar *ptr_to_data; 6071 6072 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6073 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6074 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6075 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6076 /* check if array is null on the connected component */ 6077 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6078 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6079 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6080 temp_constraints++; 6081 total_counts++; 6082 if (!idxs_copied) { 6083 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6084 idxs_copied = PETSC_TRUE; 6085 } 6086 } 6087 } 6088 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6089 valid_constraints = temp_constraints; 6090 if (!pcbddc->use_nnsp_true && temp_constraints) { 6091 if (temp_constraints == 1) { /* just normalize the constraint */ 6092 PetscScalar norm, *ptr_to_data; 6093 6094 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6095 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6096 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6097 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6098 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6099 } else { /* perform SVD */ 6100 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6101 6102 if (use_pod) { 6103 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6104 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6105 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6106 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6107 from that computed using LAPACKgesvd 6108 -> This is due to a different computation of eigenvectors in LAPACKheev 6109 -> The quality of the POD-computed basis will be the same */ 6110 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6111 /* Store upper triangular part of correlation matrix */ 6112 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6113 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6114 for (j = 0; j < temp_constraints; j++) { 6115 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6116 } 6117 /* compute eigenvalues and eigenvectors of correlation matrix */ 6118 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6119 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6120 #if !defined(PETSC_USE_COMPLEX) 6121 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6122 #else 6123 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6124 #endif 6125 PetscCall(PetscFPTrapPop()); 6126 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6127 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6128 j = 0; 6129 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6130 total_counts = total_counts - j; 6131 valid_constraints = temp_constraints - j; 6132 /* scale and copy POD basis into used quadrature memory */ 6133 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6134 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6135 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6136 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6137 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6138 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6139 if (j < temp_constraints) { 6140 PetscInt ii; 6141 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6142 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6143 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6144 PetscCall(PetscFPTrapPop()); 6145 for (k = 0; k < temp_constraints - j; k++) { 6146 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6147 } 6148 } 6149 } else { 6150 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6151 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6152 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6153 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6154 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6155 #if !defined(PETSC_USE_COMPLEX) 6156 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6157 #else 6158 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6159 #endif 6160 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6161 PetscCall(PetscFPTrapPop()); 6162 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6163 k = temp_constraints; 6164 if (k > size_of_constraint) k = size_of_constraint; 6165 j = 0; 6166 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6167 valid_constraints = k - j; 6168 total_counts = total_counts - temp_constraints + valid_constraints; 6169 #else 6170 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6171 #endif /* on missing GESVD */ 6172 } 6173 } 6174 } 6175 /* update pointers information */ 6176 if (valid_constraints) { 6177 constraints_n[total_counts_cc] = valid_constraints; 6178 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6179 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6180 /* set change_of_basis flag */ 6181 if (boolforchange) PetscBTSet(change_basis, total_counts_cc); 6182 total_counts_cc++; 6183 } 6184 } 6185 /* free workspace */ 6186 if (!skip_lapack) { 6187 PetscCall(PetscFree(work)); 6188 #if defined(PETSC_USE_COMPLEX) 6189 PetscCall(PetscFree(rwork)); 6190 #endif 6191 PetscCall(PetscFree(singular_vals)); 6192 PetscCall(PetscFree(correlation_mat)); 6193 PetscCall(PetscFree(temp_basis)); 6194 } 6195 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6196 PetscCall(PetscFree(localnearnullsp)); 6197 /* free index sets of faces, edges and vertices */ 6198 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6199 } else { 6200 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6201 6202 total_counts = 0; 6203 n_vertices = 0; 6204 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6205 max_constraints = 0; 6206 total_counts_cc = 0; 6207 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6208 total_counts += pcbddc->adaptive_constraints_n[i]; 6209 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6210 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6211 } 6212 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6213 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6214 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6215 constraints_data = pcbddc->adaptive_constraints_data; 6216 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6217 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6218 total_counts_cc = 0; 6219 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6220 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6221 } 6222 6223 max_size_of_constraint = 0; 6224 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6225 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6226 /* Change of basis */ 6227 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6228 if (pcbddc->use_change_of_basis) { 6229 for (i = 0; i < sub_schurs->n_subs; i++) { 6230 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6231 } 6232 } 6233 } 6234 pcbddc->local_primal_size = total_counts; 6235 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6236 6237 /* map constraints_idxs in boundary numbering */ 6238 if (pcbddc->use_change_of_basis) { 6239 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6240 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6241 } 6242 6243 /* Create constraint matrix */ 6244 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6245 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6246 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6247 6248 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6249 /* determine if a QR strategy is needed for change of basis */ 6250 qr_needed = pcbddc->use_qr_single; 6251 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6252 total_primal_vertices = 0; 6253 pcbddc->local_primal_size_cc = 0; 6254 for (i = 0; i < total_counts_cc; i++) { 6255 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6256 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6257 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6258 pcbddc->local_primal_size_cc += 1; 6259 } else if (PetscBTLookup(change_basis, i)) { 6260 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6261 pcbddc->local_primal_size_cc += constraints_n[i]; 6262 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6263 PetscBTSet(qr_needed_idx, i); 6264 qr_needed = PETSC_TRUE; 6265 } 6266 } else { 6267 pcbddc->local_primal_size_cc += 1; 6268 } 6269 } 6270 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6271 pcbddc->n_vertices = total_primal_vertices; 6272 /* permute indices in order to have a sorted set of vertices */ 6273 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6274 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6275 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6276 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6277 6278 /* nonzero structure of constraint matrix */ 6279 /* and get reference dof for local constraints */ 6280 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6281 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6282 6283 j = total_primal_vertices; 6284 total_counts = total_primal_vertices; 6285 cum = total_primal_vertices; 6286 for (i = n_vertices; i < total_counts_cc; i++) { 6287 if (!PetscBTLookup(change_basis, i)) { 6288 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6289 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6290 cum++; 6291 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6292 for (k = 0; k < constraints_n[i]; k++) { 6293 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6294 nnz[j + k] = size_of_constraint; 6295 } 6296 j += constraints_n[i]; 6297 } 6298 } 6299 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6300 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6301 PetscCall(PetscFree(nnz)); 6302 6303 /* set values in constraint matrix */ 6304 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6305 total_counts = total_primal_vertices; 6306 for (i = n_vertices; i < total_counts_cc; i++) { 6307 if (!PetscBTLookup(change_basis, i)) { 6308 PetscInt *cols; 6309 6310 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6311 cols = constraints_idxs + constraints_idxs_ptr[i]; 6312 for (k = 0; k < constraints_n[i]; k++) { 6313 PetscInt row = total_counts + k; 6314 PetscScalar *vals; 6315 6316 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6317 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6318 } 6319 total_counts += constraints_n[i]; 6320 } 6321 } 6322 /* assembling */ 6323 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6324 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6325 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6326 6327 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6328 if (pcbddc->use_change_of_basis) { 6329 /* dual and primal dofs on a single cc */ 6330 PetscInt dual_dofs, primal_dofs; 6331 /* working stuff for GEQRF */ 6332 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6333 PetscBLASInt lqr_work; 6334 /* working stuff for UNGQR */ 6335 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6336 PetscBLASInt lgqr_work; 6337 /* working stuff for TRTRS */ 6338 PetscScalar *trs_rhs = NULL; 6339 PetscBLASInt Blas_NRHS; 6340 /* pointers for values insertion into change of basis matrix */ 6341 PetscInt *start_rows, *start_cols; 6342 PetscScalar *start_vals; 6343 /* working stuff for values insertion */ 6344 PetscBT is_primal; 6345 PetscInt *aux_primal_numbering_B; 6346 /* matrix sizes */ 6347 PetscInt global_size, local_size; 6348 /* temporary change of basis */ 6349 Mat localChangeOfBasisMatrix; 6350 /* extra space for debugging */ 6351 PetscScalar *dbg_work = NULL; 6352 6353 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6354 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6355 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6356 /* nonzeros for local mat */ 6357 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6358 if (!pcbddc->benign_change || pcbddc->fake_change) { 6359 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6360 } else { 6361 const PetscInt *ii; 6362 PetscInt n; 6363 PetscBool flg_row; 6364 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6365 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6366 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6367 } 6368 for (i = n_vertices; i < total_counts_cc; i++) { 6369 if (PetscBTLookup(change_basis, i)) { 6370 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6371 if (PetscBTLookup(qr_needed_idx, i)) { 6372 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6373 } else { 6374 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6375 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6376 } 6377 } 6378 } 6379 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6380 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6381 PetscCall(PetscFree(nnz)); 6382 /* Set interior change in the matrix */ 6383 if (!pcbddc->benign_change || pcbddc->fake_change) { 6384 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6385 } else { 6386 const PetscInt *ii, *jj; 6387 PetscScalar *aa; 6388 PetscInt n; 6389 PetscBool flg_row; 6390 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6391 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6392 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6393 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6394 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6395 } 6396 6397 if (pcbddc->dbg_flag) { 6398 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6399 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6400 } 6401 6402 /* Now we loop on the constraints which need a change of basis */ 6403 /* 6404 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6405 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6406 6407 Basic blocks of change of basis matrix T computed: 6408 6409 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6410 6411 | 1 0 ... 0 s_1/S | 6412 | 0 1 ... 0 s_2/S | 6413 | ... | 6414 | 0 ... 1 s_{n-1}/S | 6415 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6416 6417 with S = \sum_{i=1}^n s_i^2 6418 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6419 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6420 6421 - QR decomposition of constraints otherwise 6422 */ 6423 if (qr_needed && max_size_of_constraint) { 6424 /* space to store Q */ 6425 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6426 /* array to store scaling factors for reflectors */ 6427 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6428 /* first we issue queries for optimal work */ 6429 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6430 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6431 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6432 lqr_work = -1; 6433 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6434 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6435 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6436 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t), &qr_work)); 6437 lgqr_work = -1; 6438 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6439 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6440 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6441 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6442 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6443 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6444 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6445 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6446 PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t), &gqr_work)); 6447 /* array to store rhs and solution of triangular solver */ 6448 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6449 /* allocating workspace for check */ 6450 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 6451 } 6452 /* array to store whether a node is primal or not */ 6453 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6454 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6455 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6456 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6457 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 6458 PetscCall(PetscFree(aux_primal_numbering_B)); 6459 6460 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6461 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 6462 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 6463 if (PetscBTLookup(change_basis, total_counts)) { 6464 /* get constraint info */ 6465 primal_dofs = constraints_n[total_counts]; 6466 dual_dofs = size_of_constraint - primal_dofs; 6467 6468 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 6469 6470 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 6471 6472 /* copy quadrature constraints for change of basis check */ 6473 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6474 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6475 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6476 6477 /* compute QR decomposition of constraints */ 6478 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6479 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6480 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6481 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6482 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 6483 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 6484 PetscCall(PetscFPTrapPop()); 6485 6486 /* explicitly compute R^-T */ 6487 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 6488 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 6489 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6490 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 6491 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6492 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6493 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6494 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 6495 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 6496 PetscCall(PetscFPTrapPop()); 6497 6498 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6499 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6500 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6501 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6502 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6503 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6504 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 6505 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 6506 PetscCall(PetscFPTrapPop()); 6507 6508 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6509 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6510 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6511 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6512 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6513 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6514 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6515 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6516 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6517 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6518 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 6519 PetscCall(PetscFPTrapPop()); 6520 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6521 6522 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6523 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6524 /* insert cols for primal dofs */ 6525 for (j = 0; j < primal_dofs; j++) { 6526 start_vals = &qr_basis[j * size_of_constraint]; 6527 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6528 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6529 } 6530 /* insert cols for dual dofs */ 6531 for (j = 0, k = 0; j < dual_dofs; k++) { 6532 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 6533 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 6534 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6535 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6536 j++; 6537 } 6538 } 6539 6540 /* check change of basis */ 6541 if (pcbddc->dbg_flag) { 6542 PetscInt ii, jj; 6543 PetscBool valid_qr = PETSC_TRUE; 6544 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 6545 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6546 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 6547 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6548 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 6549 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 6550 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6551 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 6552 PetscCall(PetscFPTrapPop()); 6553 for (jj = 0; jj < size_of_constraint; jj++) { 6554 for (ii = 0; ii < primal_dofs; ii++) { 6555 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6556 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6557 } 6558 } 6559 if (!valid_qr) { 6560 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 6561 for (jj = 0; jj < size_of_constraint; jj++) { 6562 for (ii = 0; ii < primal_dofs; ii++) { 6563 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 6564 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6565 } 6566 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 6567 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6568 } 6569 } 6570 } 6571 } else { 6572 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 6573 } 6574 } 6575 } else { /* simple transformation block */ 6576 PetscInt row, col; 6577 PetscScalar val, norm; 6578 6579 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6580 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 6581 for (j = 0; j < size_of_constraint; j++) { 6582 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 6583 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6584 if (!PetscBTLookup(is_primal, row_B)) { 6585 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6586 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 6587 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 6588 } else { 6589 for (k = 0; k < size_of_constraint; k++) { 6590 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6591 if (row != col) { 6592 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 6593 } else { 6594 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 6595 } 6596 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 6597 } 6598 } 6599 } 6600 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 6601 } 6602 } else { 6603 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 6604 } 6605 } 6606 6607 /* free workspace */ 6608 if (qr_needed) { 6609 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6610 PetscCall(PetscFree(trs_rhs)); 6611 PetscCall(PetscFree(qr_tau)); 6612 PetscCall(PetscFree(qr_work)); 6613 PetscCall(PetscFree(gqr_work)); 6614 PetscCall(PetscFree(qr_basis)); 6615 } 6616 PetscCall(PetscBTDestroy(&is_primal)); 6617 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6618 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6619 6620 /* assembling of global change of variable */ 6621 if (!pcbddc->fake_change) { 6622 Mat tmat; 6623 PetscInt bs; 6624 6625 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 6626 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 6627 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 6628 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 6629 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 6630 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 6631 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 6632 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 6633 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 6634 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 6635 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 6636 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 6637 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 6638 PetscCall(MatDestroy(&tmat)); 6639 PetscCall(VecSet(pcis->vec1_global, 0.0)); 6640 PetscCall(VecSet(pcis->vec1_N, 1.0)); 6641 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6642 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6643 PetscCall(VecReciprocal(pcis->vec1_global)); 6644 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 6645 6646 /* check */ 6647 if (pcbddc->dbg_flag) { 6648 PetscReal error; 6649 Vec x, x_change; 6650 6651 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 6652 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 6653 PetscCall(VecSetRandom(x, NULL)); 6654 PetscCall(VecCopy(x, pcis->vec1_global)); 6655 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6656 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6657 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 6658 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6659 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6660 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 6661 PetscCall(VecAXPY(x, -1.0, x_change)); 6662 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 6663 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 6664 PetscCall(VecDestroy(&x)); 6665 PetscCall(VecDestroy(&x_change)); 6666 } 6667 /* adapt sub_schurs computed (if any) */ 6668 if (pcbddc->use_deluxe_scaling) { 6669 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6670 6671 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6672 if (sub_schurs && sub_schurs->S_Ej_all) { 6673 Mat S_new, tmat; 6674 IS is_all_N, is_V_Sall = NULL; 6675 6676 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 6677 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 6678 if (pcbddc->deluxe_zerorows) { 6679 ISLocalToGlobalMapping NtoSall; 6680 IS is_V; 6681 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 6682 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 6683 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 6684 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6685 PetscCall(ISDestroy(&is_V)); 6686 } 6687 PetscCall(ISDestroy(&is_all_N)); 6688 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6689 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6690 PetscCall(PetscObjectReference((PetscObject)S_new)); 6691 if (pcbddc->deluxe_zerorows) { 6692 const PetscScalar *array; 6693 const PetscInt *idxs_V, *idxs_all; 6694 PetscInt i, n_V; 6695 6696 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6697 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 6698 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 6699 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 6700 PetscCall(VecGetArrayRead(pcis->D, &array)); 6701 for (i = 0; i < n_V; i++) { 6702 PetscScalar val; 6703 PetscInt idx; 6704 6705 idx = idxs_V[i]; 6706 val = array[idxs_all[idxs_V[i]]]; 6707 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 6708 } 6709 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 6710 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 6711 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 6712 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 6713 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 6714 } 6715 sub_schurs->S_Ej_all = S_new; 6716 PetscCall(MatDestroy(&S_new)); 6717 if (sub_schurs->sum_S_Ej_all) { 6718 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6719 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6720 PetscCall(PetscObjectReference((PetscObject)S_new)); 6721 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6722 sub_schurs->sum_S_Ej_all = S_new; 6723 PetscCall(MatDestroy(&S_new)); 6724 } 6725 PetscCall(ISDestroy(&is_V_Sall)); 6726 PetscCall(MatDestroy(&tmat)); 6727 } 6728 /* destroy any change of basis context in sub_schurs */ 6729 if (sub_schurs && sub_schurs->change) { 6730 PetscInt i; 6731 6732 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 6733 PetscCall(PetscFree(sub_schurs->change)); 6734 } 6735 } 6736 if (pcbddc->switch_static) { /* need to save the local change */ 6737 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6738 } else { 6739 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6740 } 6741 /* determine if any process has changed the pressures locally */ 6742 pcbddc->change_interior = pcbddc->benign_have_null; 6743 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6744 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6745 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6746 pcbddc->use_qr_single = qr_needed; 6747 } 6748 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6749 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6750 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 6751 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6752 } else { 6753 Mat benign_global = NULL; 6754 if (pcbddc->benign_have_null) { 6755 Mat M; 6756 6757 pcbddc->change_interior = PETSC_TRUE; 6758 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 6759 PetscCall(VecReciprocal(pcis->vec1_N)); 6760 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 6761 if (pcbddc->benign_change) { 6762 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 6763 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 6764 } else { 6765 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 6766 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 6767 } 6768 PetscCall(MatISSetLocalMat(benign_global, M)); 6769 PetscCall(MatDestroy(&M)); 6770 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 6771 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 6772 } 6773 if (pcbddc->user_ChangeOfBasisMatrix) { 6774 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 6775 PetscCall(MatDestroy(&benign_global)); 6776 } else if (pcbddc->benign_have_null) { 6777 pcbddc->ChangeOfBasisMatrix = benign_global; 6778 } 6779 } 6780 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6781 IS is_global; 6782 const PetscInt *gidxs; 6783 6784 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 6785 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 6786 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 6787 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 6788 PetscCall(ISDestroy(&is_global)); 6789 } 6790 } 6791 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 6792 6793 if (!pcbddc->fake_change) { 6794 /* add pressure dofs to set of primal nodes for numbering purposes */ 6795 for (i = 0; i < pcbddc->benign_n; i++) { 6796 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6797 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6798 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6799 pcbddc->local_primal_size_cc++; 6800 pcbddc->local_primal_size++; 6801 } 6802 6803 /* check if a new primal space has been introduced (also take into account benign trick) */ 6804 pcbddc->new_primal_space_local = PETSC_TRUE; 6805 if (olocal_primal_size == pcbddc->local_primal_size) { 6806 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6807 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6808 if (!pcbddc->new_primal_space_local) { 6809 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6810 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6811 } 6812 } 6813 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6814 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 6815 } 6816 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 6817 6818 /* flush dbg viewer */ 6819 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6820 6821 /* free workspace */ 6822 PetscCall(PetscBTDestroy(&qr_needed_idx)); 6823 PetscCall(PetscBTDestroy(&change_basis)); 6824 if (!pcbddc->adaptive_selection) { 6825 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 6826 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 6827 } else { 6828 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 6829 PetscCall(PetscFree(constraints_n)); 6830 PetscCall(PetscFree(constraints_idxs_B)); 6831 } 6832 PetscFunctionReturn(0); 6833 } 6834 6835 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6836 { 6837 ISLocalToGlobalMapping map; 6838 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6839 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6840 PetscInt i, N; 6841 PetscBool rcsr = PETSC_FALSE; 6842 6843 PetscFunctionBegin; 6844 if (pcbddc->recompute_topography) { 6845 pcbddc->graphanalyzed = PETSC_FALSE; 6846 /* Reset previously computed graph */ 6847 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 6848 /* Init local Graph struct */ 6849 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 6850 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 6851 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 6852 6853 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 6854 /* Check validity of the csr graph passed in by the user */ 6855 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 6856 pcbddc->mat_graph->nvtxs); 6857 6858 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6859 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6860 PetscInt *xadj, *adjncy; 6861 PetscInt nvtxs; 6862 PetscBool flg_row = PETSC_FALSE; 6863 6864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6865 if (flg_row) { 6866 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 6867 pcbddc->computed_rowadj = PETSC_TRUE; 6868 } 6869 PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6870 rcsr = PETSC_TRUE; 6871 } 6872 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6873 6874 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6875 PetscReal *lcoords; 6876 PetscInt n; 6877 MPI_Datatype dimrealtype; 6878 6879 /* TODO: support for blocked */ 6880 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 6881 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6882 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 6883 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 6884 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 6885 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6886 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6887 PetscCallMPI(MPI_Type_free(&dimrealtype)); 6888 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 6889 6890 pcbddc->mat_graph->coords = lcoords; 6891 pcbddc->mat_graph->cloc = PETSC_TRUE; 6892 pcbddc->mat_graph->cnloc = n; 6893 } 6894 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 6895 pcbddc->mat_graph->nvtxs); 6896 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 6897 6898 /* Setup of Graph */ 6899 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6900 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 6901 6902 /* attach info on disconnected subdomains if present */ 6903 if (pcbddc->n_local_subs) { 6904 PetscInt *local_subs, n, totn; 6905 6906 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6907 PetscCall(PetscMalloc1(n, &local_subs)); 6908 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 6909 for (i = 0; i < pcbddc->n_local_subs; i++) { 6910 const PetscInt *idxs; 6911 PetscInt nl, j; 6912 6913 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 6914 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 6915 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 6916 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 6917 } 6918 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 6919 pcbddc->mat_graph->n_local_subs = totn + 1; 6920 pcbddc->mat_graph->local_subs = local_subs; 6921 } 6922 } 6923 6924 if (!pcbddc->graphanalyzed) { 6925 /* Graph's connected components analysis */ 6926 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 6927 pcbddc->graphanalyzed = PETSC_TRUE; 6928 pcbddc->corner_selected = pcbddc->corner_selection; 6929 } 6930 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6931 PetscFunctionReturn(0); 6932 } 6933 6934 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 6935 { 6936 PetscInt i, j, n; 6937 PetscScalar *alphas; 6938 PetscReal norm, *onorms; 6939 6940 PetscFunctionBegin; 6941 n = *nio; 6942 if (!n) PetscFunctionReturn(0); 6943 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 6944 PetscCall(VecNormalize(vecs[0], &norm)); 6945 if (norm < PETSC_SMALL) { 6946 onorms[0] = 0.0; 6947 PetscCall(VecSet(vecs[0], 0.0)); 6948 } else { 6949 onorms[0] = norm; 6950 } 6951 6952 for (i = 1; i < n; i++) { 6953 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 6954 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 6955 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 6956 PetscCall(VecNormalize(vecs[i], &norm)); 6957 if (norm < PETSC_SMALL) { 6958 onorms[i] = 0.0; 6959 PetscCall(VecSet(vecs[i], 0.0)); 6960 } else { 6961 onorms[i] = norm; 6962 } 6963 } 6964 /* push nonzero vectors at the beginning */ 6965 for (i = 0; i < n; i++) { 6966 if (onorms[i] == 0.0) { 6967 for (j = i + 1; j < n; j++) { 6968 if (onorms[j] != 0.0) { 6969 PetscCall(VecCopy(vecs[j], vecs[i])); 6970 onorms[j] = 0.0; 6971 } 6972 } 6973 } 6974 } 6975 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 6976 PetscCall(PetscFree2(alphas, onorms)); 6977 PetscFunctionReturn(0); 6978 } 6979 6980 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 6981 { 6982 ISLocalToGlobalMapping mapping; 6983 Mat A; 6984 PetscInt n_neighs, *neighs, *n_shared, **shared; 6985 PetscMPIInt size, rank, color; 6986 PetscInt *xadj, *adjncy; 6987 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 6988 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 6989 PetscInt void_procs, *procs_candidates = NULL; 6990 PetscInt xadj_count, *count; 6991 PetscBool ismatis, use_vwgt = PETSC_FALSE; 6992 PetscSubcomm psubcomm; 6993 MPI_Comm subcomm; 6994 6995 PetscFunctionBegin; 6996 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 6997 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 6998 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 6999 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7000 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7001 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7002 7003 if (have_void) *have_void = PETSC_FALSE; 7004 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7005 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7006 PetscCall(MatISGetLocalMat(mat, &A)); 7007 PetscCall(MatGetLocalSize(A, &n, NULL)); 7008 im_active = !!n; 7009 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7010 void_procs = size - active_procs; 7011 /* get ranks of of non-active processes in mat communicator */ 7012 if (void_procs) { 7013 PetscInt ncand; 7014 7015 if (have_void) *have_void = PETSC_TRUE; 7016 PetscCall(PetscMalloc1(size, &procs_candidates)); 7017 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7018 for (i = 0, ncand = 0; i < size; i++) { 7019 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7020 } 7021 /* force n_subdomains to be not greater that the number of non-active processes */ 7022 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7023 } 7024 7025 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7026 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7027 PetscCall(MatGetSize(mat, &N, NULL)); 7028 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7029 PetscInt issize, isidx, dest; 7030 if (*n_subdomains == 1) dest = 0; 7031 else dest = rank; 7032 if (im_active) { 7033 issize = 1; 7034 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7035 isidx = procs_candidates[dest]; 7036 } else { 7037 isidx = dest; 7038 } 7039 } else { 7040 issize = 0; 7041 isidx = -1; 7042 } 7043 if (*n_subdomains != 1) *n_subdomains = active_procs; 7044 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7045 PetscCall(PetscFree(procs_candidates)); 7046 PetscFunctionReturn(0); 7047 } 7048 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL)); 7049 PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL)); 7050 threshold = PetscMax(threshold, 2); 7051 7052 /* Get info on mapping */ 7053 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7054 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7055 7056 /* build local CSR graph of subdomains' connectivity */ 7057 PetscCall(PetscMalloc1(2, &xadj)); 7058 xadj[0] = 0; 7059 xadj[1] = PetscMax(n_neighs - 1, 0); 7060 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7061 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7062 PetscCall(PetscCalloc1(n, &count)); 7063 for (i = 1; i < n_neighs; i++) 7064 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7065 7066 xadj_count = 0; 7067 for (i = 1; i < n_neighs; i++) { 7068 for (j = 0; j < n_shared[i]; j++) { 7069 if (count[shared[i][j]] < threshold) { 7070 adjncy[xadj_count] = neighs[i]; 7071 adjncy_wgt[xadj_count] = n_shared[i]; 7072 xadj_count++; 7073 break; 7074 } 7075 } 7076 } 7077 xadj[1] = xadj_count; 7078 PetscCall(PetscFree(count)); 7079 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7080 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7081 7082 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7083 7084 /* Restrict work on active processes only */ 7085 PetscCall(PetscMPIIntCast(im_active, &color)); 7086 if (void_procs) { 7087 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7088 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7089 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7090 subcomm = PetscSubcommChild(psubcomm); 7091 } else { 7092 psubcomm = NULL; 7093 subcomm = PetscObjectComm((PetscObject)mat); 7094 } 7095 7096 v_wgt = NULL; 7097 if (!color) { 7098 PetscCall(PetscFree(xadj)); 7099 PetscCall(PetscFree(adjncy)); 7100 PetscCall(PetscFree(adjncy_wgt)); 7101 } else { 7102 Mat subdomain_adj; 7103 IS new_ranks, new_ranks_contig; 7104 MatPartitioning partitioner; 7105 PetscInt rstart = 0, rend = 0; 7106 PetscInt *is_indices, *oldranks; 7107 PetscMPIInt size; 7108 PetscBool aggregate; 7109 7110 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7111 if (void_procs) { 7112 PetscInt prank = rank; 7113 PetscCall(PetscMalloc1(size, &oldranks)); 7114 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7115 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7116 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7117 } else { 7118 oldranks = NULL; 7119 } 7120 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7121 if (aggregate) { /* TODO: all this part could be made more efficient */ 7122 PetscInt lrows, row, ncols, *cols; 7123 PetscMPIInt nrank; 7124 PetscScalar *vals; 7125 7126 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7127 lrows = 0; 7128 if (nrank < redprocs) { 7129 lrows = size / redprocs; 7130 if (nrank < size % redprocs) lrows++; 7131 } 7132 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7133 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7134 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7135 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7136 row = nrank; 7137 ncols = xadj[1] - xadj[0]; 7138 cols = adjncy; 7139 PetscCall(PetscMalloc1(ncols, &vals)); 7140 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7141 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7142 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7143 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7144 PetscCall(PetscFree(xadj)); 7145 PetscCall(PetscFree(adjncy)); 7146 PetscCall(PetscFree(adjncy_wgt)); 7147 PetscCall(PetscFree(vals)); 7148 if (use_vwgt) { 7149 Vec v; 7150 const PetscScalar *array; 7151 PetscInt nl; 7152 7153 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7154 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7155 PetscCall(VecAssemblyBegin(v)); 7156 PetscCall(VecAssemblyEnd(v)); 7157 PetscCall(VecGetLocalSize(v, &nl)); 7158 PetscCall(VecGetArrayRead(v, &array)); 7159 PetscCall(PetscMalloc1(nl, &v_wgt)); 7160 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7161 PetscCall(VecRestoreArrayRead(v, &array)); 7162 PetscCall(VecDestroy(&v)); 7163 } 7164 } else { 7165 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7166 if (use_vwgt) { 7167 PetscCall(PetscMalloc1(1, &v_wgt)); 7168 v_wgt[0] = n; 7169 } 7170 } 7171 /* PetscCall(MatView(subdomain_adj,0)); */ 7172 7173 /* Partition */ 7174 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7175 #if defined(PETSC_HAVE_PTSCOTCH) 7176 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7177 #elif defined(PETSC_HAVE_PARMETIS) 7178 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7179 #else 7180 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7181 #endif 7182 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7183 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7184 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7185 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7186 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7187 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7188 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7189 7190 /* renumber new_ranks to avoid "holes" in new set of processors */ 7191 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7192 PetscCall(ISDestroy(&new_ranks)); 7193 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7194 if (!aggregate) { 7195 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7196 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7197 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7198 } else if (oldranks) { 7199 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7200 } else { 7201 ranks_send_to_idx[0] = is_indices[0]; 7202 } 7203 } else { 7204 PetscInt idx = 0; 7205 PetscMPIInt tag; 7206 MPI_Request *reqs; 7207 7208 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7209 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7210 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7211 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7212 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7213 PetscCall(PetscFree(reqs)); 7214 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7215 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7216 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7217 } else if (oldranks) { 7218 ranks_send_to_idx[0] = oldranks[idx]; 7219 } else { 7220 ranks_send_to_idx[0] = idx; 7221 } 7222 } 7223 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7224 /* clean up */ 7225 PetscCall(PetscFree(oldranks)); 7226 PetscCall(ISDestroy(&new_ranks_contig)); 7227 PetscCall(MatDestroy(&subdomain_adj)); 7228 PetscCall(MatPartitioningDestroy(&partitioner)); 7229 } 7230 PetscCall(PetscSubcommDestroy(&psubcomm)); 7231 PetscCall(PetscFree(procs_candidates)); 7232 7233 /* assemble parallel IS for sends */ 7234 i = 1; 7235 if (!color) i = 0; 7236 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7237 PetscFunctionReturn(0); 7238 } 7239 7240 typedef enum { 7241 MATDENSE_PRIVATE = 0, 7242 MATAIJ_PRIVATE, 7243 MATBAIJ_PRIVATE, 7244 MATSBAIJ_PRIVATE 7245 } MatTypePrivate; 7246 7247 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7248 { 7249 Mat local_mat; 7250 IS is_sends_internal; 7251 PetscInt rows, cols, new_local_rows; 7252 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7253 PetscBool ismatis, isdense, newisdense, destroy_mat; 7254 ISLocalToGlobalMapping l2gmap; 7255 PetscInt *l2gmap_indices; 7256 const PetscInt *is_indices; 7257 MatType new_local_type; 7258 /* buffers */ 7259 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7260 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7261 PetscInt *recv_buffer_idxs_local; 7262 PetscScalar *ptr_vals, *recv_buffer_vals; 7263 const PetscScalar *send_buffer_vals; 7264 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7265 /* MPI */ 7266 MPI_Comm comm, comm_n; 7267 PetscSubcomm subcomm; 7268 PetscMPIInt n_sends, n_recvs, size; 7269 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7270 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7271 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7272 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7273 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7274 7275 PetscFunctionBegin; 7276 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7277 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7278 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7279 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7280 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7281 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7282 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7283 PetscValidLogicalCollectiveInt(mat, nis, 8); 7284 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7285 if (nvecs) { 7286 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7287 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7288 } 7289 /* further checks */ 7290 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7291 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7292 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7293 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7294 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7295 if (reuse && *mat_n) { 7296 PetscInt mrows, mcols, mnrows, mncols; 7297 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7298 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7299 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7300 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7301 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7302 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7303 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7304 } 7305 PetscCall(MatGetBlockSize(local_mat, &bs)); 7306 PetscValidLogicalCollectiveInt(mat, bs, 1); 7307 7308 /* prepare IS for sending if not provided */ 7309 if (!is_sends) { 7310 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7311 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7312 } else { 7313 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7314 is_sends_internal = is_sends; 7315 } 7316 7317 /* get comm */ 7318 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7319 7320 /* compute number of sends */ 7321 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7322 PetscCall(PetscMPIIntCast(i, &n_sends)); 7323 7324 /* compute number of receives */ 7325 PetscCallMPI(MPI_Comm_size(comm, &size)); 7326 PetscCall(PetscMalloc1(size, &iflags)); 7327 PetscCall(PetscArrayzero(iflags, size)); 7328 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7329 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7330 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7331 PetscCall(PetscFree(iflags)); 7332 7333 /* restrict comm if requested */ 7334 subcomm = NULL; 7335 destroy_mat = PETSC_FALSE; 7336 if (restrict_comm) { 7337 PetscMPIInt color, subcommsize; 7338 7339 color = 0; 7340 if (restrict_full) { 7341 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7342 } else { 7343 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7344 } 7345 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7346 subcommsize = size - subcommsize; 7347 /* check if reuse has been requested */ 7348 if (reuse) { 7349 if (*mat_n) { 7350 PetscMPIInt subcommsize2; 7351 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7352 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7353 comm_n = PetscObjectComm((PetscObject)*mat_n); 7354 } else { 7355 comm_n = PETSC_COMM_SELF; 7356 } 7357 } else { /* MAT_INITIAL_MATRIX */ 7358 PetscMPIInt rank; 7359 7360 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7361 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7362 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7363 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7364 comm_n = PetscSubcommChild(subcomm); 7365 } 7366 /* flag to destroy *mat_n if not significative */ 7367 if (color) destroy_mat = PETSC_TRUE; 7368 } else { 7369 comm_n = comm; 7370 } 7371 7372 /* prepare send/receive buffers */ 7373 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7374 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7375 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7376 PetscCall(PetscArrayzero(ilengths_vals, size)); 7377 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 7378 7379 /* Get data from local matrices */ 7380 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7381 /* TODO: See below some guidelines on how to prepare the local buffers */ 7382 /* 7383 send_buffer_vals should contain the raw values of the local matrix 7384 send_buffer_idxs should contain: 7385 - MatType_PRIVATE type 7386 - PetscInt size_of_l2gmap 7387 - PetscInt global_row_indices[size_of_l2gmap] 7388 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7389 */ 7390 { 7391 ISLocalToGlobalMapping mapping; 7392 7393 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7394 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7395 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7396 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7397 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7398 send_buffer_idxs[1] = i; 7399 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7400 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7401 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7402 PetscCall(PetscMPIIntCast(i, &len)); 7403 for (i = 0; i < n_sends; i++) { 7404 ilengths_vals[is_indices[i]] = len * len; 7405 ilengths_idxs[is_indices[i]] = len + 2; 7406 } 7407 } 7408 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7409 /* additional is (if any) */ 7410 if (nis) { 7411 PetscMPIInt psum; 7412 PetscInt j; 7413 for (j = 0, psum = 0; j < nis; j++) { 7414 PetscInt plen; 7415 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7416 PetscCall(PetscMPIIntCast(plen, &len)); 7417 psum += len + 1; /* indices + length */ 7418 } 7419 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7420 for (j = 0, psum = 0; j < nis; j++) { 7421 PetscInt plen; 7422 const PetscInt *is_array_idxs; 7423 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7424 send_buffer_idxs_is[psum] = plen; 7425 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7426 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7427 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7428 psum += plen + 1; /* indices + length */ 7429 } 7430 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 7431 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7432 } 7433 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7434 7435 buf_size_idxs = 0; 7436 buf_size_vals = 0; 7437 buf_size_idxs_is = 0; 7438 buf_size_vecs = 0; 7439 for (i = 0; i < n_recvs; i++) { 7440 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7441 buf_size_vals += (PetscInt)olengths_vals[i]; 7442 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7443 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7444 } 7445 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 7446 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 7447 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 7448 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 7449 7450 /* get new tags for clean communications */ 7451 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 7452 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 7453 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 7454 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 7455 7456 /* allocate for requests */ 7457 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 7458 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 7459 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 7460 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 7461 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 7462 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 7463 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 7464 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 7465 7466 /* communications */ 7467 ptr_idxs = recv_buffer_idxs; 7468 ptr_vals = recv_buffer_vals; 7469 ptr_idxs_is = recv_buffer_idxs_is; 7470 ptr_vecs = recv_buffer_vecs; 7471 for (i = 0; i < n_recvs; i++) { 7472 source_dest = onodes[i]; 7473 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 7474 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 7475 ptr_idxs += olengths_idxs[i]; 7476 ptr_vals += olengths_vals[i]; 7477 if (nis) { 7478 source_dest = onodes_is[i]; 7479 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 7480 ptr_idxs_is += olengths_idxs_is[i]; 7481 } 7482 if (nvecs) { 7483 source_dest = onodes[i]; 7484 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 7485 ptr_vecs += olengths_idxs[i] - 2; 7486 } 7487 } 7488 for (i = 0; i < n_sends; i++) { 7489 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 7490 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 7491 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 7492 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 7493 if (nvecs) { 7494 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7495 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 7496 } 7497 } 7498 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 7499 PetscCall(ISDestroy(&is_sends_internal)); 7500 7501 /* assemble new l2g map */ 7502 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 7503 ptr_idxs = recv_buffer_idxs; 7504 new_local_rows = 0; 7505 for (i = 0; i < n_recvs; i++) { 7506 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7507 ptr_idxs += olengths_idxs[i]; 7508 } 7509 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 7510 ptr_idxs = recv_buffer_idxs; 7511 new_local_rows = 0; 7512 for (i = 0; i < n_recvs; i++) { 7513 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 7514 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7515 ptr_idxs += olengths_idxs[i]; 7516 } 7517 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 7518 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 7519 PetscCall(PetscFree(l2gmap_indices)); 7520 7521 /* infer new local matrix type from received local matrices type */ 7522 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7523 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7524 if (n_recvs) { 7525 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7526 ptr_idxs = recv_buffer_idxs; 7527 for (i = 0; i < n_recvs; i++) { 7528 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7529 new_local_type_private = MATAIJ_PRIVATE; 7530 break; 7531 } 7532 ptr_idxs += olengths_idxs[i]; 7533 } 7534 switch (new_local_type_private) { 7535 case MATDENSE_PRIVATE: 7536 new_local_type = MATSEQAIJ; 7537 bs = 1; 7538 break; 7539 case MATAIJ_PRIVATE: 7540 new_local_type = MATSEQAIJ; 7541 bs = 1; 7542 break; 7543 case MATBAIJ_PRIVATE: 7544 new_local_type = MATSEQBAIJ; 7545 break; 7546 case MATSBAIJ_PRIVATE: 7547 new_local_type = MATSEQSBAIJ; 7548 break; 7549 default: 7550 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 7551 } 7552 } else { /* by default, new_local_type is seqaij */ 7553 new_local_type = MATSEQAIJ; 7554 bs = 1; 7555 } 7556 7557 /* create MATIS object if needed */ 7558 if (!reuse) { 7559 PetscCall(MatGetSize(mat, &rows, &cols)); 7560 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7561 } else { 7562 /* it also destroys the local matrices */ 7563 if (*mat_n) { 7564 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 7565 } else { /* this is a fake object */ 7566 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7567 } 7568 } 7569 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 7570 PetscCall(MatSetType(local_mat, new_local_type)); 7571 7572 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 7573 7574 /* Global to local map of received indices */ 7575 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 7576 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 7577 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7578 7579 /* restore attributes -> type of incoming data and its size */ 7580 buf_size_idxs = 0; 7581 for (i = 0; i < n_recvs; i++) { 7582 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7583 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 7584 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7585 } 7586 PetscCall(PetscFree(recv_buffer_idxs)); 7587 7588 /* set preallocation */ 7589 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 7590 if (!newisdense) { 7591 PetscInt *new_local_nnz = NULL; 7592 7593 ptr_idxs = recv_buffer_idxs_local; 7594 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 7595 for (i = 0; i < n_recvs; i++) { 7596 PetscInt j; 7597 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7598 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 7599 } else { 7600 /* TODO */ 7601 } 7602 ptr_idxs += olengths_idxs[i]; 7603 } 7604 if (new_local_nnz) { 7605 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 7606 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 7607 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 7608 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7609 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 7610 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7611 } else { 7612 PetscCall(MatSetUp(local_mat)); 7613 } 7614 PetscCall(PetscFree(new_local_nnz)); 7615 } else { 7616 PetscCall(MatSetUp(local_mat)); 7617 } 7618 7619 /* set values */ 7620 ptr_vals = recv_buffer_vals; 7621 ptr_idxs = recv_buffer_idxs_local; 7622 for (i = 0; i < n_recvs; i++) { 7623 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7624 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 7625 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 7626 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 7627 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 7628 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 7629 } else { 7630 /* TODO */ 7631 } 7632 ptr_idxs += olengths_idxs[i]; 7633 ptr_vals += olengths_vals[i]; 7634 } 7635 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 7636 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 7637 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 7638 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 7639 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 7640 PetscCall(PetscFree(recv_buffer_vals)); 7641 7642 #if 0 7643 if (!restrict_comm) { /* check */ 7644 Vec lvec,rvec; 7645 PetscReal infty_error; 7646 7647 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7648 PetscCall(VecSetRandom(rvec,NULL)); 7649 PetscCall(MatMult(mat,rvec,lvec)); 7650 PetscCall(VecScale(lvec,-1.0)); 7651 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7652 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7653 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7654 PetscCall(VecDestroy(&rvec)); 7655 PetscCall(VecDestroy(&lvec)); 7656 } 7657 #endif 7658 7659 /* assemble new additional is (if any) */ 7660 if (nis) { 7661 PetscInt **temp_idxs, *count_is, j, psum; 7662 7663 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 7664 PetscCall(PetscCalloc1(nis, &count_is)); 7665 ptr_idxs = recv_buffer_idxs_is; 7666 psum = 0; 7667 for (i = 0; i < n_recvs; i++) { 7668 for (j = 0; j < nis; j++) { 7669 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7670 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7671 psum += plen; 7672 ptr_idxs += plen + 1; /* shift pointer to received data */ 7673 } 7674 } 7675 PetscCall(PetscMalloc1(nis, &temp_idxs)); 7676 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 7677 for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1]; 7678 PetscCall(PetscArrayzero(count_is, nis)); 7679 ptr_idxs = recv_buffer_idxs_is; 7680 for (i = 0; i < n_recvs; i++) { 7681 for (j = 0; j < nis; j++) { 7682 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7683 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 7684 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7685 ptr_idxs += plen + 1; /* shift pointer to received data */ 7686 } 7687 } 7688 for (i = 0; i < nis; i++) { 7689 PetscCall(ISDestroy(&isarray[i])); 7690 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 7691 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 7692 } 7693 PetscCall(PetscFree(count_is)); 7694 PetscCall(PetscFree(temp_idxs[0])); 7695 PetscCall(PetscFree(temp_idxs)); 7696 } 7697 /* free workspace */ 7698 PetscCall(PetscFree(recv_buffer_idxs_is)); 7699 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 7700 PetscCall(PetscFree(send_buffer_idxs)); 7701 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 7702 if (isdense) { 7703 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7704 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 7705 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7706 } else { 7707 /* PetscCall(PetscFree(send_buffer_vals)); */ 7708 } 7709 if (nis) { 7710 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 7711 PetscCall(PetscFree(send_buffer_idxs_is)); 7712 } 7713 7714 if (nvecs) { 7715 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 7716 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 7717 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7718 PetscCall(VecDestroy(&nnsp_vec[0])); 7719 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 7720 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 7721 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 7722 /* set values */ 7723 ptr_vals = recv_buffer_vecs; 7724 ptr_idxs = recv_buffer_idxs_local; 7725 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7726 for (i = 0; i < n_recvs; i++) { 7727 PetscInt j; 7728 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 7729 ptr_idxs += olengths_idxs[i]; 7730 ptr_vals += olengths_idxs[i] - 2; 7731 } 7732 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7733 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 7734 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 7735 } 7736 7737 PetscCall(PetscFree(recv_buffer_vecs)); 7738 PetscCall(PetscFree(recv_buffer_idxs_local)); 7739 PetscCall(PetscFree(recv_req_idxs)); 7740 PetscCall(PetscFree(recv_req_vals)); 7741 PetscCall(PetscFree(recv_req_vecs)); 7742 PetscCall(PetscFree(recv_req_idxs_is)); 7743 PetscCall(PetscFree(send_req_idxs)); 7744 PetscCall(PetscFree(send_req_vals)); 7745 PetscCall(PetscFree(send_req_vecs)); 7746 PetscCall(PetscFree(send_req_idxs_is)); 7747 PetscCall(PetscFree(ilengths_vals)); 7748 PetscCall(PetscFree(ilengths_idxs)); 7749 PetscCall(PetscFree(olengths_vals)); 7750 PetscCall(PetscFree(olengths_idxs)); 7751 PetscCall(PetscFree(onodes)); 7752 if (nis) { 7753 PetscCall(PetscFree(ilengths_idxs_is)); 7754 PetscCall(PetscFree(olengths_idxs_is)); 7755 PetscCall(PetscFree(onodes_is)); 7756 } 7757 PetscCall(PetscSubcommDestroy(&subcomm)); 7758 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 7759 PetscCall(MatDestroy(mat_n)); 7760 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 7761 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7762 PetscCall(VecDestroy(&nnsp_vec[0])); 7763 } 7764 *mat_n = NULL; 7765 } 7766 PetscFunctionReturn(0); 7767 } 7768 7769 /* temporary hack into ksp private data structure */ 7770 #include <petsc/private/kspimpl.h> 7771 7772 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals) 7773 { 7774 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7775 PC_IS *pcis = (PC_IS *)pc->data; 7776 Mat coarse_mat, coarse_mat_is, coarse_submat_dense; 7777 Mat coarsedivudotp = NULL; 7778 Mat coarseG, t_coarse_mat_is; 7779 MatNullSpace CoarseNullSpace = NULL; 7780 ISLocalToGlobalMapping coarse_islg; 7781 IS coarse_is, *isarray, corners; 7782 PetscInt i, im_active = -1, active_procs = -1; 7783 PetscInt nis, nisdofs, nisneu, nisvert; 7784 PetscInt coarse_eqs_per_proc; 7785 PC pc_temp; 7786 PCType coarse_pc_type; 7787 KSPType coarse_ksp_type; 7788 PetscBool multilevel_requested, multilevel_allowed; 7789 PetscBool coarse_reuse; 7790 PetscInt ncoarse, nedcfield; 7791 PetscBool compute_vecs = PETSC_FALSE; 7792 PetscScalar *array; 7793 MatReuse coarse_mat_reuse; 7794 PetscBool restr, full_restr, have_void; 7795 PetscMPIInt size; 7796 7797 PetscFunctionBegin; 7798 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 7799 /* Assign global numbering to coarse dofs */ 7800 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7801 PetscInt ocoarse_size; 7802 compute_vecs = PETSC_TRUE; 7803 7804 pcbddc->new_primal_space = PETSC_TRUE; 7805 ocoarse_size = pcbddc->coarse_size; 7806 PetscCall(PetscFree(pcbddc->global_primal_indices)); 7807 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 7808 /* see if we can avoid some work */ 7809 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7810 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7811 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7812 PetscCall(KSPReset(pcbddc->coarse_ksp)); 7813 coarse_reuse = PETSC_FALSE; 7814 } else { /* we can safely reuse already computed coarse matrix */ 7815 coarse_reuse = PETSC_TRUE; 7816 } 7817 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7818 coarse_reuse = PETSC_FALSE; 7819 } 7820 /* reset any subassembling information */ 7821 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 7822 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7823 coarse_reuse = PETSC_TRUE; 7824 } 7825 if (coarse_reuse && pcbddc->coarse_ksp) { 7826 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 7827 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 7828 coarse_mat_reuse = MAT_REUSE_MATRIX; 7829 } else { 7830 coarse_mat = NULL; 7831 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7832 } 7833 7834 /* creates temporary l2gmap and IS for coarse indexes */ 7835 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 7836 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 7837 7838 /* creates temporary MATIS object for coarse matrix */ 7839 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense)); 7840 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is)); 7841 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense)); 7842 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7843 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7844 PetscCall(MatDestroy(&coarse_submat_dense)); 7845 7846 /* count "active" (i.e. with positive local size) and "void" processes */ 7847 im_active = !!(pcis->n); 7848 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7849 7850 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7851 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 7852 /* full_restr : just use the receivers from the subassembling pattern */ 7853 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 7854 coarse_mat_is = NULL; 7855 multilevel_allowed = PETSC_FALSE; 7856 multilevel_requested = PETSC_FALSE; 7857 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 7858 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 7859 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7860 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7861 if (multilevel_requested) { 7862 ncoarse = active_procs / pcbddc->coarsening_ratio; 7863 restr = PETSC_FALSE; 7864 full_restr = PETSC_FALSE; 7865 } else { 7866 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 7867 restr = PETSC_TRUE; 7868 full_restr = PETSC_TRUE; 7869 } 7870 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7871 ncoarse = PetscMax(1, ncoarse); 7872 if (!pcbddc->coarse_subassembling) { 7873 if (pcbddc->coarsening_ratio > 1) { 7874 if (multilevel_requested) { 7875 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7876 } else { 7877 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7878 } 7879 } else { 7880 PetscMPIInt rank; 7881 7882 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 7883 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7884 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 7885 } 7886 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7887 PetscInt psum; 7888 if (pcbddc->coarse_ksp) psum = 1; 7889 else psum = 0; 7890 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7891 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7892 } 7893 /* determine if we can go multilevel */ 7894 if (multilevel_requested) { 7895 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7896 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7897 } 7898 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7899 7900 /* dump subassembling pattern */ 7901 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 7902 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7903 nedcfield = -1; 7904 corners = NULL; 7905 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 7906 PetscInt *tidxs, *tidxs2, nout, tsize, i; 7907 const PetscInt *idxs; 7908 ISLocalToGlobalMapping tmap; 7909 7910 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7911 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 7912 /* allocate space for temporary storage */ 7913 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 7914 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 7915 /* allocate for IS array */ 7916 nisdofs = pcbddc->n_ISForDofsLocal; 7917 if (pcbddc->nedclocal) { 7918 if (pcbddc->nedfield > -1) { 7919 nedcfield = pcbddc->nedfield; 7920 } else { 7921 nedcfield = 0; 7922 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 7923 nisdofs = 1; 7924 } 7925 } 7926 nisneu = !!pcbddc->NeumannBoundariesLocal; 7927 nisvert = 0; /* nisvert is not used */ 7928 nis = nisdofs + nisneu + nisvert; 7929 PetscCall(PetscMalloc1(nis, &isarray)); 7930 /* dofs splitting */ 7931 for (i = 0; i < nisdofs; i++) { 7932 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 7933 if (nedcfield != i) { 7934 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 7935 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7936 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7937 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7938 } else { 7939 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 7940 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 7941 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7942 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7943 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 7944 } 7945 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7946 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 7947 /* PetscCall(ISView(isarray[i],0)); */ 7948 } 7949 /* neumann boundaries */ 7950 if (pcbddc->NeumannBoundariesLocal) { 7951 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 7952 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 7953 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7954 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7955 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7956 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7957 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 7958 /* PetscCall(ISView(isarray[nisdofs],0)); */ 7959 } 7960 /* coordinates */ 7961 if (pcbddc->corner_selected) { 7962 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7963 PetscCall(ISGetLocalSize(corners, &tsize)); 7964 PetscCall(ISGetIndices(corners, &idxs)); 7965 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7966 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7967 PetscCall(ISRestoreIndices(corners, &idxs)); 7968 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7969 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7970 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 7971 } 7972 PetscCall(PetscFree(tidxs)); 7973 PetscCall(PetscFree(tidxs2)); 7974 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 7975 } else { 7976 nis = 0; 7977 nisdofs = 0; 7978 nisneu = 0; 7979 nisvert = 0; 7980 isarray = NULL; 7981 } 7982 /* destroy no longer needed map */ 7983 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 7984 7985 /* subassemble */ 7986 if (multilevel_allowed) { 7987 Vec vp[1]; 7988 PetscInt nvecs = 0; 7989 PetscBool reuse, reuser; 7990 7991 if (coarse_mat) reuse = PETSC_TRUE; 7992 else reuse = PETSC_FALSE; 7993 PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7994 vp[0] = NULL; 7995 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7996 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 7997 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 7998 PetscCall(VecSetType(vp[0], VECSTANDARD)); 7999 nvecs = 1; 8000 8001 if (pcbddc->divudotp) { 8002 Mat B, loc_divudotp; 8003 Vec v, p; 8004 IS dummy; 8005 PetscInt np; 8006 8007 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8008 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8009 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8010 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8011 PetscCall(MatCreateVecs(B, &v, &p)); 8012 PetscCall(VecSet(p, 1.)); 8013 PetscCall(MatMultTranspose(B, p, v)); 8014 PetscCall(VecDestroy(&p)); 8015 PetscCall(MatDestroy(&B)); 8016 PetscCall(VecGetArray(vp[0], &array)); 8017 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8018 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8019 PetscCall(VecResetArray(pcbddc->vec1_P)); 8020 PetscCall(VecRestoreArray(vp[0], &array)); 8021 PetscCall(ISDestroy(&dummy)); 8022 PetscCall(VecDestroy(&v)); 8023 } 8024 } 8025 if (reuser) { 8026 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8027 } else { 8028 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8029 } 8030 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8031 PetscScalar *arraym; 8032 const PetscScalar *arrayv; 8033 PetscInt nl; 8034 PetscCall(VecGetLocalSize(vp[0], &nl)); 8035 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8036 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8037 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8038 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8039 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8040 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8041 PetscCall(VecDestroy(&vp[0])); 8042 } else { 8043 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8044 } 8045 } else { 8046 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8047 } 8048 if (coarse_mat_is || coarse_mat) { 8049 if (!multilevel_allowed) { 8050 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8051 } else { 8052 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8053 if (coarse_mat_is) { 8054 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8055 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8056 coarse_mat = coarse_mat_is; 8057 } 8058 } 8059 } 8060 PetscCall(MatDestroy(&t_coarse_mat_is)); 8061 PetscCall(MatDestroy(&coarse_mat_is)); 8062 8063 /* create local to global scatters for coarse problem */ 8064 if (compute_vecs) { 8065 PetscInt lrows; 8066 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8067 if (coarse_mat) { 8068 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8069 } else { 8070 lrows = 0; 8071 } 8072 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8073 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8074 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8075 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8076 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8077 } 8078 PetscCall(ISDestroy(&coarse_is)); 8079 8080 /* set defaults for coarse KSP and PC */ 8081 if (multilevel_allowed) { 8082 coarse_ksp_type = KSPRICHARDSON; 8083 coarse_pc_type = PCBDDC; 8084 } else { 8085 coarse_ksp_type = KSPPREONLY; 8086 coarse_pc_type = PCREDUNDANT; 8087 } 8088 8089 /* print some info if requested */ 8090 if (pcbddc->dbg_flag) { 8091 if (!multilevel_allowed) { 8092 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8093 if (multilevel_requested) { 8094 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio)); 8095 } else if (pcbddc->max_levels) { 8096 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8097 } 8098 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8099 } 8100 } 8101 8102 /* communicate coarse discrete gradient */ 8103 coarseG = NULL; 8104 if (pcbddc->nedcG && multilevel_allowed) { 8105 MPI_Comm ccomm; 8106 if (coarse_mat) { 8107 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8108 } else { 8109 ccomm = MPI_COMM_NULL; 8110 } 8111 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8112 } 8113 8114 /* create the coarse KSP object only once with defaults */ 8115 if (coarse_mat) { 8116 PetscBool isredundant, isbddc, force, valid; 8117 PetscViewer dbg_viewer = NULL; 8118 PetscBool isset, issym, isher, isspd; 8119 8120 if (pcbddc->dbg_flag) { 8121 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8122 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8123 } 8124 if (!pcbddc->coarse_ksp) { 8125 char prefix[256], str_level[16]; 8126 size_t len; 8127 8128 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8129 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8130 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8131 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8132 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8133 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8134 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8135 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8136 /* TODO is this logic correct? should check for coarse_mat type */ 8137 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8138 /* prefix */ 8139 PetscCall(PetscStrcpy(prefix, "")); 8140 PetscCall(PetscStrcpy(str_level, "")); 8141 if (!pcbddc->current_level) { 8142 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8143 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8144 } else { 8145 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8146 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8147 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8148 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8149 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8150 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 8151 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8152 } 8153 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8154 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8155 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8156 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8157 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8158 /* allow user customization */ 8159 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8160 /* get some info after set from options */ 8161 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8162 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8163 force = PETSC_FALSE; 8164 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8165 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8166 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8167 if (multilevel_allowed && !force && !valid) { 8168 isbddc = PETSC_TRUE; 8169 PetscCall(PCSetType(pc_temp, PCBDDC)); 8170 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8171 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8172 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8173 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8174 PetscObjectOptionsBegin((PetscObject)pc_temp); 8175 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8176 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8177 PetscOptionsEnd(); 8178 pc_temp->setfromoptionscalled++; 8179 } 8180 } 8181 } 8182 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8183 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8184 if (nisdofs) { 8185 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8186 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8187 } 8188 if (nisneu) { 8189 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8190 PetscCall(ISDestroy(&isarray[nisdofs])); 8191 } 8192 if (nisvert) { 8193 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8194 PetscCall(ISDestroy(&isarray[nis - 1])); 8195 } 8196 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8197 8198 /* get some info after set from options */ 8199 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8200 8201 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8202 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8203 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8204 force = PETSC_FALSE; 8205 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8206 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8207 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8208 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8209 if (isredundant) { 8210 KSP inner_ksp; 8211 PC inner_pc; 8212 8213 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8214 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8215 } 8216 8217 /* parameters which miss an API */ 8218 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8219 if (isbddc) { 8220 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8221 8222 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8223 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8224 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8225 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8226 if (pcbddc_coarse->benign_saddle_point) { 8227 Mat coarsedivudotp_is; 8228 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8229 IS row, col; 8230 const PetscInt *gidxs; 8231 PetscInt n, st, M, N; 8232 8233 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8234 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8235 st = st - n; 8236 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8237 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8238 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8239 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8240 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8241 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8242 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8243 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8244 PetscCall(ISGetSize(row, &M)); 8245 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8246 PetscCall(ISDestroy(&row)); 8247 PetscCall(ISDestroy(&col)); 8248 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8249 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8250 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8251 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8252 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8253 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8254 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8255 PetscCall(MatDestroy(&coarsedivudotp)); 8256 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8257 PetscCall(MatDestroy(&coarsedivudotp_is)); 8258 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8259 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8260 } 8261 } 8262 8263 /* propagate symmetry info of coarse matrix */ 8264 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8265 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8266 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8267 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8268 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8269 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8270 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8271 8272 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8273 /* set operators */ 8274 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8275 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8276 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8277 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8278 } 8279 PetscCall(MatDestroy(&coarseG)); 8280 PetscCall(PetscFree(isarray)); 8281 #if 0 8282 { 8283 PetscViewer viewer; 8284 char filename[256]; 8285 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8286 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8287 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8288 PetscCall(MatView(coarse_mat,viewer)); 8289 PetscCall(PetscViewerPopFormat(viewer)); 8290 PetscCall(PetscViewerDestroy(&viewer)); 8291 } 8292 #endif 8293 8294 if (corners) { 8295 Vec gv; 8296 IS is; 8297 const PetscInt *idxs; 8298 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8299 PetscScalar *coords; 8300 8301 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8302 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8303 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8304 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8305 PetscCall(VecSetBlockSize(gv, cdim)); 8306 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8307 PetscCall(VecSetType(gv, VECSTANDARD)); 8308 PetscCall(VecSetFromOptions(gv)); 8309 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8310 8311 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8312 PetscCall(ISGetLocalSize(is, &n)); 8313 PetscCall(ISGetIndices(is, &idxs)); 8314 PetscCall(PetscMalloc1(n * cdim, &coords)); 8315 for (i = 0; i < n; i++) { 8316 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8317 } 8318 PetscCall(ISRestoreIndices(is, &idxs)); 8319 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8320 8321 PetscCall(ISGetLocalSize(corners, &n)); 8322 PetscCall(ISGetIndices(corners, &idxs)); 8323 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8324 PetscCall(ISRestoreIndices(corners, &idxs)); 8325 PetscCall(PetscFree(coords)); 8326 PetscCall(VecAssemblyBegin(gv)); 8327 PetscCall(VecAssemblyEnd(gv)); 8328 PetscCall(VecGetArray(gv, &coords)); 8329 if (pcbddc->coarse_ksp) { 8330 PC coarse_pc; 8331 PetscBool isbddc; 8332 8333 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8334 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8335 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8336 PetscReal *realcoords; 8337 8338 PetscCall(VecGetLocalSize(gv, &n)); 8339 #if defined(PETSC_USE_COMPLEX) 8340 PetscCall(PetscMalloc1(n, &realcoords)); 8341 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8342 #else 8343 realcoords = coords; 8344 #endif 8345 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8346 #if defined(PETSC_USE_COMPLEX) 8347 PetscCall(PetscFree(realcoords)); 8348 #endif 8349 } 8350 } 8351 PetscCall(VecRestoreArray(gv, &coords)); 8352 PetscCall(VecDestroy(&gv)); 8353 } 8354 PetscCall(ISDestroy(&corners)); 8355 8356 if (pcbddc->coarse_ksp) { 8357 Vec crhs, csol; 8358 8359 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8360 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8361 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL)); 8362 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs))); 8363 } 8364 PetscCall(MatDestroy(&coarsedivudotp)); 8365 8366 /* compute null space for coarse solver if the benign trick has been requested */ 8367 if (pcbddc->benign_null) { 8368 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8369 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 8370 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8371 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8372 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8373 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8374 if (coarse_mat) { 8375 Vec nullv; 8376 PetscScalar *array, *array2; 8377 PetscInt nl; 8378 8379 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8380 PetscCall(VecGetLocalSize(nullv, &nl)); 8381 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8382 PetscCall(VecGetArray(nullv, &array2)); 8383 PetscCall(PetscArraycpy(array2, array, nl)); 8384 PetscCall(VecRestoreArray(nullv, &array2)); 8385 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8386 PetscCall(VecNormalize(nullv, NULL)); 8387 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8388 PetscCall(VecDestroy(&nullv)); 8389 } 8390 } 8391 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8392 8393 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8394 if (pcbddc->coarse_ksp) { 8395 PetscBool ispreonly; 8396 8397 if (CoarseNullSpace) { 8398 PetscBool isnull; 8399 8400 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8401 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8402 /* TODO: add local nullspaces (if any) */ 8403 } 8404 /* setup coarse ksp */ 8405 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8406 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8407 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8408 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8409 KSP check_ksp; 8410 KSPType check_ksp_type; 8411 PC check_pc; 8412 Vec check_vec, coarse_vec; 8413 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8414 PetscInt its; 8415 PetscBool compute_eigs; 8416 PetscReal *eigs_r, *eigs_c; 8417 PetscInt neigs; 8418 const char *prefix; 8419 8420 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8421 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 8422 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 8423 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 8424 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 8425 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 8426 /* prevent from setup unneeded object */ 8427 PetscCall(KSPGetPC(check_ksp, &check_pc)); 8428 PetscCall(PCSetType(check_pc, PCNONE)); 8429 if (ispreonly) { 8430 check_ksp_type = KSPPREONLY; 8431 compute_eigs = PETSC_FALSE; 8432 } else { 8433 check_ksp_type = KSPGMRES; 8434 compute_eigs = PETSC_TRUE; 8435 } 8436 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 8437 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 8438 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 8439 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 8440 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 8441 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 8442 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 8443 PetscCall(KSPSetFromOptions(check_ksp)); 8444 PetscCall(KSPSetUp(check_ksp)); 8445 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 8446 PetscCall(KSPSetPC(check_ksp, check_pc)); 8447 /* create random vec */ 8448 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 8449 PetscCall(VecSetRandom(check_vec, NULL)); 8450 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8451 /* solve coarse problem */ 8452 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 8453 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 8454 /* set eigenvalue estimation if preonly has not been requested */ 8455 if (compute_eigs) { 8456 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 8457 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 8458 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 8459 if (neigs) { 8460 lambda_max = eigs_r[neigs - 1]; 8461 lambda_min = eigs_r[0]; 8462 if (pcbddc->use_coarse_estimates) { 8463 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8464 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 8465 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 8466 } 8467 } 8468 } 8469 } 8470 8471 /* check coarse problem residual error */ 8472 if (pcbddc->dbg_flag) { 8473 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8474 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8475 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 8476 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 8477 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8478 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 8479 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 8480 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer)); 8481 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 8482 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 8483 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 8484 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 8485 if (compute_eigs) { 8486 PetscReal lambda_max_s, lambda_min_s; 8487 KSPConvergedReason reason; 8488 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 8489 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 8490 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 8491 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 8492 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 8493 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 8494 } 8495 PetscCall(PetscViewerFlush(dbg_viewer)); 8496 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8497 } 8498 PetscCall(VecDestroy(&check_vec)); 8499 PetscCall(VecDestroy(&coarse_vec)); 8500 PetscCall(KSPDestroy(&check_ksp)); 8501 if (compute_eigs) { 8502 PetscCall(PetscFree(eigs_r)); 8503 PetscCall(PetscFree(eigs_c)); 8504 } 8505 } 8506 } 8507 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8508 /* print additional info */ 8509 if (pcbddc->dbg_flag) { 8510 /* waits until all processes reaches this point */ 8511 PetscCall(PetscBarrier((PetscObject)pc)); 8512 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 8513 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8514 } 8515 8516 /* free memory */ 8517 PetscCall(MatDestroy(&coarse_mat)); 8518 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8519 PetscFunctionReturn(0); 8520 } 8521 8522 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 8523 { 8524 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8525 PC_IS *pcis = (PC_IS *)pc->data; 8526 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 8527 IS subset, subset_mult, subset_n; 8528 PetscInt local_size, coarse_size = 0; 8529 PetscInt *local_primal_indices = NULL; 8530 const PetscInt *t_local_primal_indices; 8531 8532 PetscFunctionBegin; 8533 /* Compute global number of coarse dofs */ 8534 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 8535 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 8536 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 8537 PetscCall(ISDestroy(&subset_n)); 8538 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 8539 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 8540 PetscCall(ISDestroy(&subset)); 8541 PetscCall(ISDestroy(&subset_mult)); 8542 PetscCall(ISGetLocalSize(subset_n, &local_size)); 8543 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 8544 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 8545 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 8546 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 8547 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 8548 PetscCall(ISDestroy(&subset_n)); 8549 8550 /* check numbering */ 8551 if (pcbddc->dbg_flag) { 8552 PetscScalar coarsesum, *array, *array2; 8553 PetscInt i; 8554 PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE; 8555 8556 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8557 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8558 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n")); 8559 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8560 /* counter */ 8561 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8562 PetscCall(VecSet(pcis->vec1_N, 1.0)); 8563 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8564 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8565 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8566 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8567 PetscCall(VecSet(pcis->vec1_N, 0.0)); 8568 for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES)); 8569 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8570 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8571 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8572 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8573 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8574 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8575 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8576 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8577 PetscCall(VecGetArray(pcis->vec2_N, &array2)); 8578 for (i = 0; i < pcis->n; i++) { 8579 if (array[i] != 0.0 && array[i] != array2[i]) { 8580 PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi; 8581 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8582 set_error = PETSC_TRUE; 8583 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi)); 8584 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh)); 8585 } 8586 } 8587 PetscCall(VecRestoreArray(pcis->vec2_N, &array2)); 8588 PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8589 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8590 for (i = 0; i < pcis->n; i++) { 8591 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]); 8592 } 8593 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8594 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8595 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8596 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8597 PetscCall(VecSum(pcis->vec1_global, &coarsesum)); 8598 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum))); 8599 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8600 PetscInt *gidxs; 8601 8602 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs)); 8603 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs)); 8604 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n")); 8605 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8606 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank)); 8607 for (i = 0; i < pcbddc->local_primal_size; i++) { 8608 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i])); 8609 } 8610 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8611 PetscCall(PetscFree(gidxs)); 8612 } 8613 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8614 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8615 PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed"); 8616 } 8617 8618 /* get back data */ 8619 *coarse_size_n = coarse_size; 8620 *local_primal_indices_n = local_primal_indices; 8621 PetscFunctionReturn(0); 8622 } 8623 8624 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 8625 { 8626 IS localis_t; 8627 PetscInt i, lsize, *idxs, n; 8628 PetscScalar *vals; 8629 8630 PetscFunctionBegin; 8631 /* get indices in local ordering exploiting local to global map */ 8632 PetscCall(ISGetLocalSize(globalis, &lsize)); 8633 PetscCall(PetscMalloc1(lsize, &vals)); 8634 for (i = 0; i < lsize; i++) vals[i] = 1.0; 8635 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 8636 PetscCall(VecSet(gwork, 0.0)); 8637 PetscCall(VecSet(lwork, 0.0)); 8638 if (idxs) { /* multilevel guard */ 8639 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 8640 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 8641 } 8642 PetscCall(VecAssemblyBegin(gwork)); 8643 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 8644 PetscCall(PetscFree(vals)); 8645 PetscCall(VecAssemblyEnd(gwork)); 8646 /* now compute set in local ordering */ 8647 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8648 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8649 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 8650 PetscCall(VecGetSize(lwork, &n)); 8651 for (i = 0, lsize = 0; i < n; i++) { 8652 if (PetscRealPart(vals[i]) > 0.5) lsize++; 8653 } 8654 PetscCall(PetscMalloc1(lsize, &idxs)); 8655 for (i = 0, lsize = 0; i < n; i++) { 8656 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 8657 } 8658 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 8659 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 8660 *localis = localis_t; 8661 PetscFunctionReturn(0); 8662 } 8663 8664 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8665 { 8666 PC_IS *pcis = (PC_IS *)pc->data; 8667 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8668 PC_IS *pcisf; 8669 PC_BDDC *pcbddcf; 8670 PC pcf; 8671 8672 PetscFunctionBegin; 8673 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 8674 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 8675 PetscCall(PCSetType(pcf, PCBDDC)); 8676 8677 pcisf = (PC_IS *)pcf->data; 8678 pcbddcf = (PC_BDDC *)pcf->data; 8679 8680 pcisf->is_B_local = pcis->is_B_local; 8681 pcisf->vec1_N = pcis->vec1_N; 8682 pcisf->BtoNmap = pcis->BtoNmap; 8683 pcisf->n = pcis->n; 8684 pcisf->n_B = pcis->n_B; 8685 8686 PetscCall(PetscFree(pcbddcf->mat_graph)); 8687 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8688 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8689 pcbddcf->sub_schurs = schurs; 8690 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8691 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8692 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8693 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8694 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 8695 pcbddcf->use_faces = PETSC_TRUE; 8696 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 8697 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 8698 pcbddcf->use_qr_single = (PetscBool)!constraints; 8699 pcbddcf->fake_change = PETSC_TRUE; 8700 pcbddcf->dbg_flag = pcbddc->dbg_flag; 8701 8702 PetscCall(PCBDDCAdaptiveSelection(pcf)); 8703 PetscCall(PCBDDCConstraintsSetUp(pcf)); 8704 8705 *change = pcbddcf->ConstraintMatrix; 8706 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 8707 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 8708 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 8709 8710 if (schurs) pcbddcf->sub_schurs = NULL; 8711 pcbddcf->ConstraintMatrix = NULL; 8712 pcbddcf->mat_graph = NULL; 8713 pcisf->is_B_local = NULL; 8714 pcisf->vec1_N = NULL; 8715 pcisf->BtoNmap = NULL; 8716 PetscCall(PCDestroy(&pcf)); 8717 PetscFunctionReturn(0); 8718 } 8719 8720 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8721 { 8722 PC_IS *pcis = (PC_IS *)pc->data; 8723 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8724 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 8725 Mat S_j; 8726 PetscInt *used_xadj, *used_adjncy; 8727 PetscBool free_used_adj; 8728 8729 PetscFunctionBegin; 8730 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8731 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8732 free_used_adj = PETSC_FALSE; 8733 if (pcbddc->sub_schurs_layers == -1) { 8734 used_xadj = NULL; 8735 used_adjncy = NULL; 8736 } else { 8737 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8738 used_xadj = pcbddc->mat_graph->xadj; 8739 used_adjncy = pcbddc->mat_graph->adjncy; 8740 } else if (pcbddc->computed_rowadj) { 8741 used_xadj = pcbddc->mat_graph->xadj; 8742 used_adjncy = pcbddc->mat_graph->adjncy; 8743 } else { 8744 PetscBool flg_row = PETSC_FALSE; 8745 const PetscInt *xadj, *adjncy; 8746 PetscInt nvtxs; 8747 8748 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8749 if (flg_row) { 8750 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 8751 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 8752 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 8753 free_used_adj = PETSC_TRUE; 8754 } else { 8755 pcbddc->sub_schurs_layers = -1; 8756 used_xadj = NULL; 8757 used_adjncy = NULL; 8758 } 8759 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8760 } 8761 } 8762 8763 /* setup sub_schurs data */ 8764 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8765 if (!sub_schurs->schur_explicit) { 8766 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8767 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8768 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 8769 } else { 8770 Mat change = NULL; 8771 Vec scaling = NULL; 8772 IS change_primal = NULL, iP; 8773 PetscInt benign_n; 8774 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8775 PetscBool need_change = PETSC_FALSE; 8776 PetscBool discrete_harmonic = PETSC_FALSE; 8777 8778 if (!pcbddc->use_vertices && reuse_solvers) { 8779 PetscInt n_vertices; 8780 8781 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 8782 reuse_solvers = (PetscBool)!n_vertices; 8783 } 8784 if (!pcbddc->benign_change_explicit) { 8785 benign_n = pcbddc->benign_n; 8786 } else { 8787 benign_n = 0; 8788 } 8789 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8790 We need a global reduction to avoid possible deadlocks. 8791 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8792 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8793 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8794 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8795 need_change = (PetscBool)(!need_change); 8796 } 8797 /* If the user defines additional constraints, we import them here */ 8798 if (need_change) { 8799 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 8800 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 8801 } 8802 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8803 8804 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 8805 if (iP) { 8806 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 8807 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 8808 PetscOptionsEnd(); 8809 } 8810 if (discrete_harmonic) { 8811 Mat A; 8812 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 8813 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 8814 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 8815 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 8816 pcbddc->benign_zerodiag_subs, change, change_primal)); 8817 PetscCall(MatDestroy(&A)); 8818 } else { 8819 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 8820 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 8821 } 8822 PetscCall(MatDestroy(&change)); 8823 PetscCall(ISDestroy(&change_primal)); 8824 } 8825 PetscCall(MatDestroy(&S_j)); 8826 8827 /* free adjacency */ 8828 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 8829 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8830 PetscFunctionReturn(0); 8831 } 8832 8833 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8834 { 8835 PC_IS *pcis = (PC_IS *)pc->data; 8836 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8837 PCBDDCGraph graph; 8838 8839 PetscFunctionBegin; 8840 /* attach interface graph for determining subsets */ 8841 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8842 IS verticesIS, verticescomm; 8843 PetscInt vsize, *idxs; 8844 8845 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8846 PetscCall(ISGetSize(verticesIS, &vsize)); 8847 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 8848 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 8849 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 8850 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8851 PetscCall(PCBDDCGraphCreate(&graph)); 8852 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 8853 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 8854 PetscCall(ISDestroy(&verticescomm)); 8855 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 8856 } else { 8857 graph = pcbddc->mat_graph; 8858 } 8859 /* print some info */ 8860 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8861 IS vertices; 8862 PetscInt nv, nedges, nfaces; 8863 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 8864 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8865 PetscCall(ISGetSize(vertices, &nv)); 8866 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8867 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 8868 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 8869 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 8870 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 8871 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8872 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 8873 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8874 } 8875 8876 /* sub_schurs init */ 8877 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 8878 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 8879 8880 /* free graph struct */ 8881 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 8882 PetscFunctionReturn(0); 8883 } 8884 8885 PetscErrorCode PCBDDCCheckOperator(PC pc) 8886 { 8887 PC_IS *pcis = (PC_IS *)pc->data; 8888 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8889 8890 PetscFunctionBegin; 8891 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8892 IS zerodiag = NULL; 8893 Mat S_j, B0_B = NULL; 8894 Vec dummy_vec = NULL, vec_check_B, vec_scale_P; 8895 PetscScalar *p0_check, *array, *array2; 8896 PetscReal norm; 8897 PetscInt i; 8898 8899 /* B0 and B0_B */ 8900 if (zerodiag) { 8901 IS dummy; 8902 8903 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy)); 8904 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 8905 PetscCall(MatCreateVecs(B0_B, NULL, &dummy_vec)); 8906 PetscCall(ISDestroy(&dummy)); 8907 } 8908 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8909 PetscCall(VecDuplicate(pcbddc->vec1_P, &vec_scale_P)); 8910 PetscCall(VecSet(pcbddc->vec1_P, 1.0)); 8911 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8912 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8913 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE)); 8914 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE)); 8915 PetscCall(VecReciprocal(vec_scale_P)); 8916 /* S_j */ 8917 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8918 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8919 8920 /* mimic vector in \widetilde{W}_\Gamma */ 8921 PetscCall(VecSetRandom(pcis->vec1_N, NULL)); 8922 /* continuous in primal space */ 8923 PetscCall(VecSetRandom(pcbddc->coarse_vec, NULL)); 8924 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8925 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8926 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 8927 PetscCall(PetscCalloc1(pcbddc->benign_n, &p0_check)); 8928 for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i]; 8929 PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES)); 8930 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 8931 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8932 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8933 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD)); 8934 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD)); 8935 PetscCall(VecDuplicate(pcis->vec2_B, &vec_check_B)); 8936 PetscCall(VecCopy(pcis->vec2_B, vec_check_B)); 8937 8938 /* assemble rhs for coarse problem */ 8939 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8940 /* local with Schur */ 8941 PetscCall(MatMult(S_j, pcis->vec2_B, pcis->vec1_B)); 8942 if (zerodiag) { 8943 PetscCall(VecGetArray(dummy_vec, &array)); 8944 for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i]; 8945 PetscCall(VecRestoreArray(dummy_vec, &array)); 8946 PetscCall(MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B)); 8947 } 8948 /* sum on primal nodes the local contributions */ 8949 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE)); 8950 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE)); 8951 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8952 PetscCall(VecGetArray(pcbddc->vec1_P, &array2)); 8953 for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8954 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array2)); 8955 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8956 PetscCall(VecSet(pcbddc->coarse_vec, 0.)); 8957 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8958 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8959 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8960 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8961 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 8962 /* scale primal nodes (BDDC sums contibutions) */ 8963 PetscCall(VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P)); 8964 PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES)); 8965 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 8966 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8967 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8968 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 8969 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 8970 /* global: \widetilde{B0}_B w_\Gamma */ 8971 if (zerodiag) { 8972 PetscCall(MatMult(B0_B, pcis->vec2_B, dummy_vec)); 8973 PetscCall(VecGetArray(dummy_vec, &array)); 8974 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i]; 8975 PetscCall(VecRestoreArray(dummy_vec, &array)); 8976 } 8977 /* BDDC */ 8978 PetscCall(VecSet(pcis->vec1_D, 0.)); 8979 PetscCall(PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE)); 8980 8981 PetscCall(VecCopy(pcis->vec1_B, pcis->vec2_B)); 8982 PetscCall(VecAXPY(pcis->vec1_B, -1.0, vec_check_B)); 8983 PetscCall(VecNorm(pcis->vec1_B, NORM_INFINITY, &norm)); 8984 PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm)); 8985 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i]))); 8986 PetscCall(PetscFree(p0_check)); 8987 PetscCall(VecDestroy(&vec_scale_P)); 8988 PetscCall(VecDestroy(&vec_check_B)); 8989 PetscCall(VecDestroy(&dummy_vec)); 8990 PetscCall(MatDestroy(&S_j)); 8991 PetscCall(MatDestroy(&B0_B)); 8992 } 8993 PetscFunctionReturn(0); 8994 } 8995 8996 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8997 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8998 { 8999 Mat At; 9000 IS rows; 9001 PetscInt rst, ren; 9002 PetscLayout rmap; 9003 9004 PetscFunctionBegin; 9005 rst = ren = 0; 9006 if (ccomm != MPI_COMM_NULL) { 9007 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9008 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9009 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9010 PetscCall(PetscLayoutSetUp(rmap)); 9011 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9012 } 9013 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9014 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9015 PetscCall(ISDestroy(&rows)); 9016 9017 if (ccomm != MPI_COMM_NULL) { 9018 Mat_MPIAIJ *a, *b; 9019 IS from, to; 9020 Vec gvec; 9021 PetscInt lsize; 9022 9023 PetscCall(MatCreate(ccomm, B)); 9024 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9025 PetscCall(MatSetType(*B, MATAIJ)); 9026 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9027 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9028 a = (Mat_MPIAIJ *)At->data; 9029 b = (Mat_MPIAIJ *)(*B)->data; 9030 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9031 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9032 PetscCall(PetscObjectReference((PetscObject)a->A)); 9033 PetscCall(PetscObjectReference((PetscObject)a->B)); 9034 b->A = a->A; 9035 b->B = a->B; 9036 9037 b->donotstash = a->donotstash; 9038 b->roworiented = a->roworiented; 9039 b->rowindices = NULL; 9040 b->rowvalues = NULL; 9041 b->getrowactive = PETSC_FALSE; 9042 9043 (*B)->rmap = rmap; 9044 (*B)->factortype = A->factortype; 9045 (*B)->assembled = PETSC_TRUE; 9046 (*B)->insertmode = NOT_SET_VALUES; 9047 (*B)->preallocated = PETSC_TRUE; 9048 9049 if (a->colmap) { 9050 #if defined(PETSC_USE_CTABLE) 9051 PetscCall(PetscTableCreateCopy(a->colmap, &b->colmap)); 9052 #else 9053 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9054 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9055 #endif 9056 } else b->colmap = NULL; 9057 if (a->garray) { 9058 PetscInt len; 9059 len = a->B->cmap->n; 9060 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9061 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9062 } else b->garray = NULL; 9063 9064 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9065 b->lvec = a->lvec; 9066 9067 /* cannot use VecScatterCopy */ 9068 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9069 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9070 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9071 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9072 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9073 PetscCall(ISDestroy(&from)); 9074 PetscCall(ISDestroy(&to)); 9075 PetscCall(VecDestroy(&gvec)); 9076 } 9077 PetscCall(MatDestroy(&At)); 9078 PetscFunctionReturn(0); 9079 } 9080