1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork, *data, *U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM, bN, lwork, lierr, di = 1; 20 PetscInt ulw, i, nr, nc, n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A, &nr, &nc)); 27 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 32 PetscCall(PetscMalloc1(ulw, &uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr, nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n, &sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr * nr, &U)); 46 PetscCall(PetscBLASIntCast(nr, &bM)); 47 PetscCall(PetscBLASIntCast(nc, &bN)); 48 PetscCall(PetscBLASIntCast(ulw, &lwork)); 49 PetscCall(MatDenseGetArray(A, &data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 53 #else 54 PetscCall(PetscMalloc1(5 * n, &rwork2)); 55 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 60 PetscCall(MatDenseRestoreArray(A, &data)); 61 for (i = 0; i < n; i++) 62 if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) PetscCall(PetscFree(sing)); 64 if (!work) PetscCall(PetscFree(uwork)); 65 /* create B */ 66 if (!range) { 67 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 68 PetscCall(MatDenseGetArray(*B, &data)); 69 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 70 } else { 71 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 72 PetscCall(MatDenseGetArray(*B, &data)); 73 PetscCall(PetscArraycpy(data, U, i * nr)); 74 } 75 PetscCall(MatDenseRestoreArray(*B, &data)); 76 PetscCall(PetscFree(U)); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 Mat GE, GEd; 89 PetscInt rsize, csize, esize; 90 PetscScalar *ptr; 91 92 PetscFunctionBegin; 93 PetscCall(ISGetSize(edge, &esize)); 94 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 95 PetscCall(ISGetSize(extrow, &rsize)); 96 PetscCall(ISGetSize(extcol, &csize)); 97 98 /* gradients */ 99 ptr = work + 5 * esize; 100 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 101 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 102 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 103 PetscCall(MatDestroy(&GE)); 104 105 /* constants */ 106 ptr += rsize * csize; 107 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 108 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 109 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 110 PetscCall(MatDestroy(&GE)); 111 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 112 PetscCall(MatDestroy(&GEd)); 113 114 if (corners) { 115 Mat GEc; 116 const PetscScalar *vals; 117 PetscScalar v; 118 119 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 120 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 121 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 122 /* v = PetscAbsScalar(vals[0]) */; 123 v = 1.; 124 cvals[0] = vals[0] / v; 125 cvals[1] = vals[1] / v; 126 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 127 PetscCall(MatScale(*GKins, 1. / v)); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 133 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 134 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 135 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 136 PetscCall(MatView(GEc, viewer)); 137 PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK")); 138 PetscCall(MatView(*GKins, viewer)); 139 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 140 PetscCall(MatView(GEd, viewer)); 141 PetscCall(PetscViewerDestroy(&viewer)); 142 } 143 #endif 144 PetscCall(MatDestroy(&GEd)); 145 PetscCall(MatDestroy(&GEc)); 146 } 147 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 154 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 155 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 159 MPI_Comm comm; 160 IS lned, primals, allprimals, nedfieldlocal; 161 IS *eedges, *extrows, *extcols, *alleedges; 162 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 163 PetscScalar *vals, *work; 164 PetscReal *rwork; 165 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 166 PetscInt ne, nv, Lv, order, n, field; 167 PetscInt n_neigh, *neigh, *n_shared, **shared; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 singular = PETSC_FALSE; 187 188 /* Command line customization */ 189 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 190 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 191 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 192 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 193 /* print debug info TODO: to be removed */ 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 195 PetscOptionsEnd(); 196 197 /* Return if there are no edges in the decomposition and the problem is not singular */ 198 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 199 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 200 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 201 if (!singular) { 202 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 203 lrc[0] = PETSC_FALSE; 204 for (i = 0; i < n; i++) { 205 if (PetscRealPart(vals[i]) > 2.) { 206 lrc[0] = PETSC_TRUE; 207 break; 208 } 209 } 210 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 211 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 212 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 /* Get Nedelec field */ 216 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 217 if (pcbddc->n_ISForDofsLocal && field >= 0) { 218 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 219 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 220 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 221 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 222 ne = n; 223 nedfieldlocal = NULL; 224 global = PETSC_TRUE; 225 } else if (field == PETSC_DECIDE) { 226 PetscInt rst, ren, *idx; 227 228 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 229 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 230 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 231 for (i = rst; i < ren; i++) { 232 PetscInt nc; 233 234 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 235 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 236 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 237 } 238 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 239 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 240 PetscCall(PetscMalloc1(n, &idx)); 241 for (i = 0, ne = 0; i < n; i++) 242 if (matis->sf_leafdata[i]) idx[ne++] = i; 243 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 244 } else { 245 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 /* Sanity checks */ 249 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 250 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 251 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 252 253 /* Just set primal dofs and return */ 254 if (setprimal) { 255 IS enedfieldlocal; 256 PetscInt *eidxs; 257 258 PetscCall(PetscMalloc1(ne, &eidxs)); 259 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 260 if (nedfieldlocal) { 261 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 262 for (i = 0, cum = 0; i < ne; i++) { 263 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 264 } 265 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 266 } else { 267 for (i = 0, cum = 0; i < ne; i++) { 268 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 269 } 270 } 271 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 272 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 273 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 274 PetscCall(PetscFree(eidxs)); 275 PetscCall(ISDestroy(&nedfieldlocal)); 276 PetscCall(ISDestroy(&enedfieldlocal)); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 /* Compute some l2g maps */ 281 if (nedfieldlocal) { 282 IS is; 283 284 /* need to map from the local Nedelec field to local numbering */ 285 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 286 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 287 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 288 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 289 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 290 if (global) { 291 PetscCall(PetscObjectReference((PetscObject)al2g)); 292 el2g = al2g; 293 } else { 294 IS gis; 295 296 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 297 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 298 PetscCall(ISDestroy(&gis)); 299 } 300 PetscCall(ISDestroy(&is)); 301 } else { 302 /* restore default */ 303 pcbddc->nedfield = -1; 304 /* one ref for the destruction of al2g, one for el2g */ 305 PetscCall(PetscObjectReference((PetscObject)al2g)); 306 PetscCall(PetscObjectReference((PetscObject)al2g)); 307 el2g = al2g; 308 fl2g = NULL; 309 } 310 311 /* Start communication to drop connections for interior edges (for cc analysis only) */ 312 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 313 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 314 if (nedfieldlocal) { 315 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 316 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 317 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 318 } else { 319 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 320 } 321 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 322 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 323 324 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 325 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 326 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 327 if (global) { 328 PetscInt rst; 329 330 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 331 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 332 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 333 } 334 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 335 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 336 } else { 337 PetscInt *tbz; 338 339 PetscCall(PetscMalloc1(ne, &tbz)); 340 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 342 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 343 for (i = 0, cum = 0; i < ne; i++) 344 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 345 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 346 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 347 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 348 PetscCall(PetscFree(tbz)); 349 } 350 } else { /* we need the entire G to infer the nullspace */ 351 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 352 G = pcbddc->discretegradient; 353 } 354 355 /* Extract subdomain relevant rows of G */ 356 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 357 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 358 PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); 359 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 360 PetscCall(ISDestroy(&lned)); 361 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 362 PetscCall(MatDestroy(&lGall)); 363 PetscCall(MatISGetLocalMat(lGis, &lG)); 364 365 /* SF for nodal dofs communications */ 366 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 367 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 368 PetscCall(PetscObjectReference((PetscObject)vl2g)); 369 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 370 PetscCall(PetscSFCreate(comm, &sfv)); 371 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 372 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 373 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 374 i = singular ? 2 : 1; 375 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 376 377 /* Destroy temporary G created in MATIS format and modified G */ 378 PetscCall(PetscObjectReference((PetscObject)lG)); 379 PetscCall(MatDestroy(&lGis)); 380 PetscCall(MatDestroy(&G)); 381 382 if (print) { 383 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 384 PetscCall(MatView(lG, NULL)); 385 } 386 387 /* Save lG for values insertion in change of basis */ 388 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 389 390 /* Analyze the edge-nodes connections (duplicate lG) */ 391 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 392 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 393 PetscCall(PetscBTCreate(nv, &btv)); 394 PetscCall(PetscBTCreate(ne, &bte)); 395 PetscCall(PetscBTCreate(ne, &btb)); 396 PetscCall(PetscBTCreate(ne, &btbd)); 397 PetscCall(PetscBTCreate(nv, &btvcand)); 398 /* need to import the boundary specification to ensure the 399 proper detection of coarse edges' endpoints */ 400 if (pcbddc->DirichletBoundariesLocal) { 401 IS is; 402 403 if (fl2g) { 404 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 405 } else { 406 is = pcbddc->DirichletBoundariesLocal; 407 } 408 PetscCall(ISGetLocalSize(is, &cum)); 409 PetscCall(ISGetIndices(is, &idxs)); 410 for (i = 0; i < cum; i++) { 411 if (idxs[i] >= 0) { 412 PetscCall(PetscBTSet(btb, idxs[i])); 413 PetscCall(PetscBTSet(btbd, idxs[i])); 414 } 415 } 416 PetscCall(ISRestoreIndices(is, &idxs)); 417 if (fl2g) PetscCall(ISDestroy(&is)); 418 } 419 if (pcbddc->NeumannBoundariesLocal) { 420 IS is; 421 422 if (fl2g) { 423 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 424 } else { 425 is = pcbddc->NeumannBoundariesLocal; 426 } 427 PetscCall(ISGetLocalSize(is, &cum)); 428 PetscCall(ISGetIndices(is, &idxs)); 429 for (i = 0; i < cum; i++) { 430 if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i])); 431 } 432 PetscCall(ISRestoreIndices(is, &idxs)); 433 if (fl2g) PetscCall(ISDestroy(&is)); 434 } 435 436 /* Count neighs per dof */ 437 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs)); 438 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs)); 439 440 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 441 for proper detection of coarse edges' endpoints */ 442 PetscCall(PetscBTCreate(ne, &btee)); 443 for (i = 0; i < ne; i++) { 444 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 445 } 446 PetscCall(PetscMalloc1(ne, &marks)); 447 if (!conforming) { 448 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 449 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 450 } 451 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 452 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 453 cum = 0; 454 for (i = 0; i < ne; i++) { 455 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 456 if (!PetscBTLookup(btee, i)) { 457 marks[cum++] = i; 458 continue; 459 } 460 /* set badly connected edge dofs as primal */ 461 if (!conforming) { 462 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 463 marks[cum++] = i; 464 PetscCall(PetscBTSet(bte, i)); 465 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 466 } else { 467 /* every edge dofs should be connected through a certain number of nodal dofs 468 to other edge dofs belonging to coarse edges 469 - at most 2 endpoints 470 - order-1 interior nodal dofs 471 - no undefined nodal dofs (nconn < order) 472 */ 473 PetscInt ends = 0, ints = 0, undef = 0; 474 for (j = ii[i]; j < ii[i + 1]; j++) { 475 PetscInt v = jj[j], k; 476 PetscInt nconn = iit[v + 1] - iit[v]; 477 for (k = iit[v]; k < iit[v + 1]; k++) 478 if (!PetscBTLookup(btee, jjt[k])) nconn--; 479 if (nconn > order) ends++; 480 else if (nconn == order) ints++; 481 else undef++; 482 } 483 if (undef || ends > 2 || ints != order - 1) { 484 marks[cum++] = i; 485 PetscCall(PetscBTSet(bte, i)); 486 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 487 } 488 } 489 } 490 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 491 if (!order && ii[i + 1] != ii[i]) { 492 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 493 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 494 } 495 } 496 PetscCall(PetscBTDestroy(&btee)); 497 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 498 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 499 if (!conforming) { 500 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 501 PetscCall(MatDestroy(&lGt)); 502 } 503 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 504 505 /* identify splitpoints and corner candidates */ 506 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 507 if (print) { 508 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 509 PetscCall(MatView(lGe, NULL)); 510 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 511 PetscCall(MatView(lGt, NULL)); 512 } 513 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 514 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 515 for (i = 0; i < nv; i++) { 516 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 517 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 518 if (!order) { /* variable order */ 519 PetscReal vorder = 0.; 520 521 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 522 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 523 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 524 ord = 1; 525 } 526 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 527 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 528 if (PetscBTLookup(btbd, jj[j])) { 529 bdir = PETSC_TRUE; 530 break; 531 } 532 if (vc != ecount[jj[j]]) { 533 sneighs = PETSC_FALSE; 534 } else { 535 PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]]; 536 for (k = 0; k < vc; k++) { 537 if (vn[k] != en[k]) { 538 sneighs = PETSC_FALSE; 539 break; 540 } 541 } 542 } 543 } 544 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 545 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 546 PetscCall(PetscBTSet(btv, i)); 547 } else if (test == ord) { 548 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 549 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 550 PetscCall(PetscBTSet(btv, i)); 551 } else { 552 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 553 PetscCall(PetscBTSet(btvcand, i)); 554 } 555 } 556 } 557 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 558 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 559 PetscCall(PetscBTDestroy(&btbd)); 560 561 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 562 if (order != 1) { 563 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 564 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 565 for (i = 0; i < nv; i++) { 566 if (PetscBTLookup(btvcand, i)) { 567 PetscBool found = PETSC_FALSE; 568 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 569 PetscInt k, e = jj[j]; 570 if (PetscBTLookup(bte, e)) continue; 571 for (k = iit[e]; k < iit[e + 1]; k++) { 572 PetscInt v = jjt[k]; 573 if (v != i && PetscBTLookup(btvcand, v)) { 574 found = PETSC_TRUE; 575 break; 576 } 577 } 578 } 579 if (!found) { 580 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 581 PetscCall(PetscBTClear(btvcand, i)); 582 } else { 583 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 584 } 585 } 586 } 587 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 588 } 589 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 590 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 591 PetscCall(MatDestroy(&lGe)); 592 593 /* Get the local G^T explicitly */ 594 PetscCall(MatDestroy(&lGt)); 595 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 596 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 597 598 /* Mark interior nodal dofs */ 599 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 600 PetscCall(PetscBTCreate(nv, &btvi)); 601 for (i = 1; i < n_neigh; i++) { 602 for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j])); 603 } 604 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 605 606 /* communicate corners and splitpoints */ 607 PetscCall(PetscMalloc1(nv, &vmarks)); 608 PetscCall(PetscArrayzero(sfvleaves, nv)); 609 PetscCall(PetscArrayzero(sfvroots, Lv)); 610 for (i = 0; i < nv; i++) 611 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 612 613 if (print) { 614 IS tbz; 615 616 cum = 0; 617 for (i = 0; i < nv; i++) 618 if (sfvleaves[i]) vmarks[cum++] = i; 619 620 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 621 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 622 PetscCall(ISView(tbz, NULL)); 623 PetscCall(ISDestroy(&tbz)); 624 } 625 626 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 627 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 628 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 629 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 630 631 /* Zero rows of lGt corresponding to identified corners 632 and interior nodal dofs */ 633 cum = 0; 634 for (i = 0; i < nv; i++) { 635 if (sfvleaves[i]) { 636 vmarks[cum++] = i; 637 PetscCall(PetscBTSet(btv, i)); 638 } 639 if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 640 } 641 PetscCall(PetscBTDestroy(&btvi)); 642 if (print) { 643 IS tbz; 644 645 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 646 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 647 PetscCall(ISView(tbz, NULL)); 648 PetscCall(ISDestroy(&tbz)); 649 } 650 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 651 PetscCall(PetscFree(vmarks)); 652 PetscCall(PetscSFDestroy(&sfv)); 653 PetscCall(PetscFree2(sfvleaves, sfvroots)); 654 655 /* Recompute G */ 656 PetscCall(MatDestroy(&lG)); 657 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 658 if (print) { 659 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 660 PetscCall(MatView(lG, NULL)); 661 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 662 PetscCall(MatView(lGt, NULL)); 663 } 664 665 /* Get primal dofs (if any) */ 666 cum = 0; 667 for (i = 0; i < ne; i++) { 668 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 669 } 670 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 671 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 672 if (print) { 673 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 674 PetscCall(ISView(primals, NULL)); 675 } 676 PetscCall(PetscBTDestroy(&bte)); 677 /* TODO: what if the user passed in some of them ? */ 678 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 679 PetscCall(ISDestroy(&primals)); 680 681 /* Compute edge connectivity */ 682 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 683 684 /* Symbolic conn = lG*lGt */ 685 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 686 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 687 PetscCall(MatProductSetAlgorithm(conn, "default")); 688 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 689 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 690 PetscCall(MatProductSetFromOptions(conn)); 691 PetscCall(MatProductSymbolic(conn)); 692 693 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 694 if (fl2g) { 695 PetscBT btf; 696 PetscInt *iia, *jja, *iiu, *jju; 697 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 698 699 /* create CSR for all local dofs */ 700 PetscCall(PetscMalloc1(n + 1, &iia)); 701 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 702 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 703 iiu = pcbddc->mat_graph->xadj; 704 jju = pcbddc->mat_graph->adjncy; 705 } else if (pcbddc->use_local_adj) { 706 rest = PETSC_TRUE; 707 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 708 } else { 709 free = PETSC_TRUE; 710 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 711 iiu[0] = 0; 712 for (i = 0; i < n; i++) { 713 iiu[i + 1] = i + 1; 714 jju[i] = -1; 715 } 716 } 717 718 /* import sizes of CSR */ 719 iia[0] = 0; 720 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 721 722 /* overwrite entries corresponding to the Nedelec field */ 723 PetscCall(PetscBTCreate(n, &btf)); 724 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 725 for (i = 0; i < ne; i++) { 726 PetscCall(PetscBTSet(btf, idxs[i])); 727 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 728 } 729 730 /* iia in CSR */ 731 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 732 733 /* jja in CSR */ 734 PetscCall(PetscMalloc1(iia[n], &jja)); 735 for (i = 0; i < n; i++) 736 if (!PetscBTLookup(btf, i)) 737 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 738 739 /* map edge dofs connectivity */ 740 if (jj) { 741 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 742 for (i = 0; i < ne; i++) { 743 PetscInt e = idxs[i]; 744 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 745 } 746 } 747 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 748 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 749 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 750 if (free) PetscCall(PetscFree2(iiu, jju)); 751 PetscCall(PetscBTDestroy(&btf)); 752 } else { 753 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 754 } 755 756 /* Analyze interface for edge dofs */ 757 PetscCall(PCBDDCAnalyzeInterface(pc)); 758 pcbddc->mat_graph->twodim = PETSC_FALSE; 759 760 /* Get coarse edges in the edge space */ 761 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 762 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 763 764 if (fl2g) { 765 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 766 PetscCall(PetscMalloc1(nee, &eedges)); 767 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 768 } else { 769 eedges = alleedges; 770 primals = allprimals; 771 } 772 773 /* Mark fine edge dofs with their coarse edge id */ 774 PetscCall(PetscArrayzero(marks, ne)); 775 PetscCall(ISGetLocalSize(primals, &cum)); 776 PetscCall(ISGetIndices(primals, &idxs)); 777 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 778 PetscCall(ISRestoreIndices(primals, &idxs)); 779 if (print) { 780 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 781 PetscCall(ISView(primals, NULL)); 782 } 783 784 maxsize = 0; 785 for (i = 0; i < nee; i++) { 786 PetscInt size, mark = i + 1; 787 788 PetscCall(ISGetLocalSize(eedges[i], &size)); 789 PetscCall(ISGetIndices(eedges[i], &idxs)); 790 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 791 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 792 maxsize = PetscMax(maxsize, size); 793 } 794 795 /* Find coarse edge endpoints */ 796 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 797 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 798 for (i = 0; i < nee; i++) { 799 PetscInt mark = i + 1, size; 800 801 PetscCall(ISGetLocalSize(eedges[i], &size)); 802 if (!size && nedfieldlocal) continue; 803 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 804 PetscCall(ISGetIndices(eedges[i], &idxs)); 805 if (print) { 806 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 807 PetscCall(ISView(eedges[i], NULL)); 808 } 809 for (j = 0; j < size; j++) { 810 PetscInt k, ee = idxs[j]; 811 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 812 for (k = ii[ee]; k < ii[ee + 1]; k++) { 813 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 814 if (PetscBTLookup(btv, jj[k])) { 815 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 816 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 817 PetscInt k2; 818 PetscBool corner = PETSC_FALSE; 819 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 820 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 821 /* it's a corner if either is connected with an edge dof belonging to a different cc or 822 if the edge dof lie on the natural part of the boundary */ 823 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 824 corner = PETSC_TRUE; 825 break; 826 } 827 } 828 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 829 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 830 PetscCall(PetscBTSet(btv, jj[k])); 831 } else { 832 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 833 } 834 } 835 } 836 } 837 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 838 } 839 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 840 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 841 PetscCall(PetscBTDestroy(&btb)); 842 843 /* Reset marked primal dofs */ 844 PetscCall(ISGetLocalSize(primals, &cum)); 845 PetscCall(ISGetIndices(primals, &idxs)); 846 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 847 PetscCall(ISRestoreIndices(primals, &idxs)); 848 849 /* Now use the initial lG */ 850 PetscCall(MatDestroy(&lG)); 851 PetscCall(MatDestroy(&lGt)); 852 lG = lGinit; 853 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 854 855 /* Compute extended cols indices */ 856 PetscCall(PetscBTCreate(nv, &btvc)); 857 PetscCall(PetscBTCreate(nee, &bter)); 858 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 859 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 860 i *= maxsize; 861 PetscCall(PetscCalloc1(nee, &extcols)); 862 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 863 eerr = PETSC_FALSE; 864 for (i = 0; i < nee; i++) { 865 PetscInt size, found = 0; 866 867 cum = 0; 868 PetscCall(ISGetLocalSize(eedges[i], &size)); 869 if (!size && nedfieldlocal) continue; 870 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 871 PetscCall(ISGetIndices(eedges[i], &idxs)); 872 PetscCall(PetscBTMemzero(nv, btvc)); 873 for (j = 0; j < size; j++) { 874 PetscInt k, ee = idxs[j]; 875 for (k = ii[ee]; k < ii[ee + 1]; k++) { 876 PetscInt vv = jj[k]; 877 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 878 else if (!PetscBTLookupSet(btvc, vv)) found++; 879 } 880 } 881 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 882 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 883 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 884 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 885 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 886 /* it may happen that endpoints are not defined at this point 887 if it is the case, mark this edge for a second pass */ 888 if (cum != size - 1 || found != 2) { 889 PetscCall(PetscBTSet(bter, i)); 890 if (print) { 891 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 892 PetscCall(ISView(eedges[i], NULL)); 893 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 894 PetscCall(ISView(extcols[i], NULL)); 895 } 896 eerr = PETSC_TRUE; 897 } 898 } 899 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 900 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 901 if (done) { 902 PetscInt *newprimals; 903 904 PetscCall(PetscMalloc1(ne, &newprimals)); 905 PetscCall(ISGetLocalSize(primals, &cum)); 906 PetscCall(ISGetIndices(primals, &idxs)); 907 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 908 PetscCall(ISRestoreIndices(primals, &idxs)); 909 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 910 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 911 for (i = 0; i < nee; i++) { 912 PetscBool has_candidates = PETSC_FALSE; 913 if (PetscBTLookup(bter, i)) { 914 PetscInt size, mark = i + 1; 915 916 PetscCall(ISGetLocalSize(eedges[i], &size)); 917 PetscCall(ISGetIndices(eedges[i], &idxs)); 918 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 919 for (j = 0; j < size; j++) { 920 PetscInt k, ee = idxs[j]; 921 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 922 for (k = ii[ee]; k < ii[ee + 1]; k++) { 923 /* set all candidates located on the edge as corners */ 924 if (PetscBTLookup(btvcand, jj[k])) { 925 PetscInt k2, vv = jj[k]; 926 has_candidates = PETSC_TRUE; 927 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 928 PetscCall(PetscBTSet(btv, vv)); 929 /* set all edge dofs connected to candidate as primals */ 930 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 931 if (marks[jjt[k2]] == mark) { 932 PetscInt k3, ee2 = jjt[k2]; 933 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 934 newprimals[cum++] = ee2; 935 /* finally set the new corners */ 936 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 937 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 938 PetscCall(PetscBTSet(btv, jj[k3])); 939 } 940 } 941 } 942 } else { 943 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 944 } 945 } 946 } 947 if (!has_candidates) { /* circular edge */ 948 PetscInt k, ee = idxs[0], *tmarks; 949 950 PetscCall(PetscCalloc1(ne, &tmarks)); 951 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 952 for (k = ii[ee]; k < ii[ee + 1]; k++) { 953 PetscInt k2; 954 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 955 PetscCall(PetscBTSet(btv, jj[k])); 956 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 957 } 958 for (j = 0; j < size; j++) { 959 if (tmarks[idxs[j]] > 1) { 960 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 961 newprimals[cum++] = idxs[j]; 962 } 963 } 964 PetscCall(PetscFree(tmarks)); 965 } 966 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 967 } 968 PetscCall(ISDestroy(&extcols[i])); 969 } 970 PetscCall(PetscFree(extcols)); 971 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 972 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 973 if (fl2g) { 974 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 975 PetscCall(ISDestroy(&primals)); 976 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 977 PetscCall(PetscFree(eedges)); 978 } 979 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 980 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 981 PetscCall(PetscFree(newprimals)); 982 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 983 PetscCall(ISDestroy(&primals)); 984 PetscCall(PCBDDCAnalyzeInterface(pc)); 985 pcbddc->mat_graph->twodim = PETSC_FALSE; 986 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 987 if (fl2g) { 988 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 989 PetscCall(PetscMalloc1(nee, &eedges)); 990 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 991 } else { 992 eedges = alleedges; 993 primals = allprimals; 994 } 995 PetscCall(PetscCalloc1(nee, &extcols)); 996 997 /* Mark again */ 998 PetscCall(PetscArrayzero(marks, ne)); 999 for (i = 0; i < nee; i++) { 1000 PetscInt size, mark = i + 1; 1001 1002 PetscCall(ISGetLocalSize(eedges[i], &size)); 1003 PetscCall(ISGetIndices(eedges[i], &idxs)); 1004 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1005 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1006 } 1007 if (print) { 1008 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1009 PetscCall(ISView(primals, NULL)); 1010 } 1011 1012 /* Recompute extended cols */ 1013 eerr = PETSC_FALSE; 1014 for (i = 0; i < nee; i++) { 1015 PetscInt size; 1016 1017 cum = 0; 1018 PetscCall(ISGetLocalSize(eedges[i], &size)); 1019 if (!size && nedfieldlocal) continue; 1020 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1021 PetscCall(ISGetIndices(eedges[i], &idxs)); 1022 for (j = 0; j < size; j++) { 1023 PetscInt k, ee = idxs[j]; 1024 for (k = ii[ee]; k < ii[ee + 1]; k++) 1025 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1026 } 1027 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1028 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1029 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1030 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1031 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1032 if (cum != size - 1) { 1033 if (print) { 1034 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1035 PetscCall(ISView(eedges[i], NULL)); 1036 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1037 PetscCall(ISView(extcols[i], NULL)); 1038 } 1039 eerr = PETSC_TRUE; 1040 } 1041 } 1042 } 1043 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1044 PetscCall(PetscFree2(extrow, gidxs)); 1045 PetscCall(PetscBTDestroy(&bter)); 1046 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1047 /* an error should not occur at this point */ 1048 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1049 1050 /* Check the number of endpoints */ 1051 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1052 PetscCall(PetscMalloc1(2 * nee, &corners)); 1053 PetscCall(PetscMalloc1(nee, &cedges)); 1054 for (i = 0; i < nee; i++) { 1055 PetscInt size, found = 0, gc[2]; 1056 1057 /* init with defaults */ 1058 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1059 PetscCall(ISGetLocalSize(eedges[i], &size)); 1060 if (!size && nedfieldlocal) continue; 1061 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1062 PetscCall(ISGetIndices(eedges[i], &idxs)); 1063 PetscCall(PetscBTMemzero(nv, btvc)); 1064 for (j = 0; j < size; j++) { 1065 PetscInt k, ee = idxs[j]; 1066 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1067 PetscInt vv = jj[k]; 1068 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1069 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more then two corners for edge %" PetscInt_FMT, i); 1070 corners[i * 2 + found++] = vv; 1071 } 1072 } 1073 } 1074 if (found != 2) { 1075 PetscInt e; 1076 if (fl2g) { 1077 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1078 } else { 1079 e = idxs[0]; 1080 } 1081 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1082 } 1083 1084 /* get primal dof index on this coarse edge */ 1085 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1086 if (gc[0] > gc[1]) { 1087 PetscInt swap = corners[2 * i]; 1088 corners[2 * i] = corners[2 * i + 1]; 1089 corners[2 * i + 1] = swap; 1090 } 1091 cedges[i] = idxs[size - 1]; 1092 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1093 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1094 } 1095 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1096 PetscCall(PetscBTDestroy(&btvc)); 1097 1098 if (PetscDefined(USE_DEBUG)) { 1099 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1100 not interfere with neighbouring coarse edges */ 1101 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1102 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1103 for (i = 0; i < nv; i++) { 1104 PetscInt emax = 0, eemax = 0; 1105 1106 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1107 PetscCall(PetscArrayzero(emarks, nee + 1)); 1108 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1109 for (j = 1; j < nee + 1; j++) { 1110 if (emax < emarks[j]) { 1111 emax = emarks[j]; 1112 eemax = j; 1113 } 1114 } 1115 /* not relevant for edges */ 1116 if (!eemax) continue; 1117 1118 for (j = ii[i]; j < ii[i + 1]; j++) { 1119 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1120 } 1121 } 1122 PetscCall(PetscFree(emarks)); 1123 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1124 } 1125 1126 /* Compute extended rows indices for edge blocks of the change of basis */ 1127 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1128 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1129 extmem *= maxsize; 1130 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1131 PetscCall(PetscMalloc1(nee, &extrows)); 1132 PetscCall(PetscCalloc1(nee, &extrowcum)); 1133 for (i = 0; i < nv; i++) { 1134 PetscInt mark = 0, size, start; 1135 1136 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1137 for (j = ii[i]; j < ii[i + 1]; j++) 1138 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1139 1140 /* not relevant */ 1141 if (!mark) continue; 1142 1143 /* import extended row */ 1144 mark--; 1145 start = mark * extmem + extrowcum[mark]; 1146 size = ii[i + 1] - ii[i]; 1147 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1148 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1149 extrowcum[mark] += size; 1150 } 1151 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1152 PetscCall(MatDestroy(&lGt)); 1153 PetscCall(PetscFree(marks)); 1154 1155 /* Compress extrows */ 1156 cum = 0; 1157 for (i = 0; i < nee; i++) { 1158 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1159 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1161 cum = PetscMax(cum, size); 1162 } 1163 PetscCall(PetscFree(extrowcum)); 1164 PetscCall(PetscBTDestroy(&btv)); 1165 PetscCall(PetscBTDestroy(&btvcand)); 1166 1167 /* Workspace for lapack inner calls and VecSetValues */ 1168 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1169 1170 /* Create change of basis matrix (preallocation can be improved) */ 1171 PetscCall(MatCreate(comm, &T)); 1172 PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N)); 1173 PetscCall(MatSetType(T, MATAIJ)); 1174 PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL)); 1175 PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL)); 1176 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1177 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1178 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1179 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1180 1181 /* Defaults to identity */ 1182 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 1183 PetscCall(VecSet(tvec, 1.0)); 1184 PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES)); 1185 PetscCall(VecDestroy(&tvec)); 1186 1187 /* Create discrete gradient for the coarser level if needed */ 1188 PetscCall(MatDestroy(&pcbddc->nedcG)); 1189 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1190 if (pcbddc->current_level < pcbddc->max_levels) { 1191 ISLocalToGlobalMapping cel2g, cvl2g; 1192 IS wis, gwis; 1193 PetscInt cnv, cne; 1194 1195 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1196 if (fl2g) { 1197 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1198 } else { 1199 PetscCall(PetscObjectReference((PetscObject)wis)); 1200 pcbddc->nedclocal = wis; 1201 } 1202 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1203 PetscCall(ISDestroy(&wis)); 1204 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1205 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1206 PetscCall(ISDestroy(&wis)); 1207 PetscCall(ISDestroy(&gwis)); 1208 1209 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1210 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1211 PetscCall(ISDestroy(&wis)); 1212 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1213 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1214 PetscCall(ISDestroy(&wis)); 1215 PetscCall(ISDestroy(&gwis)); 1216 1217 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1218 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1219 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1220 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1221 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1222 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1223 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1224 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1225 } 1226 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1227 1228 #if defined(PRINT_GDET) 1229 inc = 0; 1230 lev = pcbddc->current_level; 1231 #endif 1232 1233 /* Insert values in the change of basis matrix */ 1234 for (i = 0; i < nee; i++) { 1235 Mat Gins = NULL, GKins = NULL; 1236 IS cornersis = NULL; 1237 PetscScalar cvals[2]; 1238 1239 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1240 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1241 if (Gins && GKins) { 1242 const PetscScalar *data; 1243 const PetscInt *rows, *cols; 1244 PetscInt nrh, nch, nrc, ncc; 1245 1246 PetscCall(ISGetIndices(eedges[i], &cols)); 1247 /* H1 */ 1248 PetscCall(ISGetIndices(extrows[i], &rows)); 1249 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1250 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1251 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1252 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1253 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1254 /* complement */ 1255 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1256 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1257 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1258 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1259 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1260 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1261 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1262 1263 /* coarse discrete gradient */ 1264 if (pcbddc->nedcG) { 1265 PetscInt cols[2]; 1266 1267 cols[0] = 2 * i; 1268 cols[1] = 2 * i + 1; 1269 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1270 } 1271 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1272 } 1273 PetscCall(ISDestroy(&extrows[i])); 1274 PetscCall(ISDestroy(&extcols[i])); 1275 PetscCall(ISDestroy(&cornersis)); 1276 PetscCall(MatDestroy(&Gins)); 1277 PetscCall(MatDestroy(&GKins)); 1278 } 1279 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1280 1281 /* Start assembling */ 1282 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1283 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1284 1285 /* Free */ 1286 if (fl2g) { 1287 PetscCall(ISDestroy(&primals)); 1288 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1289 PetscCall(PetscFree(eedges)); 1290 } 1291 1292 /* hack mat_graph with primal dofs on the coarse edges */ 1293 { 1294 PCBDDCGraph graph = pcbddc->mat_graph; 1295 PetscInt *oqueue = graph->queue; 1296 PetscInt *ocptr = graph->cptr; 1297 PetscInt ncc, *idxs; 1298 1299 /* find first primal edge */ 1300 if (pcbddc->nedclocal) { 1301 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1302 } else { 1303 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1304 idxs = cedges; 1305 } 1306 cum = 0; 1307 while (cum < nee && cedges[cum] < 0) cum++; 1308 1309 /* adapt connected components */ 1310 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1311 graph->cptr[0] = 0; 1312 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1313 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1314 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1315 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1316 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1317 ncc++; 1318 lc--; 1319 cum++; 1320 while (cum < nee && cedges[cum] < 0) cum++; 1321 } 1322 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1323 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1324 ncc++; 1325 } 1326 graph->ncc = ncc; 1327 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1328 PetscCall(PetscFree2(ocptr, oqueue)); 1329 } 1330 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1331 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1332 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1333 PetscCall(MatDestroy(&conn)); 1334 1335 PetscCall(ISDestroy(&nedfieldlocal)); 1336 PetscCall(PetscFree(extrow)); 1337 PetscCall(PetscFree2(work, rwork)); 1338 PetscCall(PetscFree(corners)); 1339 PetscCall(PetscFree(cedges)); 1340 PetscCall(PetscFree(extrows)); 1341 PetscCall(PetscFree(extcols)); 1342 PetscCall(MatDestroy(&lG)); 1343 1344 /* Complete assembling */ 1345 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1346 if (pcbddc->nedcG) { 1347 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1348 #if 0 1349 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1350 PetscCall(MatView(pcbddc->nedcG,NULL)); 1351 #endif 1352 } 1353 1354 /* set change of basis */ 1355 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1356 PetscCall(MatDestroy(&T)); 1357 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 /* the near-null space of BDDC carries information on quadrature weights, 1362 and these can be collinear -> so cheat with MatNullSpaceCreate 1363 and create a suitable set of basis vectors first */ 1364 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1365 { 1366 PetscInt i; 1367 1368 PetscFunctionBegin; 1369 for (i = 0; i < nvecs; i++) { 1370 PetscInt first, last; 1371 1372 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1373 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1374 if (i >= first && i < last) { 1375 PetscScalar *data; 1376 PetscCall(VecGetArray(quad_vecs[i], &data)); 1377 if (!has_const) { 1378 data[i - first] = 1.; 1379 } else { 1380 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1381 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1382 } 1383 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1384 } 1385 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1386 } 1387 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1388 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1389 PetscInt first, last; 1390 PetscCall(VecLockReadPop(quad_vecs[i])); 1391 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1392 if (i >= first && i < last) { 1393 PetscScalar *data; 1394 PetscCall(VecGetArray(quad_vecs[i], &data)); 1395 if (!has_const) { 1396 data[i - first] = 0.; 1397 } else { 1398 data[2 * i - first] = 0.; 1399 data[2 * i - first + 1] = 0.; 1400 } 1401 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1402 } 1403 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1404 PetscCall(VecLockReadPush(quad_vecs[i])); 1405 } 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1410 { 1411 Mat loc_divudotp; 1412 Vec p, v, vins, quad_vec, *quad_vecs; 1413 ISLocalToGlobalMapping map; 1414 PetscScalar *vals; 1415 const PetscScalar *array; 1416 PetscInt i, maxneighs = 0, maxsize, *gidxs; 1417 PetscInt n_neigh, *neigh, *n_shared, **shared; 1418 PetscMPIInt rank; 1419 1420 PetscFunctionBegin; 1421 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1422 for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs); 1423 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 1424 if (!maxneighs) { 1425 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1426 *nnsp = NULL; 1427 PetscFunctionReturn(PETSC_SUCCESS); 1428 } 1429 maxsize = 0; 1430 for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize); 1431 PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals)); 1432 /* create vectors to hold quadrature weights */ 1433 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1434 if (!transpose) { 1435 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1436 } else { 1437 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1438 } 1439 PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs)); 1440 PetscCall(VecDestroy(&quad_vec)); 1441 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp)); 1442 for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i])); 1443 1444 /* compute local quad vec */ 1445 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1446 if (!transpose) { 1447 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1448 } else { 1449 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1450 } 1451 PetscCall(VecSet(p, 1.)); 1452 if (!transpose) { 1453 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1454 } else { 1455 PetscCall(MatMult(loc_divudotp, p, v)); 1456 } 1457 if (vl2l) { 1458 Mat lA; 1459 VecScatter sc; 1460 1461 PetscCall(MatISGetLocalMat(A, &lA)); 1462 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1463 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1464 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1466 PetscCall(VecScatterDestroy(&sc)); 1467 } else { 1468 vins = v; 1469 } 1470 PetscCall(VecGetArrayRead(vins, &array)); 1471 PetscCall(VecDestroy(&p)); 1472 1473 /* insert in global quadrature vecs */ 1474 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1475 for (i = 1; i < n_neigh; i++) { 1476 const PetscInt *idxs; 1477 PetscInt idx, nn, j; 1478 1479 idxs = shared[i]; 1480 nn = n_shared[i]; 1481 for (j = 0; j < nn; j++) vals[j] = array[idxs[j]]; 1482 PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx)); 1483 idx = -(idx + 1); 1484 PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs); 1485 PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs)); 1486 PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES)); 1487 } 1488 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1489 PetscCall(VecRestoreArrayRead(vins, &array)); 1490 if (vl2l) PetscCall(VecDestroy(&vins)); 1491 PetscCall(VecDestroy(&v)); 1492 PetscCall(PetscFree2(gidxs, vals)); 1493 1494 /* assemble near null space */ 1495 for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i])); 1496 for (i = 0; i < maxneighs; i++) { 1497 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1498 PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view")); 1499 PetscCall(VecLockReadPush(quad_vecs[i])); 1500 } 1501 PetscCall(VecDestroyVecs(maxneighs, &quad_vecs)); 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1506 { 1507 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1508 1509 PetscFunctionBegin; 1510 if (primalv) { 1511 if (pcbddc->user_primal_vertices_local) { 1512 IS list[2], newp; 1513 1514 list[0] = primalv; 1515 list[1] = pcbddc->user_primal_vertices_local; 1516 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1517 PetscCall(ISSortRemoveDups(newp)); 1518 PetscCall(ISDestroy(&list[1])); 1519 pcbddc->user_primal_vertices_local = newp; 1520 } else { 1521 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1522 } 1523 } 1524 PetscFunctionReturn(PETSC_SUCCESS); 1525 } 1526 1527 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1528 { 1529 PetscInt f, *comp = (PetscInt *)ctx; 1530 1531 PetscFunctionBegin; 1532 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1533 PetscFunctionReturn(PETSC_SUCCESS); 1534 } 1535 1536 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1537 { 1538 Vec local, global; 1539 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1540 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1541 PetscBool monolithic = PETSC_FALSE; 1542 1543 PetscFunctionBegin; 1544 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1545 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1546 PetscOptionsEnd(); 1547 /* need to convert from global to local topology information and remove references to information in global ordering */ 1548 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1549 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1550 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1551 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1552 if (monolithic) { /* just get block size to properly compute vertices */ 1553 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1554 goto boundary; 1555 } 1556 1557 if (pcbddc->user_provided_isfordofs) { 1558 if (pcbddc->n_ISForDofs) { 1559 PetscInt i; 1560 1561 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1562 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1563 PetscInt bs; 1564 1565 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1566 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1567 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1568 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1569 } 1570 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1571 pcbddc->n_ISForDofs = 0; 1572 PetscCall(PetscFree(pcbddc->ISForDofs)); 1573 } 1574 } else { 1575 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1576 DM dm; 1577 1578 PetscCall(MatGetDM(pc->pmat, &dm)); 1579 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1580 if (dm) { 1581 IS *fields; 1582 PetscInt nf, i; 1583 1584 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1585 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1586 for (i = 0; i < nf; i++) { 1587 PetscInt bs; 1588 1589 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1590 PetscCall(ISGetBlockSize(fields[i], &bs)); 1591 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1592 PetscCall(ISDestroy(&fields[i])); 1593 } 1594 PetscCall(PetscFree(fields)); 1595 pcbddc->n_ISForDofsLocal = nf; 1596 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1597 PetscContainer c; 1598 1599 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1600 if (c) { 1601 MatISLocalFields lf; 1602 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1603 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1604 } else { /* fallback, create the default fields if bs > 1 */ 1605 PetscInt i, n = matis->A->rmap->n; 1606 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1607 if (i > 1) { 1608 pcbddc->n_ISForDofsLocal = i; 1609 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1610 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1611 } 1612 } 1613 } 1614 } else { 1615 PetscInt i; 1616 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1617 } 1618 } 1619 1620 boundary: 1621 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1622 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1623 } else if (pcbddc->DirichletBoundariesLocal) { 1624 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1625 } 1626 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1627 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1628 } else if (pcbddc->NeumannBoundariesLocal) { 1629 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1630 } 1631 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1632 PetscCall(VecDestroy(&global)); 1633 PetscCall(VecDestroy(&local)); 1634 /* detect local disconnected subdomains if requested (use matis->A) */ 1635 if (pcbddc->detect_disconnected) { 1636 IS primalv = NULL; 1637 PetscInt i; 1638 PetscBool filter = pcbddc->detect_disconnected_filter; 1639 1640 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1641 PetscCall(PetscFree(pcbddc->local_subs)); 1642 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1643 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1644 PetscCall(ISDestroy(&primalv)); 1645 } 1646 /* early stage corner detection */ 1647 { 1648 DM dm; 1649 1650 PetscCall(MatGetDM(pc->pmat, &dm)); 1651 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1652 if (dm) { 1653 PetscBool isda; 1654 1655 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1656 if (isda) { 1657 ISLocalToGlobalMapping l2l; 1658 IS corners; 1659 Mat lA; 1660 PetscBool gl, lo; 1661 1662 { 1663 Vec cvec; 1664 const PetscScalar *coords; 1665 PetscInt dof, n, cdim; 1666 PetscBool memc = PETSC_TRUE; 1667 1668 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1669 PetscCall(DMGetCoordinates(dm, &cvec)); 1670 PetscCall(VecGetLocalSize(cvec, &n)); 1671 PetscCall(VecGetBlockSize(cvec, &cdim)); 1672 n /= cdim; 1673 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1674 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1675 PetscCall(VecGetArrayRead(cvec, &coords)); 1676 #if defined(PETSC_USE_COMPLEX) 1677 memc = PETSC_FALSE; 1678 #endif 1679 if (dof != 1) memc = PETSC_FALSE; 1680 if (memc) { 1681 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1682 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1683 PetscReal *bcoords = pcbddc->mat_graph->coords; 1684 PetscInt i, b, d; 1685 1686 for (i = 0; i < n; i++) { 1687 for (b = 0; b < dof; b++) { 1688 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1689 } 1690 } 1691 } 1692 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1693 pcbddc->mat_graph->cdim = cdim; 1694 pcbddc->mat_graph->cnloc = dof * n; 1695 pcbddc->mat_graph->cloc = PETSC_FALSE; 1696 } 1697 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1698 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1699 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1700 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1701 lo = (PetscBool)(l2l && corners); 1702 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1703 if (gl) { /* From PETSc's DMDA */ 1704 const PetscInt *idx; 1705 PetscInt dof, bs, *idxout, n; 1706 1707 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1708 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1709 PetscCall(ISGetLocalSize(corners, &n)); 1710 PetscCall(ISGetIndices(corners, &idx)); 1711 if (bs == dof) { 1712 PetscCall(PetscMalloc1(n, &idxout)); 1713 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1714 } else { /* the original DMDA local-to-local map have been modified */ 1715 PetscInt i, d; 1716 1717 PetscCall(PetscMalloc1(dof * n, &idxout)); 1718 for (i = 0; i < n; i++) 1719 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1720 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1721 1722 bs = 1; 1723 n *= dof; 1724 } 1725 PetscCall(ISRestoreIndices(corners, &idx)); 1726 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1727 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1728 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1729 PetscCall(ISDestroy(&corners)); 1730 pcbddc->corner_selected = PETSC_TRUE; 1731 pcbddc->corner_selection = PETSC_TRUE; 1732 } 1733 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1734 } 1735 } 1736 } 1737 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1738 DM dm; 1739 1740 PetscCall(MatGetDM(pc->pmat, &dm)); 1741 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1742 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1743 Vec vcoords; 1744 PetscSection section; 1745 PetscReal *coords; 1746 PetscInt d, cdim, nl, nf, **ctxs; 1747 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1748 /* debug coordinates */ 1749 PetscViewer viewer; 1750 PetscBool flg; 1751 PetscViewerFormat format; 1752 const char *prefix; 1753 1754 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1755 PetscCall(DMGetLocalSection(dm, §ion)); 1756 PetscCall(PetscSectionGetNumFields(section, &nf)); 1757 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1758 PetscCall(VecGetLocalSize(vcoords, &nl)); 1759 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1760 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1761 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1762 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1763 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1764 1765 /* debug coordinates */ 1766 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1767 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1768 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1769 for (d = 0; d < cdim; d++) { 1770 PetscInt i; 1771 const PetscScalar *v; 1772 char name[16]; 1773 1774 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1775 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1776 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1777 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1778 if (flg) PetscCall(VecView(vcoords, viewer)); 1779 PetscCall(VecGetArrayRead(vcoords, &v)); 1780 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1781 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1782 } 1783 PetscCall(VecDestroy(&vcoords)); 1784 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1785 PetscCall(PetscFree(coords)); 1786 PetscCall(PetscFree(ctxs[0])); 1787 PetscCall(PetscFree2(funcs, ctxs)); 1788 if (flg) { 1789 PetscCall(PetscViewerPopFormat(viewer)); 1790 PetscCall(PetscViewerDestroy(&viewer)); 1791 } 1792 } 1793 } 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1798 { 1799 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 1800 IS nis; 1801 const PetscInt *idxs; 1802 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1803 1804 PetscFunctionBegin; 1805 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1806 if (mop == MPI_LAND) { 1807 /* init rootdata with true */ 1808 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1809 } else { 1810 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1811 } 1812 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1813 PetscCall(ISGetLocalSize(*is, &nd)); 1814 PetscCall(ISGetIndices(*is, &idxs)); 1815 for (i = 0; i < nd; i++) 1816 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1817 PetscCall(ISRestoreIndices(*is, &idxs)); 1818 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1819 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1820 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1821 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1822 if (mop == MPI_LAND) { 1823 PetscCall(PetscMalloc1(nd, &nidxs)); 1824 } else { 1825 PetscCall(PetscMalloc1(n, &nidxs)); 1826 } 1827 for (i = 0, nnd = 0; i < n; i++) 1828 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1829 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1830 PetscCall(ISDestroy(is)); 1831 *is = nis; 1832 PetscFunctionReturn(PETSC_SUCCESS); 1833 } 1834 1835 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 1836 { 1837 PC_IS *pcis = (PC_IS *)(pc->data); 1838 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 1839 1840 PetscFunctionBegin; 1841 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 1842 if (pcbddc->ChangeOfBasisMatrix) { 1843 Vec swap; 1844 1845 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1846 swap = pcbddc->work_change; 1847 pcbddc->work_change = r; 1848 r = swap; 1849 } 1850 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1851 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1852 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1853 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1854 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1855 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1856 PetscCall(VecSet(z, 0.)); 1857 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1858 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1859 if (pcbddc->ChangeOfBasisMatrix) { 1860 pcbddc->work_change = r; 1861 PetscCall(VecCopy(z, pcbddc->work_change)); 1862 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1863 } 1864 PetscFunctionReturn(PETSC_SUCCESS); 1865 } 1866 1867 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1868 { 1869 PCBDDCBenignMatMult_ctx ctx; 1870 PetscBool apply_right, apply_left, reset_x; 1871 1872 PetscFunctionBegin; 1873 PetscCall(MatShellGetContext(A, &ctx)); 1874 if (transpose) { 1875 apply_right = ctx->apply_left; 1876 apply_left = ctx->apply_right; 1877 } else { 1878 apply_right = ctx->apply_right; 1879 apply_left = ctx->apply_left; 1880 } 1881 reset_x = PETSC_FALSE; 1882 if (apply_right) { 1883 const PetscScalar *ax; 1884 PetscInt nl, i; 1885 1886 PetscCall(VecGetLocalSize(x, &nl)); 1887 PetscCall(VecGetArrayRead(x, &ax)); 1888 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 1889 PetscCall(VecRestoreArrayRead(x, &ax)); 1890 for (i = 0; i < ctx->benign_n; i++) { 1891 PetscScalar sum, val; 1892 const PetscInt *idxs; 1893 PetscInt nz, j; 1894 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1895 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1896 sum = 0.; 1897 if (ctx->apply_p0) { 1898 val = ctx->work[idxs[nz - 1]]; 1899 for (j = 0; j < nz - 1; j++) { 1900 sum += ctx->work[idxs[j]]; 1901 ctx->work[idxs[j]] += val; 1902 } 1903 } else { 1904 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 1905 } 1906 ctx->work[idxs[nz - 1]] -= sum; 1907 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1908 } 1909 PetscCall(VecPlaceArray(x, ctx->work)); 1910 reset_x = PETSC_TRUE; 1911 } 1912 if (transpose) { 1913 PetscCall(MatMultTranspose(ctx->A, x, y)); 1914 } else { 1915 PetscCall(MatMult(ctx->A, x, y)); 1916 } 1917 if (reset_x) PetscCall(VecResetArray(x)); 1918 if (apply_left) { 1919 PetscScalar *ay; 1920 PetscInt i; 1921 1922 PetscCall(VecGetArray(y, &ay)); 1923 for (i = 0; i < ctx->benign_n; i++) { 1924 PetscScalar sum, val; 1925 const PetscInt *idxs; 1926 PetscInt nz, j; 1927 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1928 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1929 val = -ay[idxs[nz - 1]]; 1930 if (ctx->apply_p0) { 1931 sum = 0.; 1932 for (j = 0; j < nz - 1; j++) { 1933 sum += ay[idxs[j]]; 1934 ay[idxs[j]] += val; 1935 } 1936 ay[idxs[nz - 1]] += sum; 1937 } else { 1938 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 1939 ay[idxs[nz - 1]] = 0.; 1940 } 1941 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1942 } 1943 PetscCall(VecRestoreArray(y, &ay)); 1944 } 1945 PetscFunctionReturn(PETSC_SUCCESS); 1946 } 1947 1948 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1949 { 1950 PetscFunctionBegin; 1951 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 1952 PetscFunctionReturn(PETSC_SUCCESS); 1953 } 1954 1955 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1956 { 1957 PetscFunctionBegin; 1958 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 1959 PetscFunctionReturn(PETSC_SUCCESS); 1960 } 1961 1962 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1963 { 1964 PC_IS *pcis = (PC_IS *)pc->data; 1965 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1966 PCBDDCBenignMatMult_ctx ctx; 1967 1968 PetscFunctionBegin; 1969 if (!restore) { 1970 Mat A_IB, A_BI; 1971 PetscScalar *work; 1972 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1973 1974 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 1975 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 1976 PetscCall(PetscMalloc1(pcis->n, &work)); 1977 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 1978 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 1979 PetscCall(MatSetType(A_IB, MATSHELL)); 1980 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 1981 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 1982 PetscCall(PetscNew(&ctx)); 1983 PetscCall(MatShellSetContext(A_IB, ctx)); 1984 ctx->apply_left = PETSC_TRUE; 1985 ctx->apply_right = PETSC_FALSE; 1986 ctx->apply_p0 = PETSC_FALSE; 1987 ctx->benign_n = pcbddc->benign_n; 1988 if (reuse) { 1989 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1990 ctx->free = PETSC_FALSE; 1991 } else { /* TODO: could be optimized for successive solves */ 1992 ISLocalToGlobalMapping N_to_D; 1993 PetscInt i; 1994 1995 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 1996 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 1997 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 1998 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 1999 ctx->free = PETSC_TRUE; 2000 } 2001 ctx->A = pcis->A_IB; 2002 ctx->work = work; 2003 PetscCall(MatSetUp(A_IB)); 2004 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2005 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2006 pcis->A_IB = A_IB; 2007 2008 /* A_BI as A_IB^T */ 2009 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2010 pcbddc->benign_original_mat = pcis->A_BI; 2011 pcis->A_BI = A_BI; 2012 } else { 2013 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2014 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2015 PetscCall(MatDestroy(&pcis->A_IB)); 2016 pcis->A_IB = ctx->A; 2017 ctx->A = NULL; 2018 PetscCall(MatDestroy(&pcis->A_BI)); 2019 pcis->A_BI = pcbddc->benign_original_mat; 2020 pcbddc->benign_original_mat = NULL; 2021 if (ctx->free) { 2022 PetscInt i; 2023 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2024 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2025 } 2026 PetscCall(PetscFree(ctx->work)); 2027 PetscCall(PetscFree(ctx)); 2028 } 2029 PetscFunctionReturn(PETSC_SUCCESS); 2030 } 2031 2032 /* used just in bddc debug mode */ 2033 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2034 { 2035 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2036 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2037 Mat An; 2038 2039 PetscFunctionBegin; 2040 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2041 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2042 if (is1) { 2043 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2044 PetscCall(MatDestroy(&An)); 2045 } else { 2046 *B = An; 2047 } 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 /* TODO: add reuse flag */ 2052 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2053 { 2054 Mat Bt; 2055 PetscScalar *a, *bdata; 2056 const PetscInt *ii, *ij; 2057 PetscInt m, n, i, nnz, *bii, *bij; 2058 PetscBool flg_row; 2059 2060 PetscFunctionBegin; 2061 PetscCall(MatGetSize(A, &n, &m)); 2062 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2063 PetscCall(MatSeqAIJGetArray(A, &a)); 2064 nnz = n; 2065 for (i = 0; i < ii[n]; i++) { 2066 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2067 } 2068 PetscCall(PetscMalloc1(n + 1, &bii)); 2069 PetscCall(PetscMalloc1(nnz, &bij)); 2070 PetscCall(PetscMalloc1(nnz, &bdata)); 2071 nnz = 0; 2072 bii[0] = 0; 2073 for (i = 0; i < n; i++) { 2074 PetscInt j; 2075 for (j = ii[i]; j < ii[i + 1]; j++) { 2076 PetscScalar entry = a[j]; 2077 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2078 bij[nnz] = ij[j]; 2079 bdata[nnz] = entry; 2080 nnz++; 2081 } 2082 } 2083 bii[i + 1] = nnz; 2084 } 2085 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2086 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2087 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2088 { 2089 Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data); 2090 b->free_a = PETSC_TRUE; 2091 b->free_ij = PETSC_TRUE; 2092 } 2093 if (*B == A) PetscCall(MatDestroy(&A)); 2094 *B = Bt; 2095 PetscFunctionReturn(PETSC_SUCCESS); 2096 } 2097 2098 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2099 { 2100 Mat B = NULL; 2101 DM dm; 2102 IS is_dummy, *cc_n; 2103 ISLocalToGlobalMapping l2gmap_dummy; 2104 PCBDDCGraph graph; 2105 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2106 PetscInt i, n; 2107 PetscInt *xadj, *adjncy; 2108 PetscBool isplex = PETSC_FALSE; 2109 2110 PetscFunctionBegin; 2111 if (ncc) *ncc = 0; 2112 if (cc) *cc = NULL; 2113 if (primalv) *primalv = NULL; 2114 PetscCall(PCBDDCGraphCreate(&graph)); 2115 PetscCall(MatGetDM(pc->pmat, &dm)); 2116 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2117 if (dm) PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex)); 2118 if (filter) isplex = PETSC_FALSE; 2119 2120 if (isplex) { /* this code has been modified from plexpartition.c */ 2121 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2122 PetscInt *adj = NULL; 2123 IS cellNumbering; 2124 const PetscInt *cellNum; 2125 PetscBool useCone, useClosure; 2126 PetscSection section; 2127 PetscSegBuffer adjBuffer; 2128 PetscSF sfPoint; 2129 2130 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2131 PetscCall(DMGetPointSF(dm, &sfPoint)); 2132 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2133 /* Build adjacency graph via a section/segbuffer */ 2134 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2135 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2136 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2137 /* Always use FVM adjacency to create partitioner graph */ 2138 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2139 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2140 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2141 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2142 for (n = 0, p = pStart; p < pEnd; p++) { 2143 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2144 if (nroots > 0) { 2145 if (cellNum[p] < 0) continue; 2146 } 2147 adjSize = PETSC_DETERMINE; 2148 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2149 for (a = 0; a < adjSize; ++a) { 2150 const PetscInt point = adj[a]; 2151 if (pStart <= point && point < pEnd) { 2152 PetscInt *PETSC_RESTRICT pBuf; 2153 PetscCall(PetscSectionAddDof(section, p, 1)); 2154 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2155 *pBuf = point; 2156 } 2157 } 2158 n++; 2159 } 2160 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2161 /* Derive CSR graph from section/segbuffer */ 2162 PetscCall(PetscSectionSetUp(section)); 2163 PetscCall(PetscSectionGetStorageSize(section, &size)); 2164 PetscCall(PetscMalloc1(n + 1, &xadj)); 2165 for (idx = 0, p = pStart; p < pEnd; p++) { 2166 if (nroots > 0) { 2167 if (cellNum[p] < 0) continue; 2168 } 2169 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2170 } 2171 xadj[n] = size; 2172 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2173 /* Clean up */ 2174 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2175 PetscCall(PetscSectionDestroy(§ion)); 2176 PetscCall(PetscFree(adj)); 2177 graph->xadj = xadj; 2178 graph->adjncy = adjncy; 2179 } else { 2180 Mat A; 2181 PetscBool isseqaij, flg_row; 2182 2183 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2184 if (!A->rmap->N || !A->cmap->N) { 2185 PetscCall(PCBDDCGraphDestroy(&graph)); 2186 PetscFunctionReturn(PETSC_SUCCESS); 2187 } 2188 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2189 if (!isseqaij && filter) { 2190 PetscBool isseqdense; 2191 2192 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2193 if (!isseqdense) { 2194 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2195 } else { /* TODO: rectangular case and LDA */ 2196 PetscScalar *array; 2197 PetscReal chop = 1.e-6; 2198 2199 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2200 PetscCall(MatDenseGetArray(B, &array)); 2201 PetscCall(MatGetSize(B, &n, NULL)); 2202 for (i = 0; i < n; i++) { 2203 PetscInt j; 2204 for (j = i + 1; j < n; j++) { 2205 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2206 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2207 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2208 } 2209 } 2210 PetscCall(MatDenseRestoreArray(B, &array)); 2211 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2212 } 2213 } else { 2214 PetscCall(PetscObjectReference((PetscObject)A)); 2215 B = A; 2216 } 2217 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2218 2219 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2220 if (filter) { 2221 PetscScalar *data; 2222 PetscInt j, cum; 2223 2224 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2225 PetscCall(MatSeqAIJGetArray(B, &data)); 2226 cum = 0; 2227 for (i = 0; i < n; i++) { 2228 PetscInt t; 2229 2230 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2231 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2232 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2233 } 2234 t = xadj_filtered[i]; 2235 xadj_filtered[i] = cum; 2236 cum += t; 2237 } 2238 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2239 graph->xadj = xadj_filtered; 2240 graph->adjncy = adjncy_filtered; 2241 } else { 2242 graph->xadj = xadj; 2243 graph->adjncy = adjncy; 2244 } 2245 } 2246 /* compute local connected components using PCBDDCGraph */ 2247 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2248 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2249 PetscCall(ISDestroy(&is_dummy)); 2250 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2251 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2252 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2253 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2254 2255 /* partial clean up */ 2256 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2257 if (B) { 2258 PetscBool flg_row; 2259 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2260 PetscCall(MatDestroy(&B)); 2261 } 2262 if (isplex) { 2263 PetscCall(PetscFree(xadj)); 2264 PetscCall(PetscFree(adjncy)); 2265 } 2266 2267 /* get back data */ 2268 if (isplex) { 2269 if (ncc) *ncc = graph->ncc; 2270 if (cc || primalv) { 2271 Mat A; 2272 PetscBT btv, btvt; 2273 PetscSection subSection; 2274 PetscInt *ids, cum, cump, *cids, *pids; 2275 2276 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2277 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2278 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2279 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2280 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2281 2282 cids[0] = 0; 2283 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2284 PetscInt j; 2285 2286 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2287 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2288 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2289 2290 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2291 for (k = 0; k < 2 * size; k += 2) { 2292 PetscInt s, pp, p = closure[k], off, dof, cdof; 2293 2294 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2295 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2296 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2297 for (s = 0; s < dof - cdof; s++) { 2298 if (PetscBTLookupSet(btvt, off + s)) continue; 2299 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2300 else pids[cump++] = off + s; /* cross-vertex */ 2301 } 2302 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2303 if (pp != p) { 2304 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2305 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2306 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2307 for (s = 0; s < dof - cdof; s++) { 2308 if (PetscBTLookupSet(btvt, off + s)) continue; 2309 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2310 else pids[cump++] = off + s; /* cross-vertex */ 2311 } 2312 } 2313 } 2314 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2315 } 2316 cids[i + 1] = cum; 2317 /* mark dofs as already assigned */ 2318 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2319 } 2320 if (cc) { 2321 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2322 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2323 *cc = cc_n; 2324 } 2325 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2326 PetscCall(PetscFree3(ids, cids, pids)); 2327 PetscCall(PetscBTDestroy(&btv)); 2328 PetscCall(PetscBTDestroy(&btvt)); 2329 } 2330 } else { 2331 if (ncc) *ncc = graph->ncc; 2332 if (cc) { 2333 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2334 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2335 *cc = cc_n; 2336 } 2337 } 2338 /* clean up graph */ 2339 graph->xadj = NULL; 2340 graph->adjncy = NULL; 2341 PetscCall(PCBDDCGraphDestroy(&graph)); 2342 PetscFunctionReturn(PETSC_SUCCESS); 2343 } 2344 2345 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2346 { 2347 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2348 PC_IS *pcis = (PC_IS *)(pc->data); 2349 IS dirIS = NULL; 2350 PetscInt i; 2351 2352 PetscFunctionBegin; 2353 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2354 if (zerodiag) { 2355 Mat A; 2356 Vec vec3_N; 2357 PetscScalar *vals; 2358 const PetscInt *idxs; 2359 PetscInt nz, *count; 2360 2361 /* p0 */ 2362 PetscCall(VecSet(pcis->vec1_N, 0.)); 2363 PetscCall(PetscMalloc1(pcis->n, &vals)); 2364 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2365 PetscCall(ISGetIndices(zerodiag, &idxs)); 2366 for (i = 0; i < nz; i++) vals[i] = 1.; 2367 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2368 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2369 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2370 /* v_I */ 2371 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2372 for (i = 0; i < nz; i++) vals[i] = 0.; 2373 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2374 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2375 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2376 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2377 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2378 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2379 if (dirIS) { 2380 PetscInt n; 2381 2382 PetscCall(ISGetLocalSize(dirIS, &n)); 2383 PetscCall(ISGetIndices(dirIS, &idxs)); 2384 for (i = 0; i < n; i++) vals[i] = 0.; 2385 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2386 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2387 } 2388 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2389 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2390 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2391 PetscCall(VecSet(vec3_N, 0.)); 2392 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2393 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2394 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2395 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2396 PetscCall(PetscFree(vals)); 2397 PetscCall(VecDestroy(&vec3_N)); 2398 2399 /* there should not be any pressure dofs lying on the interface */ 2400 PetscCall(PetscCalloc1(pcis->n, &count)); 2401 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2402 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2403 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2404 PetscCall(ISGetIndices(zerodiag, &idxs)); 2405 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2406 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2407 PetscCall(PetscFree(count)); 2408 } 2409 PetscCall(ISDestroy(&dirIS)); 2410 2411 /* check PCBDDCBenignGetOrSetP0 */ 2412 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2413 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2414 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2415 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2416 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2417 for (i = 0; i < pcbddc->benign_n; i++) { 2418 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2419 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2420 } 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2425 { 2426 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2427 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 2428 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2429 PetscInt nz, n, benign_n, bsp = 1; 2430 PetscInt *interior_dofs, n_interior_dofs, nneu; 2431 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2432 2433 PetscFunctionBegin; 2434 if (reuse) goto project_b0; 2435 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2436 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2437 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2438 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2439 has_null_pressures = PETSC_TRUE; 2440 have_null = PETSC_TRUE; 2441 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2442 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2443 Checks if all the pressure dofs in each subdomain have a zero diagonal 2444 If not, a change of basis on pressures is not needed 2445 since the local Schur complements are already SPD 2446 */ 2447 if (pcbddc->n_ISForDofsLocal) { 2448 IS iP = NULL; 2449 PetscInt p, *pp; 2450 PetscBool flg; 2451 2452 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2453 n = pcbddc->n_ISForDofsLocal; 2454 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2455 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2456 PetscOptionsEnd(); 2457 if (!flg) { 2458 n = 1; 2459 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2460 } 2461 2462 bsp = 0; 2463 for (p = 0; p < n; p++) { 2464 PetscInt bs; 2465 2466 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2467 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2468 bsp += bs; 2469 } 2470 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2471 bsp = 0; 2472 for (p = 0; p < n; p++) { 2473 const PetscInt *idxs; 2474 PetscInt b, bs, npl, *bidxs; 2475 2476 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2477 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2478 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2479 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2480 for (b = 0; b < bs; b++) { 2481 PetscInt i; 2482 2483 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2484 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2485 bsp++; 2486 } 2487 PetscCall(PetscFree(bidxs)); 2488 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2489 } 2490 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2491 2492 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2493 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2494 if (iP) { 2495 IS newpressures; 2496 2497 PetscCall(ISDifference(pressures, iP, &newpressures)); 2498 PetscCall(ISDestroy(&pressures)); 2499 pressures = newpressures; 2500 } 2501 PetscCall(ISSorted(pressures, &sorted)); 2502 if (!sorted) PetscCall(ISSort(pressures)); 2503 PetscCall(PetscFree(pp)); 2504 } 2505 2506 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2507 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2508 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2509 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2510 PetscCall(ISSorted(zerodiag, &sorted)); 2511 if (!sorted) PetscCall(ISSort(zerodiag)); 2512 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2513 zerodiag_save = zerodiag; 2514 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2515 if (!nz) { 2516 if (n) have_null = PETSC_FALSE; 2517 has_null_pressures = PETSC_FALSE; 2518 PetscCall(ISDestroy(&zerodiag)); 2519 } 2520 recompute_zerodiag = PETSC_FALSE; 2521 2522 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2523 zerodiag_subs = NULL; 2524 benign_n = 0; 2525 n_interior_dofs = 0; 2526 interior_dofs = NULL; 2527 nneu = 0; 2528 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2529 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2530 if (checkb) { /* need to compute interior nodes */ 2531 PetscInt n, i, j; 2532 PetscInt n_neigh, *neigh, *n_shared, **shared; 2533 PetscInt *iwork; 2534 2535 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n)); 2536 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2537 PetscCall(PetscCalloc1(n, &iwork)); 2538 PetscCall(PetscMalloc1(n, &interior_dofs)); 2539 for (i = 1; i < n_neigh; i++) 2540 for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1; 2541 for (i = 0; i < n; i++) 2542 if (!iwork[i]) interior_dofs[n_interior_dofs++] = i; 2543 PetscCall(PetscFree(iwork)); 2544 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2545 } 2546 if (has_null_pressures) { 2547 IS *subs; 2548 PetscInt nsubs, i, j, nl; 2549 const PetscInt *idxs; 2550 PetscScalar *array; 2551 Vec *work; 2552 2553 subs = pcbddc->local_subs; 2554 nsubs = pcbddc->n_local_subs; 2555 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2556 if (checkb) { 2557 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2558 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2559 PetscCall(ISGetIndices(zerodiag, &idxs)); 2560 /* work[0] = 1_p */ 2561 PetscCall(VecSet(work[0], 0.)); 2562 PetscCall(VecGetArray(work[0], &array)); 2563 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2564 PetscCall(VecRestoreArray(work[0], &array)); 2565 /* work[0] = 1_v */ 2566 PetscCall(VecSet(work[1], 1.)); 2567 PetscCall(VecGetArray(work[1], &array)); 2568 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2569 PetscCall(VecRestoreArray(work[1], &array)); 2570 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2571 } 2572 2573 if (nsubs > 1 || bsp > 1) { 2574 IS *is; 2575 PetscInt b, totb; 2576 2577 totb = bsp; 2578 is = bsp > 1 ? bzerodiag : &zerodiag; 2579 nsubs = PetscMax(nsubs, 1); 2580 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2581 for (b = 0; b < totb; b++) { 2582 for (i = 0; i < nsubs; i++) { 2583 ISLocalToGlobalMapping l2g; 2584 IS t_zerodiag_subs; 2585 PetscInt nl; 2586 2587 if (subs) { 2588 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2589 } else { 2590 IS tis; 2591 2592 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2593 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2594 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2595 PetscCall(ISDestroy(&tis)); 2596 } 2597 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2598 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2599 if (nl) { 2600 PetscBool valid = PETSC_TRUE; 2601 2602 if (checkb) { 2603 PetscCall(VecSet(matis->x, 0)); 2604 PetscCall(ISGetLocalSize(subs[i], &nl)); 2605 PetscCall(ISGetIndices(subs[i], &idxs)); 2606 PetscCall(VecGetArray(matis->x, &array)); 2607 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2608 PetscCall(VecRestoreArray(matis->x, &array)); 2609 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2610 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2611 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2612 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2613 PetscCall(VecGetArray(matis->y, &array)); 2614 for (j = 0; j < n_interior_dofs; j++) { 2615 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2616 valid = PETSC_FALSE; 2617 break; 2618 } 2619 } 2620 PetscCall(VecRestoreArray(matis->y, &array)); 2621 } 2622 if (valid && nneu) { 2623 const PetscInt *idxs; 2624 PetscInt nzb; 2625 2626 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2627 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2628 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2629 if (nzb) valid = PETSC_FALSE; 2630 } 2631 if (valid && pressures) { 2632 IS t_pressure_subs, tmp; 2633 PetscInt i1, i2; 2634 2635 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2636 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2637 PetscCall(ISGetLocalSize(tmp, &i1)); 2638 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2639 if (i2 != i1) valid = PETSC_FALSE; 2640 PetscCall(ISDestroy(&t_pressure_subs)); 2641 PetscCall(ISDestroy(&tmp)); 2642 } 2643 if (valid) { 2644 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2645 benign_n++; 2646 } else recompute_zerodiag = PETSC_TRUE; 2647 } 2648 PetscCall(ISDestroy(&t_zerodiag_subs)); 2649 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2650 } 2651 } 2652 } else { /* there's just one subdomain (or zero if they have not been detected */ 2653 PetscBool valid = PETSC_TRUE; 2654 2655 if (nneu) valid = PETSC_FALSE; 2656 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2657 if (valid && checkb) { 2658 PetscCall(MatMult(matis->A, work[0], matis->x)); 2659 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2660 PetscCall(VecGetArray(matis->x, &array)); 2661 for (j = 0; j < n_interior_dofs; j++) { 2662 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2663 valid = PETSC_FALSE; 2664 break; 2665 } 2666 } 2667 PetscCall(VecRestoreArray(matis->x, &array)); 2668 } 2669 if (valid) { 2670 benign_n = 1; 2671 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2672 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2673 zerodiag_subs[0] = zerodiag; 2674 } 2675 } 2676 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2677 } 2678 PetscCall(PetscFree(interior_dofs)); 2679 2680 if (!benign_n) { 2681 PetscInt n; 2682 2683 PetscCall(ISDestroy(&zerodiag)); 2684 recompute_zerodiag = PETSC_FALSE; 2685 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2686 if (n) have_null = PETSC_FALSE; 2687 } 2688 2689 /* final check for null pressures */ 2690 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2691 2692 if (recompute_zerodiag) { 2693 PetscCall(ISDestroy(&zerodiag)); 2694 if (benign_n == 1) { 2695 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2696 zerodiag = zerodiag_subs[0]; 2697 } else { 2698 PetscInt i, nzn, *new_idxs; 2699 2700 nzn = 0; 2701 for (i = 0; i < benign_n; i++) { 2702 PetscInt ns; 2703 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2704 nzn += ns; 2705 } 2706 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2707 nzn = 0; 2708 for (i = 0; i < benign_n; i++) { 2709 PetscInt ns, *idxs; 2710 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2711 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2712 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2713 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2714 nzn += ns; 2715 } 2716 PetscCall(PetscSortInt(nzn, new_idxs)); 2717 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2718 } 2719 have_null = PETSC_FALSE; 2720 } 2721 2722 /* determines if the coarse solver will be singular or not */ 2723 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2724 2725 /* Prepare matrix to compute no-net-flux */ 2726 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2727 Mat A, loc_divudotp; 2728 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2729 IS row, col, isused = NULL; 2730 PetscInt M, N, n, st, n_isused; 2731 2732 if (pressures) { 2733 isused = pressures; 2734 } else { 2735 isused = zerodiag_save; 2736 } 2737 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2738 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2739 PetscCall(MatGetLocalSize(A, &n, NULL)); 2740 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2741 n_isused = 0; 2742 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2743 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2744 st = st - n_isused; 2745 if (n) { 2746 const PetscInt *gidxs; 2747 2748 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2749 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2750 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2751 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2752 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2753 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2754 } else { 2755 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2756 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2757 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2758 } 2759 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2760 PetscCall(ISGetSize(row, &M)); 2761 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2762 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2763 PetscCall(ISDestroy(&row)); 2764 PetscCall(ISDestroy(&col)); 2765 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2766 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2767 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2768 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2769 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2770 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2771 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2772 PetscCall(MatDestroy(&loc_divudotp)); 2773 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2774 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2775 } 2776 PetscCall(ISDestroy(&zerodiag_save)); 2777 PetscCall(ISDestroy(&pressures)); 2778 if (bzerodiag) { 2779 PetscInt i; 2780 2781 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 2782 PetscCall(PetscFree(bzerodiag)); 2783 } 2784 pcbddc->benign_n = benign_n; 2785 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2786 2787 /* determines if the problem has subdomains with 0 pressure block */ 2788 have_null = (PetscBool)(!!pcbddc->benign_n); 2789 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2790 2791 project_b0: 2792 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2793 /* change of basis and p0 dofs */ 2794 if (pcbddc->benign_n) { 2795 PetscInt i, s, *nnz; 2796 2797 /* local change of basis for pressures */ 2798 PetscCall(MatDestroy(&pcbddc->benign_change)); 2799 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2800 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2801 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2802 PetscCall(PetscMalloc1(n, &nnz)); 2803 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2804 for (i = 0; i < pcbddc->benign_n; i++) { 2805 const PetscInt *idxs; 2806 PetscInt nzs, j; 2807 2808 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2809 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2810 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2811 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2812 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2813 } 2814 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2815 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2816 PetscCall(PetscFree(nnz)); 2817 /* set identity by default */ 2818 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 2819 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 2820 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 2821 /* set change on pressures */ 2822 for (s = 0; s < pcbddc->benign_n; s++) { 2823 PetscScalar *array; 2824 const PetscInt *idxs; 2825 PetscInt nzs; 2826 2827 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 2828 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2829 for (i = 0; i < nzs - 1; i++) { 2830 PetscScalar vals[2]; 2831 PetscInt cols[2]; 2832 2833 cols[0] = idxs[i]; 2834 cols[1] = idxs[nzs - 1]; 2835 vals[0] = 1.; 2836 vals[1] = 1.; 2837 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 2838 } 2839 PetscCall(PetscMalloc1(nzs, &array)); 2840 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 2841 array[nzs - 1] = 1.; 2842 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 2843 /* store local idxs for p0 */ 2844 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 2845 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2846 PetscCall(PetscFree(array)); 2847 } 2848 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2849 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2850 2851 /* project if needed */ 2852 if (pcbddc->benign_change_explicit) { 2853 Mat M; 2854 2855 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 2856 PetscCall(MatDestroy(&pcbddc->local_mat)); 2857 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 2858 PetscCall(MatDestroy(&M)); 2859 } 2860 /* store global idxs for p0 */ 2861 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 2862 } 2863 *zerodiaglocal = zerodiag; 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2868 { 2869 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2870 PetscScalar *array; 2871 2872 PetscFunctionBegin; 2873 if (!pcbddc->benign_sf) { 2874 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 2875 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 2876 } 2877 if (get) { 2878 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 2879 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2880 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2881 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 2882 } else { 2883 PetscCall(VecGetArray(v, &array)); 2884 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2885 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2886 PetscCall(VecRestoreArray(v, &array)); 2887 } 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2892 { 2893 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2894 2895 PetscFunctionBegin; 2896 /* TODO: add error checking 2897 - avoid nested pop (or push) calls. 2898 - cannot push before pop. 2899 - cannot call this if pcbddc->local_mat is NULL 2900 */ 2901 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 2902 if (pop) { 2903 if (pcbddc->benign_change_explicit) { 2904 IS is_p0; 2905 MatReuse reuse; 2906 2907 /* extract B_0 */ 2908 reuse = MAT_INITIAL_MATRIX; 2909 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 2910 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 2911 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 2912 /* remove rows and cols from local problem */ 2913 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 2914 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 2915 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 2916 PetscCall(ISDestroy(&is_p0)); 2917 } else { 2918 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2919 PetscScalar *vals; 2920 PetscInt i, n, *idxs_ins; 2921 2922 PetscCall(VecGetLocalSize(matis->y, &n)); 2923 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 2924 if (!pcbddc->benign_B0) { 2925 PetscInt *nnz; 2926 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 2927 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 2928 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 2929 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 2930 for (i = 0; i < pcbddc->benign_n; i++) { 2931 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 2932 nnz[i] = n - nnz[i]; 2933 } 2934 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 2935 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2936 PetscCall(PetscFree(nnz)); 2937 } 2938 2939 for (i = 0; i < pcbddc->benign_n; i++) { 2940 PetscScalar *array; 2941 PetscInt *idxs, j, nz, cum; 2942 2943 PetscCall(VecSet(matis->x, 0.)); 2944 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 2945 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2946 for (j = 0; j < nz; j++) vals[j] = 1.; 2947 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 2948 PetscCall(VecAssemblyBegin(matis->x)); 2949 PetscCall(VecAssemblyEnd(matis->x)); 2950 PetscCall(VecSet(matis->y, 0.)); 2951 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2952 PetscCall(VecGetArray(matis->y, &array)); 2953 cum = 0; 2954 for (j = 0; j < n; j++) { 2955 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2956 vals[cum] = array[j]; 2957 idxs_ins[cum] = j; 2958 cum++; 2959 } 2960 } 2961 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 2962 PetscCall(VecRestoreArray(matis->y, &array)); 2963 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2964 } 2965 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2966 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2967 PetscCall(PetscFree2(idxs_ins, vals)); 2968 } 2969 } else { /* push */ 2970 2971 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 2972 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 2973 PetscScalar *B0_vals; 2974 PetscInt *B0_cols, B0_ncol; 2975 2976 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2977 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 2978 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 2979 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 2980 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2981 } 2982 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2983 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2984 } 2985 PetscFunctionReturn(PETSC_SUCCESS); 2986 } 2987 2988 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2989 { 2990 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2991 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2992 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 2993 PetscBLASInt *B_iwork, *B_ifail; 2994 PetscScalar *work, lwork; 2995 PetscScalar *St, *S, *eigv; 2996 PetscScalar *Sarray, *Starray; 2997 PetscReal *eigs, thresh, lthresh, uthresh; 2998 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 2999 PetscBool allocated_S_St, upart; 3000 #if defined(PETSC_USE_COMPLEX) 3001 PetscReal *rwork; 3002 #endif 3003 3004 PetscFunctionBegin; 3005 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3006 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3007 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3008 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3009 sub_schurs->is_posdef); 3010 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3011 3012 if (pcbddc->dbg_flag) { 3013 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3014 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3015 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3016 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3017 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3018 } 3019 3020 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3021 3022 /* max size of subsets */ 3023 mss = 0; 3024 for (i = 0; i < sub_schurs->n_subs; i++) { 3025 PetscInt subset_size; 3026 3027 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3028 mss = PetscMax(mss, subset_size); 3029 } 3030 3031 /* min/max and threshold */ 3032 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3033 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3034 nmax = PetscMax(nmin, nmax); 3035 allocated_S_St = PETSC_FALSE; 3036 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3037 allocated_S_St = PETSC_TRUE; 3038 } 3039 3040 /* allocate lapack workspace */ 3041 cum = cum2 = 0; 3042 maxneigs = 0; 3043 for (i = 0; i < sub_schurs->n_subs; i++) { 3044 PetscInt n, subset_size; 3045 3046 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3047 n = PetscMin(subset_size, nmax); 3048 cum += subset_size; 3049 cum2 += subset_size * n; 3050 maxneigs = PetscMax(maxneigs, n); 3051 } 3052 lwork = 0; 3053 if (mss) { 3054 PetscScalar sdummy = 0.; 3055 PetscBLASInt B_itype = 1; 3056 PetscBLASInt B_N = mss, idummy = 0; 3057 PetscReal rdummy = 0., zero = 0.0; 3058 PetscReal eps = 0.0; /* dlamch? */ 3059 3060 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3061 B_lwork = -1; 3062 /* some implementations may complain about NULL pointers, even if we are querying */ 3063 S = &sdummy; 3064 St = &sdummy; 3065 eigs = &rdummy; 3066 eigv = &sdummy; 3067 B_iwork = &idummy; 3068 B_ifail = &idummy; 3069 #if defined(PETSC_USE_COMPLEX) 3070 rwork = &rdummy; 3071 #endif 3072 thresh = 1.0; 3073 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3074 #if defined(PETSC_USE_COMPLEX) 3075 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3076 #else 3077 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3078 #endif 3079 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3080 PetscCall(PetscFPTrapPop()); 3081 } 3082 3083 nv = 0; 3084 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3085 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3086 } 3087 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3088 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3089 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3090 #if defined(PETSC_USE_COMPLEX) 3091 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3092 #endif 3093 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3094 &pcbddc->adaptive_constraints_data)); 3095 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3096 3097 maxneigs = 0; 3098 cum = cumarray = 0; 3099 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3100 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3101 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3102 const PetscInt *idxs; 3103 3104 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3105 for (cum = 0; cum < nv; cum++) { 3106 pcbddc->adaptive_constraints_n[cum] = 1; 3107 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3108 pcbddc->adaptive_constraints_data[cum] = 1.0; 3109 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3110 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3111 } 3112 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3113 } 3114 3115 if (mss) { /* multilevel */ 3116 if (sub_schurs->gdsw) { 3117 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3118 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3119 } else { 3120 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3121 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3122 } 3123 } 3124 3125 lthresh = pcbddc->adaptive_threshold[0]; 3126 uthresh = pcbddc->adaptive_threshold[1]; 3127 upart = pcbddc->use_deluxe_scaling; 3128 for (i = 0; i < sub_schurs->n_subs; i++) { 3129 const PetscInt *idxs; 3130 PetscReal upper, lower; 3131 PetscInt j, subset_size, eigs_start = 0; 3132 PetscBLASInt B_N; 3133 PetscBool same_data = PETSC_FALSE; 3134 PetscBool scal = PETSC_FALSE; 3135 3136 if (upart) { 3137 upper = PETSC_MAX_REAL; 3138 lower = uthresh; 3139 } else { 3140 if (sub_schurs->gdsw) { 3141 upper = uthresh; 3142 lower = PETSC_MIN_REAL; 3143 } else { 3144 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3145 upper = 1. / uthresh; 3146 lower = 0.; 3147 } 3148 } 3149 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3150 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3151 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3152 /* this is experimental: we assume the dofs have been properly grouped to have 3153 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3154 if (!sub_schurs->is_posdef) { 3155 Mat T; 3156 3157 for (j = 0; j < subset_size; j++) { 3158 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3159 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3160 PetscCall(MatScale(T, -1.0)); 3161 PetscCall(MatDestroy(&T)); 3162 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3163 PetscCall(MatScale(T, -1.0)); 3164 PetscCall(MatDestroy(&T)); 3165 if (sub_schurs->change_primal_sub) { 3166 PetscInt nz, k; 3167 const PetscInt *idxs; 3168 3169 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3170 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3171 for (k = 0; k < nz; k++) { 3172 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3173 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3174 } 3175 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3176 } 3177 scal = PETSC_TRUE; 3178 break; 3179 } 3180 } 3181 } 3182 3183 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3184 if (sub_schurs->is_symmetric) { 3185 PetscInt j, k; 3186 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3187 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3188 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3189 } 3190 for (j = 0; j < subset_size; j++) { 3191 for (k = j; k < subset_size; k++) { 3192 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3193 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3194 } 3195 } 3196 } else { 3197 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3198 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3199 } 3200 } else { 3201 S = Sarray + cumarray; 3202 St = Starray + cumarray; 3203 } 3204 /* see if we can save some work */ 3205 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3206 3207 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3208 B_neigs = 0; 3209 } else { 3210 PetscBLASInt B_itype = 1; 3211 PetscBLASInt B_IL, B_IU; 3212 PetscReal eps = -1.0; /* dlamch? */ 3213 PetscInt nmin_s; 3214 PetscBool compute_range; 3215 3216 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3217 B_neigs = 0; 3218 compute_range = (PetscBool)!same_data; 3219 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3220 3221 if (pcbddc->dbg_flag) { 3222 PetscInt nc = 0; 3223 3224 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3225 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, 3226 sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc)); 3227 } 3228 3229 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3230 if (compute_range) { 3231 /* ask for eigenvalues larger than thresh */ 3232 if (sub_schurs->is_posdef) { 3233 #if defined(PETSC_USE_COMPLEX) 3234 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3235 #else 3236 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3237 #endif 3238 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3239 } else { /* no theory so far, but it works nicely */ 3240 PetscInt recipe = 0, recipe_m = 1; 3241 PetscReal bb[2]; 3242 3243 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3244 switch (recipe) { 3245 case 0: 3246 if (scal) { 3247 bb[0] = PETSC_MIN_REAL; 3248 bb[1] = lthresh; 3249 } else { 3250 bb[0] = uthresh; 3251 bb[1] = PETSC_MAX_REAL; 3252 } 3253 #if defined(PETSC_USE_COMPLEX) 3254 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3255 #else 3256 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3257 #endif 3258 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3259 break; 3260 case 1: 3261 bb[0] = PETSC_MIN_REAL; 3262 bb[1] = lthresh * lthresh; 3263 #if defined(PETSC_USE_COMPLEX) 3264 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3265 #else 3266 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3267 #endif 3268 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3269 if (!scal) { 3270 PetscBLASInt B_neigs2 = 0; 3271 3272 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3273 bb[1] = PETSC_MAX_REAL; 3274 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3275 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3276 #if defined(PETSC_USE_COMPLEX) 3277 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3278 #else 3279 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3280 #endif 3281 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3282 B_neigs += B_neigs2; 3283 } 3284 break; 3285 case 2: 3286 if (scal) { 3287 bb[0] = PETSC_MIN_REAL; 3288 bb[1] = 0; 3289 #if defined(PETSC_USE_COMPLEX) 3290 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3291 #else 3292 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3293 #endif 3294 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3295 } else { 3296 PetscBLASInt B_neigs2 = 0; 3297 PetscBool do_copy = PETSC_FALSE; 3298 3299 lthresh = PetscMax(lthresh, 0.0); 3300 if (lthresh > 0.0) { 3301 bb[0] = PETSC_MIN_REAL; 3302 bb[1] = lthresh * lthresh; 3303 3304 do_copy = PETSC_TRUE; 3305 #if defined(PETSC_USE_COMPLEX) 3306 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3307 #else 3308 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3309 #endif 3310 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3311 } 3312 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3313 bb[1] = PETSC_MAX_REAL; 3314 if (do_copy) { 3315 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3316 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3317 } 3318 #if defined(PETSC_USE_COMPLEX) 3319 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3320 #else 3321 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3322 #endif 3323 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3324 B_neigs += B_neigs2; 3325 } 3326 break; 3327 case 3: 3328 if (scal) { 3329 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3330 } else { 3331 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3332 } 3333 if (!scal) { 3334 bb[0] = uthresh; 3335 bb[1] = PETSC_MAX_REAL; 3336 #if defined(PETSC_USE_COMPLEX) 3337 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3338 #else 3339 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3340 #endif 3341 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3342 } 3343 if (recipe_m > 0 && B_N - B_neigs > 0) { 3344 PetscBLASInt B_neigs2 = 0; 3345 3346 B_IL = 1; 3347 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3348 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3349 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3350 #if defined(PETSC_USE_COMPLEX) 3351 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3352 #else 3353 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3354 #endif 3355 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3356 B_neigs += B_neigs2; 3357 } 3358 break; 3359 case 4: 3360 bb[0] = PETSC_MIN_REAL; 3361 bb[1] = lthresh; 3362 #if defined(PETSC_USE_COMPLEX) 3363 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3364 #else 3365 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3366 #endif 3367 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3368 { 3369 PetscBLASInt B_neigs2 = 0; 3370 3371 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3372 bb[1] = PETSC_MAX_REAL; 3373 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3374 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3375 #if defined(PETSC_USE_COMPLEX) 3376 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3377 #else 3378 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3379 #endif 3380 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3381 B_neigs += B_neigs2; 3382 } 3383 break; 3384 case 5: /* same as before: first compute all eigenvalues, then filter */ 3385 #if defined(PETSC_USE_COMPLEX) 3386 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3387 #else 3388 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3389 #endif 3390 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3391 { 3392 PetscInt e, k, ne; 3393 for (e = 0, ne = 0; e < B_neigs; e++) { 3394 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3395 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3396 eigs[ne] = eigs[e]; 3397 ne++; 3398 } 3399 } 3400 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3401 B_neigs = ne; 3402 } 3403 break; 3404 default: 3405 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3406 } 3407 } 3408 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3409 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3410 B_IL = 1; 3411 #if defined(PETSC_USE_COMPLEX) 3412 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3413 #else 3414 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3415 #endif 3416 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3417 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3418 PetscInt k; 3419 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3420 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3421 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3422 nmin = nmax; 3423 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3424 for (k = 0; k < nmax; k++) { 3425 eigs[k] = 1. / PETSC_SMALL; 3426 eigv[k * (subset_size + 1)] = 1.0; 3427 } 3428 } 3429 PetscCall(PetscFPTrapPop()); 3430 if (B_ierr) { 3431 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3432 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3433 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3434 } 3435 3436 if (B_neigs > nmax) { 3437 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3438 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3439 B_neigs = nmax; 3440 } 3441 3442 nmin_s = PetscMin(nmin, B_N); 3443 if (B_neigs < nmin_s) { 3444 PetscBLASInt B_neigs2 = 0; 3445 3446 if (upart) { 3447 if (scal) { 3448 B_IU = nmin_s; 3449 B_IL = B_neigs + 1; 3450 } else { 3451 B_IL = B_N - nmin_s + 1; 3452 B_IU = B_N - B_neigs; 3453 } 3454 } else { 3455 B_IL = B_neigs + 1; 3456 B_IU = nmin_s; 3457 } 3458 if (pcbddc->dbg_flag) { 3459 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3460 } 3461 if (sub_schurs->is_symmetric) { 3462 PetscInt j, k; 3463 for (j = 0; j < subset_size; j++) { 3464 for (k = j; k < subset_size; k++) { 3465 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3466 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3467 } 3468 } 3469 } else { 3470 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3471 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3472 } 3473 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3474 #if defined(PETSC_USE_COMPLEX) 3475 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3476 #else 3477 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3478 #endif 3479 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3480 PetscCall(PetscFPTrapPop()); 3481 B_neigs += B_neigs2; 3482 } 3483 if (B_ierr) { 3484 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3485 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3486 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3487 } 3488 if (pcbddc->dbg_flag) { 3489 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3490 for (j = 0; j < B_neigs; j++) { 3491 if (!sub_schurs->gdsw) { 3492 if (eigs[j] == 0.0) { 3493 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3494 } else { 3495 if (upart) { 3496 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3497 } else { 3498 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3499 } 3500 } 3501 } else { 3502 double pg = (double)eigs[j + eigs_start]; 3503 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3504 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3505 } 3506 } 3507 } 3508 } 3509 /* change the basis back to the original one */ 3510 if (sub_schurs->change) { 3511 Mat change, phi, phit; 3512 3513 if (pcbddc->dbg_flag > 2) { 3514 PetscInt ii; 3515 for (ii = 0; ii < B_neigs; ii++) { 3516 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3517 for (j = 0; j < B_N; j++) { 3518 #if defined(PETSC_USE_COMPLEX) 3519 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3520 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3521 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3522 #else 3523 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3524 #endif 3525 } 3526 } 3527 } 3528 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3530 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3531 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3532 PetscCall(MatDestroy(&phit)); 3533 PetscCall(MatDestroy(&phi)); 3534 } 3535 maxneigs = PetscMax(B_neigs, maxneigs); 3536 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3537 if (B_neigs) { 3538 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3539 3540 if (pcbddc->dbg_flag > 1) { 3541 PetscInt ii; 3542 for (ii = 0; ii < B_neigs; ii++) { 3543 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3544 for (j = 0; j < B_N; j++) { 3545 #if defined(PETSC_USE_COMPLEX) 3546 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3547 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3548 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3549 #else 3550 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3551 #endif 3552 } 3553 } 3554 } 3555 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3556 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3557 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3558 cum++; 3559 } 3560 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3561 /* shift for next computation */ 3562 cumarray += subset_size * subset_size; 3563 } 3564 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3565 3566 if (mss) { 3567 if (sub_schurs->gdsw) { 3568 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3569 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3570 } else { 3571 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3572 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3573 /* destroy matrices (junk) */ 3574 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3575 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3576 } 3577 } 3578 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3579 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3580 #if defined(PETSC_USE_COMPLEX) 3581 PetscCall(PetscFree(rwork)); 3582 #endif 3583 if (pcbddc->dbg_flag) { 3584 PetscInt maxneigs_r; 3585 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3586 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3587 } 3588 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3589 PetscFunctionReturn(PETSC_SUCCESS); 3590 } 3591 3592 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3593 { 3594 PetscScalar *coarse_submat_vals; 3595 3596 PetscFunctionBegin; 3597 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3598 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3599 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3600 3601 /* Setup local neumann solver ksp_R */ 3602 /* PCBDDCSetUpLocalScatters should be called first! */ 3603 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3604 3605 /* 3606 Setup local correction and local part of coarse basis. 3607 Gives back the dense local part of the coarse matrix in column major ordering 3608 */ 3609 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals)); 3610 3611 /* Compute total number of coarse nodes and setup coarse solver */ 3612 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals)); 3613 3614 /* free */ 3615 PetscCall(PetscFree(coarse_submat_vals)); 3616 PetscFunctionReturn(PETSC_SUCCESS); 3617 } 3618 3619 PetscErrorCode PCBDDCResetCustomization(PC pc) 3620 { 3621 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3622 3623 PetscFunctionBegin; 3624 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3625 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3626 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3627 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3628 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3629 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3630 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3631 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3632 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3633 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3634 PetscFunctionReturn(PETSC_SUCCESS); 3635 } 3636 3637 PetscErrorCode PCBDDCResetTopography(PC pc) 3638 { 3639 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3640 PetscInt i; 3641 3642 PetscFunctionBegin; 3643 PetscCall(MatDestroy(&pcbddc->nedcG)); 3644 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3645 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3646 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3647 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3648 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3649 PetscCall(VecDestroy(&pcbddc->work_change)); 3650 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3651 PetscCall(MatDestroy(&pcbddc->divudotp)); 3652 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3653 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3654 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3655 pcbddc->n_local_subs = 0; 3656 PetscCall(PetscFree(pcbddc->local_subs)); 3657 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3658 pcbddc->graphanalyzed = PETSC_FALSE; 3659 pcbddc->recompute_topography = PETSC_TRUE; 3660 pcbddc->corner_selected = PETSC_FALSE; 3661 PetscFunctionReturn(PETSC_SUCCESS); 3662 } 3663 3664 PetscErrorCode PCBDDCResetSolvers(PC pc) 3665 { 3666 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3667 3668 PetscFunctionBegin; 3669 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3670 if (pcbddc->coarse_phi_B) { 3671 PetscScalar *array; 3672 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array)); 3673 PetscCall(PetscFree(array)); 3674 } 3675 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3676 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3677 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3678 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3679 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3680 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3681 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3682 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3683 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3684 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3685 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3686 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3687 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3688 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3689 PetscCall(KSPReset(pcbddc->ksp_D)); 3690 PetscCall(KSPReset(pcbddc->ksp_R)); 3691 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3692 PetscCall(MatDestroy(&pcbddc->local_mat)); 3693 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3694 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3695 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3696 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3697 PetscCall(MatDestroy(&pcbddc->benign_change)); 3698 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3699 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3700 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3701 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3702 if (pcbddc->benign_zerodiag_subs) { 3703 PetscInt i; 3704 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3705 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3706 } 3707 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3708 PetscFunctionReturn(PETSC_SUCCESS); 3709 } 3710 3711 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3712 { 3713 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3714 PC_IS *pcis = (PC_IS *)pc->data; 3715 VecType impVecType; 3716 PetscInt n_constraints, n_R, old_size; 3717 3718 PetscFunctionBegin; 3719 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3720 n_R = pcis->n - pcbddc->n_vertices; 3721 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3722 /* local work vectors (try to avoid unneeded work)*/ 3723 /* R nodes */ 3724 old_size = -1; 3725 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3726 if (n_R != old_size) { 3727 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3728 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3729 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3730 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3731 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3732 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3733 } 3734 /* local primal dofs */ 3735 old_size = -1; 3736 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3737 if (pcbddc->local_primal_size != old_size) { 3738 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3739 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3740 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3741 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3742 } 3743 /* local explicit constraints */ 3744 old_size = -1; 3745 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3746 if (n_constraints && n_constraints != old_size) { 3747 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3748 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3749 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3750 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3751 } 3752 PetscFunctionReturn(PETSC_SUCCESS); 3753 } 3754 3755 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3756 { 3757 /* pointers to pcis and pcbddc */ 3758 PC_IS *pcis = (PC_IS *)pc->data; 3759 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3760 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3761 /* submatrices of local problem */ 3762 Mat A_RV, A_VR, A_VV, local_auxmat2_R; 3763 /* submatrices of local coarse problem */ 3764 Mat S_VV, S_CV, S_VC, S_CC; 3765 /* working matrices */ 3766 Mat C_CR; 3767 /* additional working stuff */ 3768 PC pc_R; 3769 Mat F, Brhs = NULL; 3770 Vec dummy_vec; 3771 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 3772 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3773 PetscScalar *work; 3774 PetscInt *idx_V_B; 3775 PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I; 3776 PetscInt i, n_R, n_D, n_B; 3777 PetscScalar one = 1.0, m_one = -1.0; 3778 3779 PetscFunctionBegin; 3780 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 3781 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3782 3783 /* Set Non-overlapping dimensions */ 3784 n_vertices = pcbddc->n_vertices; 3785 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3786 n_B = pcis->n_B; 3787 n_D = pcis->n - n_B; 3788 n_R = pcis->n - n_vertices; 3789 3790 /* vertices in boundary numbering */ 3791 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 3792 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 3793 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 3794 3795 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3796 PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals)); 3797 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV)); 3798 PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size)); 3799 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV)); 3800 PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size)); 3801 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC)); 3802 PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size)); 3803 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC)); 3804 PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size)); 3805 3806 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3807 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 3808 PetscCall(PCSetUp(pc_R)); 3809 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 3810 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 3811 lda_rhs = n_R; 3812 need_benign_correction = PETSC_FALSE; 3813 if (isLU || isCHOL) { 3814 PetscCall(PCFactorGetMatrix(pc_R, &F)); 3815 } else if (sub_schurs && sub_schurs->reuse_solver) { 3816 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3817 MatFactorType type; 3818 3819 F = reuse_solver->F; 3820 PetscCall(MatGetFactorType(F, &type)); 3821 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3822 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3823 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 3824 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3825 } else F = NULL; 3826 3827 /* determine if we can use a sparse right-hand side */ 3828 sparserhs = PETSC_FALSE; 3829 if (F) { 3830 MatSolverType solver; 3831 3832 PetscCall(MatFactorGetSolverType(F, &solver)); 3833 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 3834 } 3835 3836 /* allocate workspace */ 3837 n = 0; 3838 if (n_constraints) n += lda_rhs * n_constraints; 3839 if (n_vertices) { 3840 n = PetscMax(2 * lda_rhs * n_vertices, n); 3841 n = PetscMax((lda_rhs + n_B) * n_vertices, n); 3842 } 3843 if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n); 3844 PetscCall(PetscMalloc1(n, &work)); 3845 3846 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3847 dummy_vec = NULL; 3848 if (need_benign_correction && lda_rhs != n_R && F) { 3849 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 3850 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 3851 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 3852 } 3853 3854 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3855 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3856 3857 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3858 if (n_constraints) { 3859 Mat M3, C_B; 3860 IS is_aux; 3861 3862 /* Extract constraints on R nodes: C_{CR} */ 3863 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux)); 3864 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 3865 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 3866 3867 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3868 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3869 if (!sparserhs) { 3870 PetscCall(PetscArrayzero(work, lda_rhs * n_constraints)); 3871 for (i = 0; i < n_constraints; i++) { 3872 const PetscScalar *row_cmat_values; 3873 const PetscInt *row_cmat_indices; 3874 PetscInt size_of_constraint, j; 3875 3876 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3877 for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j]; 3878 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3879 } 3880 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs)); 3881 } else { 3882 Mat tC_CR; 3883 3884 PetscCall(MatScale(C_CR, -1.0)); 3885 if (lda_rhs != n_R) { 3886 PetscScalar *aa; 3887 PetscInt r, *ii, *jj; 3888 PetscBool done; 3889 3890 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3891 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 3892 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 3893 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 3894 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3895 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 3896 } else { 3897 PetscCall(PetscObjectReference((PetscObject)C_CR)); 3898 tC_CR = C_CR; 3899 } 3900 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 3901 PetscCall(MatDestroy(&tC_CR)); 3902 } 3903 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R)); 3904 if (F) { 3905 if (need_benign_correction) { 3906 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3907 3908 /* rhs is already zero on interior dofs, no need to change the rhs */ 3909 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 3910 } 3911 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 3912 if (need_benign_correction) { 3913 PetscScalar *marr; 3914 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3915 3916 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3917 if (lda_rhs != n_R) { 3918 for (i = 0; i < n_constraints; i++) { 3919 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 3920 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 3921 PetscCall(VecResetArray(dummy_vec)); 3922 } 3923 } else { 3924 for (i = 0; i < n_constraints; i++) { 3925 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 3926 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 3927 PetscCall(VecResetArray(pcbddc->vec1_R)); 3928 } 3929 } 3930 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3931 } 3932 } else { 3933 PetscScalar *marr; 3934 3935 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3936 for (i = 0; i < n_constraints; i++) { 3937 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 3938 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 3939 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 3940 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 3941 PetscCall(VecResetArray(pcbddc->vec1_R)); 3942 PetscCall(VecResetArray(pcbddc->vec2_R)); 3943 } 3944 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3945 } 3946 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 3947 PetscCall(MatDestroy(&Brhs)); 3948 if (!pcbddc->switch_static) { 3949 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2)); 3950 for (i = 0; i < n_constraints; i++) { 3951 Vec r, b; 3952 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 3953 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 3954 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3955 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3956 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 3957 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 3958 } 3959 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3960 } else { 3961 if (lda_rhs != n_R) { 3962 IS dummy; 3963 3964 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy)); 3965 PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 3966 PetscCall(ISDestroy(&dummy)); 3967 } else { 3968 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 3969 pcbddc->local_auxmat2 = local_auxmat2_R; 3970 } 3971 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3972 } 3973 PetscCall(ISDestroy(&is_aux)); 3974 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3975 PetscCall(MatScale(M3, m_one)); 3976 if (isCHOL) { 3977 PetscCall(MatCholeskyFactor(M3, NULL, NULL)); 3978 } else { 3979 PetscCall(MatLUFactor(M3, NULL, NULL, NULL)); 3980 } 3981 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 3982 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3983 PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 3984 PetscCall(MatDestroy(&C_B)); 3985 PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3986 PetscCall(MatDestroy(&M3)); 3987 } 3988 3989 /* Get submatrices from subdomain matrix */ 3990 if (n_vertices) { 3991 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 3992 PetscBool oldpin; 3993 #endif 3994 PetscBool isaij; 3995 IS is_aux; 3996 3997 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3998 IS tis; 3999 4000 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4001 PetscCall(ISSort(tis)); 4002 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4003 PetscCall(ISDestroy(&tis)); 4004 } else { 4005 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4006 } 4007 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4008 oldpin = pcbddc->local_mat->boundtocpu; 4009 #endif 4010 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4011 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4012 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4013 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij)); 4014 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4015 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4016 } 4017 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4018 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4019 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4020 #endif 4021 PetscCall(ISDestroy(&is_aux)); 4022 } 4023 4024 /* Matrix of coarse basis functions (local) */ 4025 if (pcbddc->coarse_phi_B) { 4026 PetscInt on_B, on_primal, on_D = n_D; 4027 if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL)); 4028 PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal)); 4029 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4030 PetscScalar *marray; 4031 4032 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray)); 4033 PetscCall(PetscFree(marray)); 4034 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4035 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4036 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4037 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4038 } 4039 } 4040 4041 if (!pcbddc->coarse_phi_B) { 4042 PetscScalar *marr; 4043 4044 /* memory size */ 4045 n = n_B * pcbddc->local_primal_size; 4046 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size; 4047 if (!pcbddc->symmetric_primal) n *= 2; 4048 PetscCall(PetscCalloc1(n, &marr)); 4049 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B)); 4050 marr += n_B * pcbddc->local_primal_size; 4051 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4052 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D)); 4053 marr += n_D * pcbddc->local_primal_size; 4054 } 4055 if (!pcbddc->symmetric_primal) { 4056 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B)); 4057 marr += n_B * pcbddc->local_primal_size; 4058 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D)); 4059 } else { 4060 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4061 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4062 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4063 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4064 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4065 } 4066 } 4067 } 4068 4069 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4070 p0_lidx_I = NULL; 4071 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4072 const PetscInt *idxs; 4073 4074 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4075 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4076 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4077 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4078 } 4079 4080 /* vertices */ 4081 if (n_vertices) { 4082 PetscBool restoreavr = PETSC_FALSE; 4083 4084 PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV)); 4085 4086 if (n_R) { 4087 Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */ 4088 PetscBLASInt B_N, B_one = 1; 4089 const PetscScalar *x; 4090 PetscScalar *y; 4091 4092 PetscCall(MatScale(A_RV, m_one)); 4093 if (need_benign_correction) { 4094 ISLocalToGlobalMapping RtoN; 4095 IS is_p0; 4096 PetscInt *idxs_p0, n; 4097 4098 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4099 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4100 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4101 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4102 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4103 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4104 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4105 PetscCall(ISDestroy(&is_p0)); 4106 } 4107 4108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV)); 4109 if (!sparserhs || need_benign_correction) { 4110 if (lda_rhs == n_R) { 4111 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4112 } else { 4113 PetscScalar *av, *array; 4114 const PetscInt *xadj, *adjncy; 4115 PetscInt n; 4116 PetscBool flg_row; 4117 4118 array = work + lda_rhs * n_vertices; 4119 PetscCall(PetscArrayzero(array, lda_rhs * n_vertices)); 4120 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4121 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4122 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4123 for (i = 0; i < n; i++) { 4124 PetscInt j; 4125 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j]; 4126 } 4127 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4128 PetscCall(MatDestroy(&A_RV)); 4129 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV)); 4130 } 4131 if (need_benign_correction) { 4132 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4133 PetscScalar *marr; 4134 4135 PetscCall(MatDenseGetArray(A_RV, &marr)); 4136 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4137 4138 | 0 0 0 | (V) 4139 L = | 0 0 -1 | (P-p0) 4140 | 0 0 -1 | (p0) 4141 4142 */ 4143 for (i = 0; i < reuse_solver->benign_n; i++) { 4144 const PetscScalar *vals; 4145 const PetscInt *idxs, *idxs_zero; 4146 PetscInt n, j, nz; 4147 4148 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4149 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4150 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4151 for (j = 0; j < n; j++) { 4152 PetscScalar val = vals[j]; 4153 PetscInt k, col = idxs[j]; 4154 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4155 } 4156 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4157 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4158 } 4159 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4160 } 4161 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4162 Brhs = A_RV; 4163 } else { 4164 Mat tA_RVT, A_RVT; 4165 4166 if (!pcbddc->symmetric_primal) { 4167 /* A_RV already scaled by -1 */ 4168 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4169 } else { 4170 restoreavr = PETSC_TRUE; 4171 PetscCall(MatScale(A_VR, -1.0)); 4172 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4173 A_RVT = A_VR; 4174 } 4175 if (lda_rhs != n_R) { 4176 PetscScalar *aa; 4177 PetscInt r, *ii, *jj; 4178 PetscBool done; 4179 4180 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4181 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4182 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4183 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4184 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4185 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4186 } else { 4187 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4188 tA_RVT = A_RVT; 4189 } 4190 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4191 PetscCall(MatDestroy(&tA_RVT)); 4192 PetscCall(MatDestroy(&A_RVT)); 4193 } 4194 if (F) { 4195 /* need to correct the rhs */ 4196 if (need_benign_correction) { 4197 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4198 PetscScalar *marr; 4199 4200 PetscCall(MatDenseGetArray(Brhs, &marr)); 4201 if (lda_rhs != n_R) { 4202 for (i = 0; i < n_vertices; i++) { 4203 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4204 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4205 PetscCall(VecResetArray(dummy_vec)); 4206 } 4207 } else { 4208 for (i = 0; i < n_vertices; i++) { 4209 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4210 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4211 PetscCall(VecResetArray(pcbddc->vec1_R)); 4212 } 4213 } 4214 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4215 } 4216 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4217 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4218 /* need to correct the solution */ 4219 if (need_benign_correction) { 4220 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4221 PetscScalar *marr; 4222 4223 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4224 if (lda_rhs != n_R) { 4225 for (i = 0; i < n_vertices; i++) { 4226 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4227 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4228 PetscCall(VecResetArray(dummy_vec)); 4229 } 4230 } else { 4231 for (i = 0; i < n_vertices; i++) { 4232 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4233 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4234 PetscCall(VecResetArray(pcbddc->vec1_R)); 4235 } 4236 } 4237 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4238 } 4239 } else { 4240 PetscCall(MatDenseGetArray(Brhs, &y)); 4241 for (i = 0; i < n_vertices; i++) { 4242 PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs)); 4243 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs)); 4244 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4245 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4246 PetscCall(VecResetArray(pcbddc->vec1_R)); 4247 PetscCall(VecResetArray(pcbddc->vec2_R)); 4248 } 4249 PetscCall(MatDenseRestoreArray(Brhs, &y)); 4250 } 4251 PetscCall(MatDestroy(&A_RV)); 4252 PetscCall(MatDestroy(&Brhs)); 4253 /* S_VV and S_CV */ 4254 if (n_constraints) { 4255 Mat B; 4256 4257 PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices)); 4258 for (i = 0; i < n_vertices; i++) { 4259 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4260 PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B)); 4261 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4262 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4263 PetscCall(VecResetArray(pcis->vec1_B)); 4264 PetscCall(VecResetArray(pcbddc->vec1_R)); 4265 } 4266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B)); 4267 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4268 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV)); 4269 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4270 PetscCall(MatProductSetFromOptions(S_CV)); 4271 PetscCall(MatProductSymbolic(S_CV)); 4272 PetscCall(MatProductNumeric(S_CV)); 4273 PetscCall(MatProductClear(S_CV)); 4274 4275 PetscCall(MatDestroy(&B)); 4276 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B)); 4277 /* Reuse B = local_auxmat2_R * S_CV */ 4278 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B)); 4279 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4280 PetscCall(MatProductSetFromOptions(B)); 4281 PetscCall(MatProductSymbolic(B)); 4282 PetscCall(MatProductNumeric(B)); 4283 4284 PetscCall(MatScale(S_CV, m_one)); 4285 PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N)); 4286 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one)); 4287 PetscCall(MatDestroy(&B)); 4288 } 4289 if (lda_rhs != n_R) { 4290 PetscCall(MatDestroy(&A_RRmA_RV)); 4291 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV)); 4292 PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs)); 4293 } 4294 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt)); 4295 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4296 if (need_benign_correction) { 4297 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4298 PetscScalar *marr, *sums; 4299 4300 PetscCall(PetscMalloc1(n_vertices, &sums)); 4301 PetscCall(MatDenseGetArray(S_VVt, &marr)); 4302 for (i = 0; i < reuse_solver->benign_n; i++) { 4303 const PetscScalar *vals; 4304 const PetscInt *idxs, *idxs_zero; 4305 PetscInt n, j, nz; 4306 4307 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4308 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4309 for (j = 0; j < n_vertices; j++) { 4310 PetscInt k; 4311 sums[j] = 0.; 4312 for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs]; 4313 } 4314 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4315 for (j = 0; j < n; j++) { 4316 PetscScalar val = vals[j]; 4317 PetscInt k; 4318 for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k]; 4319 } 4320 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4321 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4322 } 4323 PetscCall(PetscFree(sums)); 4324 PetscCall(MatDenseRestoreArray(S_VVt, &marr)); 4325 PetscCall(MatDestroy(&A_RV_bcorr)); 4326 } 4327 PetscCall(MatDestroy(&A_RRmA_RV)); 4328 PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N)); 4329 PetscCall(MatDenseGetArrayRead(A_VV, &x)); 4330 PetscCall(MatDenseGetArray(S_VVt, &y)); 4331 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one)); 4332 PetscCall(MatDenseRestoreArrayRead(A_VV, &x)); 4333 PetscCall(MatDenseRestoreArray(S_VVt, &y)); 4334 PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN)); 4335 PetscCall(MatDestroy(&S_VVt)); 4336 } else { 4337 PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN)); 4338 } 4339 PetscCall(MatDestroy(&A_VV)); 4340 4341 /* coarse basis functions */ 4342 for (i = 0; i < n_vertices; i++) { 4343 Vec v; 4344 PetscScalar one = 1.0, zero = 0.0; 4345 4346 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4347 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v)); 4348 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4349 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4350 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4351 PetscMPIInt rank; 4352 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank)); 4353 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4354 } 4355 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4356 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4357 PetscCall(VecAssemblyEnd(v)); 4358 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v)); 4359 4360 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4361 PetscInt j; 4362 4363 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v)); 4364 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4365 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4366 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4367 PetscMPIInt rank; 4368 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank)); 4369 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4370 } 4371 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4372 PetscCall(VecAssemblyBegin(v)); 4373 PetscCall(VecAssemblyEnd(v)); 4374 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v)); 4375 } 4376 PetscCall(VecResetArray(pcbddc->vec1_R)); 4377 } 4378 /* if n_R == 0 the object is not destroyed */ 4379 PetscCall(MatDestroy(&A_RV)); 4380 } 4381 PetscCall(VecDestroy(&dummy_vec)); 4382 4383 if (n_constraints) { 4384 Mat B; 4385 4386 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B)); 4387 PetscCall(MatScale(S_CC, m_one)); 4388 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B)); 4389 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4390 PetscCall(MatProductSetFromOptions(B)); 4391 PetscCall(MatProductSymbolic(B)); 4392 PetscCall(MatProductNumeric(B)); 4393 4394 PetscCall(MatScale(S_CC, m_one)); 4395 if (n_vertices) { 4396 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4397 PetscCall(MatTransposeSetPrecursor(S_CV, S_VC)); 4398 PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC)); 4399 } else { 4400 Mat S_VCt; 4401 4402 if (lda_rhs != n_R) { 4403 PetscCall(MatDestroy(&B)); 4404 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B)); 4405 PetscCall(MatDenseSetLDA(B, lda_rhs)); 4406 } 4407 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt)); 4408 PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN)); 4409 PetscCall(MatDestroy(&S_VCt)); 4410 } 4411 } 4412 PetscCall(MatDestroy(&B)); 4413 /* coarse basis functions */ 4414 for (i = 0; i < n_constraints; i++) { 4415 Vec v; 4416 4417 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4418 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4419 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4420 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4421 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4422 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4423 PetscInt j; 4424 PetscScalar zero = 0.0; 4425 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4426 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4427 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4428 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4429 PetscCall(VecAssemblyBegin(v)); 4430 PetscCall(VecAssemblyEnd(v)); 4431 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4432 } 4433 PetscCall(VecResetArray(pcbddc->vec1_R)); 4434 } 4435 } 4436 if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R)); 4437 PetscCall(PetscFree(p0_lidx_I)); 4438 4439 /* coarse matrix entries relative to B_0 */ 4440 if (pcbddc->benign_n) { 4441 Mat B0_B, B0_BPHI; 4442 IS is_dummy; 4443 const PetscScalar *data; 4444 PetscInt j; 4445 4446 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4447 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4448 PetscCall(ISDestroy(&is_dummy)); 4449 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4450 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4451 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4452 for (j = 0; j < pcbddc->benign_n; j++) { 4453 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4454 for (i = 0; i < pcbddc->local_primal_size; i++) { 4455 coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j]; 4456 coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j]; 4457 } 4458 } 4459 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4460 PetscCall(MatDestroy(&B0_B)); 4461 PetscCall(MatDestroy(&B0_BPHI)); 4462 } 4463 4464 /* compute other basis functions for non-symmetric problems */ 4465 if (!pcbddc->symmetric_primal) { 4466 Mat B_V = NULL, B_C = NULL; 4467 PetscScalar *marray; 4468 4469 if (n_constraints) { 4470 Mat S_CCT, C_CRT; 4471 4472 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4473 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4474 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4475 PetscCall(MatDestroy(&S_CCT)); 4476 if (n_vertices) { 4477 Mat S_VCT; 4478 4479 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4480 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4481 PetscCall(MatDestroy(&S_VCT)); 4482 } 4483 PetscCall(MatDestroy(&C_CRT)); 4484 } else { 4485 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4486 } 4487 if (n_vertices && n_R) { 4488 PetscScalar *av, *marray; 4489 const PetscInt *xadj, *adjncy; 4490 PetscInt n; 4491 PetscBool flg_row; 4492 4493 /* B_V = B_V - A_VR^T */ 4494 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4495 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4496 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4497 PetscCall(MatDenseGetArray(B_V, &marray)); 4498 for (i = 0; i < n; i++) { 4499 PetscInt j; 4500 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 4501 } 4502 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4503 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4504 PetscCall(MatDestroy(&A_VR)); 4505 } 4506 4507 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4508 if (n_vertices) { 4509 PetscCall(MatDenseGetArray(B_V, &marray)); 4510 for (i = 0; i < n_vertices; i++) { 4511 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 4512 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4513 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4514 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4515 PetscCall(VecResetArray(pcbddc->vec1_R)); 4516 PetscCall(VecResetArray(pcbddc->vec2_R)); 4517 } 4518 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4519 } 4520 if (B_C) { 4521 PetscCall(MatDenseGetArray(B_C, &marray)); 4522 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 4523 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 4524 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4525 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4526 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4527 PetscCall(VecResetArray(pcbddc->vec1_R)); 4528 PetscCall(VecResetArray(pcbddc->vec2_R)); 4529 } 4530 PetscCall(MatDenseRestoreArray(B_C, &marray)); 4531 } 4532 /* coarse basis functions */ 4533 for (i = 0; i < pcbddc->local_primal_size; i++) { 4534 Vec v; 4535 4536 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 4537 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 4538 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4539 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4540 if (i < n_vertices) { 4541 PetscScalar one = 1.0; 4542 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4543 PetscCall(VecAssemblyBegin(v)); 4544 PetscCall(VecAssemblyEnd(v)); 4545 } 4546 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 4547 4548 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4549 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 4550 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4551 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4552 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 4553 } 4554 PetscCall(VecResetArray(pcbddc->vec1_R)); 4555 } 4556 PetscCall(MatDestroy(&B_V)); 4557 PetscCall(MatDestroy(&B_C)); 4558 } 4559 4560 /* free memory */ 4561 PetscCall(PetscFree(idx_V_B)); 4562 PetscCall(MatDestroy(&S_VV)); 4563 PetscCall(MatDestroy(&S_CV)); 4564 PetscCall(MatDestroy(&S_VC)); 4565 PetscCall(MatDestroy(&S_CC)); 4566 PetscCall(PetscFree(work)); 4567 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 4568 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 4569 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4570 4571 /* Checking coarse_sub_mat and coarse basis functions */ 4572 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4573 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4574 if (pcbddc->dbg_flag) { 4575 Mat coarse_sub_mat; 4576 Mat AUXMAT, TM1, TM2, TM3, TM4; 4577 Mat coarse_phi_D, coarse_phi_B; 4578 Mat coarse_psi_D, coarse_psi_B; 4579 Mat A_II, A_BB, A_IB, A_BI; 4580 Mat C_B, CPHI; 4581 IS is_dummy; 4582 Vec mones; 4583 MatType checkmattype = MATSEQAIJ; 4584 PetscReal real_value; 4585 4586 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4587 Mat A; 4588 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 4589 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 4590 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 4591 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 4592 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 4593 PetscCall(MatDestroy(&A)); 4594 } else { 4595 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 4596 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 4597 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 4598 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 4599 } 4600 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 4601 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 4602 if (!pcbddc->symmetric_primal) { 4603 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 4604 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 4605 } 4606 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat)); 4607 4608 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 4609 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 4610 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4611 if (!pcbddc->symmetric_primal) { 4612 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4613 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4614 PetscCall(MatDestroy(&AUXMAT)); 4615 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4616 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4617 PetscCall(MatDestroy(&AUXMAT)); 4618 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4619 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4620 PetscCall(MatDestroy(&AUXMAT)); 4621 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4622 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4623 PetscCall(MatDestroy(&AUXMAT)); 4624 } else { 4625 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4626 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4627 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4628 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4629 PetscCall(MatDestroy(&AUXMAT)); 4630 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4631 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4632 PetscCall(MatDestroy(&AUXMAT)); 4633 } 4634 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 4635 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 4636 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 4637 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 4638 if (pcbddc->benign_n) { 4639 Mat B0_B, B0_BPHI; 4640 const PetscScalar *data2; 4641 PetscScalar *data; 4642 PetscInt j; 4643 4644 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4645 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4646 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4647 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4648 PetscCall(MatDenseGetArray(TM1, &data)); 4649 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 4650 for (j = 0; j < pcbddc->benign_n; j++) { 4651 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4652 for (i = 0; i < pcbddc->local_primal_size; i++) { 4653 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 4654 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 4655 } 4656 } 4657 PetscCall(MatDenseRestoreArray(TM1, &data)); 4658 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 4659 PetscCall(MatDestroy(&B0_B)); 4660 PetscCall(ISDestroy(&is_dummy)); 4661 PetscCall(MatDestroy(&B0_BPHI)); 4662 } 4663 #if 0 4664 { 4665 PetscViewer viewer; 4666 char filename[256]; 4667 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 4668 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4669 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4670 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4671 PetscCall(MatView(coarse_sub_mat,viewer)); 4672 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4673 PetscCall(MatView(TM1,viewer)); 4674 if (pcbddc->coarse_phi_B) { 4675 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4676 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4677 } 4678 if (pcbddc->coarse_phi_D) { 4679 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4680 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4681 } 4682 if (pcbddc->coarse_psi_B) { 4683 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4684 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4685 } 4686 if (pcbddc->coarse_psi_D) { 4687 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4688 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4689 } 4690 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4691 PetscCall(MatView(pcbddc->local_mat,viewer)); 4692 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4693 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4694 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4695 PetscCall(ISView(pcis->is_I_local,viewer)); 4696 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4697 PetscCall(ISView(pcis->is_B_local,viewer)); 4698 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4699 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4700 PetscCall(PetscViewerDestroy(&viewer)); 4701 } 4702 #endif 4703 PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN)); 4704 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 4705 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4706 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4707 4708 /* check constraints */ 4709 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 4710 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4711 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4712 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4713 } else { 4714 PetscScalar *data; 4715 Mat tmat; 4716 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 4717 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 4718 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 4719 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4720 PetscCall(MatDestroy(&tmat)); 4721 } 4722 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 4723 PetscCall(VecSet(mones, -1.0)); 4724 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4725 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4726 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4727 if (!pcbddc->symmetric_primal) { 4728 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 4729 PetscCall(VecSet(mones, -1.0)); 4730 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4731 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4732 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4733 } 4734 PetscCall(MatDestroy(&C_B)); 4735 PetscCall(MatDestroy(&CPHI)); 4736 PetscCall(ISDestroy(&is_dummy)); 4737 PetscCall(VecDestroy(&mones)); 4738 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4739 PetscCall(MatDestroy(&A_II)); 4740 PetscCall(MatDestroy(&A_BB)); 4741 PetscCall(MatDestroy(&A_IB)); 4742 PetscCall(MatDestroy(&A_BI)); 4743 PetscCall(MatDestroy(&TM1)); 4744 PetscCall(MatDestroy(&TM2)); 4745 PetscCall(MatDestroy(&TM3)); 4746 PetscCall(MatDestroy(&TM4)); 4747 PetscCall(MatDestroy(&coarse_phi_D)); 4748 PetscCall(MatDestroy(&coarse_phi_B)); 4749 if (!pcbddc->symmetric_primal) { 4750 PetscCall(MatDestroy(&coarse_psi_D)); 4751 PetscCall(MatDestroy(&coarse_psi_B)); 4752 } 4753 PetscCall(MatDestroy(&coarse_sub_mat)); 4754 } 4755 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4756 { 4757 PetscBool gpu; 4758 4759 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu)); 4760 if (gpu) { 4761 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 4762 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 4763 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 4764 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 4765 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 4766 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 4767 } 4768 } 4769 /* get back data */ 4770 *coarse_submat_vals_n = coarse_submat_vals; 4771 PetscFunctionReturn(PETSC_SUCCESS); 4772 } 4773 4774 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 4775 { 4776 Mat *work_mat; 4777 IS isrow_s, iscol_s; 4778 PetscBool rsorted, csorted; 4779 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 4780 4781 PetscFunctionBegin; 4782 PetscCall(ISSorted(isrow, &rsorted)); 4783 PetscCall(ISSorted(iscol, &csorted)); 4784 PetscCall(ISGetLocalSize(isrow, &rsize)); 4785 PetscCall(ISGetLocalSize(iscol, &csize)); 4786 4787 if (!rsorted) { 4788 const PetscInt *idxs; 4789 PetscInt *idxs_sorted, i; 4790 4791 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 4792 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 4793 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 4794 PetscCall(ISGetIndices(isrow, &idxs)); 4795 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 4796 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4797 PetscCall(ISRestoreIndices(isrow, &idxs)); 4798 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 4799 } else { 4800 PetscCall(PetscObjectReference((PetscObject)isrow)); 4801 isrow_s = isrow; 4802 } 4803 4804 if (!csorted) { 4805 if (isrow == iscol) { 4806 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4807 iscol_s = isrow_s; 4808 } else { 4809 const PetscInt *idxs; 4810 PetscInt *idxs_sorted, i; 4811 4812 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 4813 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 4814 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 4815 PetscCall(ISGetIndices(iscol, &idxs)); 4816 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 4817 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4818 PetscCall(ISRestoreIndices(iscol, &idxs)); 4819 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 4820 } 4821 } else { 4822 PetscCall(PetscObjectReference((PetscObject)iscol)); 4823 iscol_s = iscol; 4824 } 4825 4826 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 4827 4828 if (!rsorted || !csorted) { 4829 Mat new_mat; 4830 IS is_perm_r, is_perm_c; 4831 4832 if (!rsorted) { 4833 PetscInt *idxs_r, i; 4834 PetscCall(PetscMalloc1(rsize, &idxs_r)); 4835 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 4836 PetscCall(PetscFree(idxs_perm_r)); 4837 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 4838 } else { 4839 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 4840 } 4841 PetscCall(ISSetPermutation(is_perm_r)); 4842 4843 if (!csorted) { 4844 if (isrow_s == iscol_s) { 4845 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 4846 is_perm_c = is_perm_r; 4847 } else { 4848 PetscInt *idxs_c, i; 4849 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 4850 PetscCall(PetscMalloc1(csize, &idxs_c)); 4851 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 4852 PetscCall(PetscFree(idxs_perm_c)); 4853 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 4854 } 4855 } else { 4856 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 4857 } 4858 PetscCall(ISSetPermutation(is_perm_c)); 4859 4860 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 4861 PetscCall(MatDestroy(&work_mat[0])); 4862 work_mat[0] = new_mat; 4863 PetscCall(ISDestroy(&is_perm_r)); 4864 PetscCall(ISDestroy(&is_perm_c)); 4865 } 4866 4867 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 4868 *B = work_mat[0]; 4869 PetscCall(MatDestroyMatrices(1, &work_mat)); 4870 PetscCall(ISDestroy(&isrow_s)); 4871 PetscCall(ISDestroy(&iscol_s)); 4872 PetscFunctionReturn(PETSC_SUCCESS); 4873 } 4874 4875 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4876 { 4877 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 4878 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4879 Mat new_mat, lA; 4880 IS is_local, is_global; 4881 PetscInt local_size; 4882 PetscBool isseqaij, issym, isset; 4883 4884 PetscFunctionBegin; 4885 PetscCall(MatDestroy(&pcbddc->local_mat)); 4886 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 4887 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 4888 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 4889 PetscCall(ISDestroy(&is_local)); 4890 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 4891 PetscCall(ISDestroy(&is_global)); 4892 4893 if (pcbddc->dbg_flag) { 4894 Vec x, x_change; 4895 PetscReal error; 4896 4897 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 4898 PetscCall(VecSetRandom(x, NULL)); 4899 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 4900 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4901 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4902 PetscCall(MatMult(new_mat, matis->x, matis->y)); 4903 if (!pcbddc->change_interior) { 4904 const PetscScalar *x, *y, *v; 4905 PetscReal lerror = 0.; 4906 PetscInt i; 4907 4908 PetscCall(VecGetArrayRead(matis->x, &x)); 4909 PetscCall(VecGetArrayRead(matis->y, &y)); 4910 PetscCall(VecGetArrayRead(matis->counter, &v)); 4911 for (i = 0; i < local_size; i++) 4912 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 4913 PetscCall(VecRestoreArrayRead(matis->x, &x)); 4914 PetscCall(VecRestoreArrayRead(matis->y, &y)); 4915 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 4916 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 4917 if (error > PETSC_SMALL) { 4918 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4919 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 4920 } else { 4921 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 4922 } 4923 } 4924 } 4925 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4926 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4927 PetscCall(VecAXPY(x, -1.0, x_change)); 4928 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 4929 if (error > PETSC_SMALL) { 4930 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4931 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 4932 } else { 4933 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 4934 } 4935 } 4936 PetscCall(VecDestroy(&x)); 4937 PetscCall(VecDestroy(&x_change)); 4938 } 4939 4940 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4941 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 4942 4943 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4944 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 4945 if (isseqaij) { 4946 PetscCall(MatDestroy(&pcbddc->local_mat)); 4947 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4948 if (lA) { 4949 Mat work; 4950 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4951 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4952 PetscCall(MatDestroy(&work)); 4953 } 4954 } else { 4955 Mat work_mat; 4956 4957 PetscCall(MatDestroy(&pcbddc->local_mat)); 4958 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4959 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4960 PetscCall(MatDestroy(&work_mat)); 4961 if (lA) { 4962 Mat work; 4963 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4964 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4965 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4966 PetscCall(MatDestroy(&work)); 4967 } 4968 } 4969 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 4970 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 4971 PetscCall(MatDestroy(&new_mat)); 4972 PetscFunctionReturn(PETSC_SUCCESS); 4973 } 4974 4975 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4976 { 4977 PC_IS *pcis = (PC_IS *)(pc->data); 4978 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4979 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4980 PetscInt *idx_R_local = NULL; 4981 PetscInt n_vertices, i, j, n_R, n_D, n_B; 4982 PetscInt vbs, bs; 4983 PetscBT bitmask = NULL; 4984 4985 PetscFunctionBegin; 4986 /* 4987 No need to setup local scatters if 4988 - primal space is unchanged 4989 AND 4990 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4991 AND 4992 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4993 */ 4994 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 4995 /* destroy old objects */ 4996 PetscCall(ISDestroy(&pcbddc->is_R_local)); 4997 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 4998 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 4999 /* Set Non-overlapping dimensions */ 5000 n_B = pcis->n_B; 5001 n_D = pcis->n - n_B; 5002 n_vertices = pcbddc->n_vertices; 5003 5004 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5005 5006 /* create auxiliary bitmask and allocate workspace */ 5007 if (!sub_schurs || !sub_schurs->reuse_solver) { 5008 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5009 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5010 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5011 5012 for (i = 0, n_R = 0; i < pcis->n; i++) { 5013 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5014 } 5015 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5016 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5017 5018 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5019 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5020 } 5021 5022 /* Block code */ 5023 vbs = 1; 5024 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5025 if (bs > 1 && !(n_vertices % bs)) { 5026 PetscBool is_blocked = PETSC_TRUE; 5027 PetscInt *vary; 5028 if (!sub_schurs || !sub_schurs->reuse_solver) { 5029 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5030 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5031 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5032 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5033 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5034 for (i = 0; i < pcis->n / bs; i++) { 5035 if (vary[i] != 0 && vary[i] != bs) { 5036 is_blocked = PETSC_FALSE; 5037 break; 5038 } 5039 } 5040 PetscCall(PetscFree(vary)); 5041 } else { 5042 /* Verify directly the R set */ 5043 for (i = 0; i < n_R / bs; i++) { 5044 PetscInt j, node = idx_R_local[bs * i]; 5045 for (j = 1; j < bs; j++) { 5046 if (node != idx_R_local[bs * i + j] - j) { 5047 is_blocked = PETSC_FALSE; 5048 break; 5049 } 5050 } 5051 } 5052 } 5053 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5054 vbs = bs; 5055 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5056 } 5057 } 5058 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5059 if (sub_schurs && sub_schurs->reuse_solver) { 5060 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5061 5062 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5063 PetscCall(ISDestroy(&reuse_solver->is_R)); 5064 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5065 reuse_solver->is_R = pcbddc->is_R_local; 5066 } else { 5067 PetscCall(PetscFree(idx_R_local)); 5068 } 5069 5070 /* print some info if requested */ 5071 if (pcbddc->dbg_flag) { 5072 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5073 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5074 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5075 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5076 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5077 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5078 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5079 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5080 } 5081 5082 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5083 if (!sub_schurs || !sub_schurs->reuse_solver) { 5084 IS is_aux1, is_aux2; 5085 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5086 5087 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5088 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5089 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5090 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5091 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5092 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5093 for (i = 0, j = 0; i < n_R; i++) { 5094 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5095 } 5096 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5097 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5098 for (i = 0, j = 0; i < n_B; i++) { 5099 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5100 } 5101 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5102 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5103 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5104 PetscCall(ISDestroy(&is_aux1)); 5105 PetscCall(ISDestroy(&is_aux2)); 5106 5107 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5108 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5109 for (i = 0, j = 0; i < n_R; i++) { 5110 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5111 } 5112 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5113 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5114 PetscCall(ISDestroy(&is_aux1)); 5115 } 5116 PetscCall(PetscBTDestroy(&bitmask)); 5117 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5118 } else { 5119 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5120 IS tis; 5121 PetscInt schur_size; 5122 5123 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5124 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5125 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5126 PetscCall(ISDestroy(&tis)); 5127 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5128 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5129 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5130 PetscCall(ISDestroy(&tis)); 5131 } 5132 } 5133 PetscFunctionReturn(PETSC_SUCCESS); 5134 } 5135 5136 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5137 { 5138 MatNullSpace NullSpace; 5139 Mat dmat; 5140 const Vec *nullvecs; 5141 Vec v, v2, *nullvecs2; 5142 VecScatter sct = NULL; 5143 PetscContainer c; 5144 PetscScalar *ddata; 5145 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5146 PetscBool nnsp_has_cnst; 5147 5148 PetscFunctionBegin; 5149 if (!is && !B) { /* MATIS */ 5150 Mat_IS *matis = (Mat_IS *)A->data; 5151 5152 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5153 sct = matis->cctx; 5154 PetscCall(PetscObjectReference((PetscObject)sct)); 5155 } else { 5156 PetscCall(MatGetNullSpace(B, &NullSpace)); 5157 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5158 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5159 } 5160 PetscCall(MatGetNullSpace(A, &NullSpace)); 5161 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5162 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5163 5164 PetscCall(MatCreateVecs(A, &v, NULL)); 5165 PetscCall(MatCreateVecs(B, &v2, NULL)); 5166 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5167 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5168 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5169 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5170 PetscCall(VecGetBlockSize(v2, &bs)); 5171 PetscCall(VecGetSize(v2, &N)); 5172 PetscCall(VecGetLocalSize(v2, &n)); 5173 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5174 for (k = 0; k < nnsp_size; k++) { 5175 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5176 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5177 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5178 } 5179 if (nnsp_has_cnst) { 5180 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5181 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5182 } 5183 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5184 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5185 5186 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5187 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5188 PetscCall(PetscContainerSetPointer(c, ddata)); 5189 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5190 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5191 PetscCall(PetscContainerDestroy(&c)); 5192 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5193 PetscCall(MatDestroy(&dmat)); 5194 5195 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5196 PetscCall(PetscFree(nullvecs2)); 5197 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5198 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5199 PetscCall(VecDestroy(&v)); 5200 PetscCall(VecDestroy(&v2)); 5201 PetscCall(VecScatterDestroy(&sct)); 5202 PetscFunctionReturn(PETSC_SUCCESS); 5203 } 5204 5205 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5206 { 5207 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5208 PC_IS *pcis = (PC_IS *)pc->data; 5209 PC pc_temp; 5210 Mat A_RR; 5211 MatNullSpace nnsp; 5212 MatReuse reuse; 5213 PetscScalar m_one = -1.0; 5214 PetscReal value; 5215 PetscInt n_D, n_R; 5216 PetscBool issbaij, opts, isset, issym; 5217 void (*f)(void) = NULL; 5218 char dir_prefix[256], neu_prefix[256], str_level[16]; 5219 size_t len; 5220 5221 PetscFunctionBegin; 5222 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5223 /* approximate solver, propagate NearNullSpace if needed */ 5224 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5225 MatNullSpace gnnsp1, gnnsp2; 5226 PetscBool lhas, ghas; 5227 5228 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5229 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5230 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5231 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5232 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5233 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5234 } 5235 5236 /* compute prefixes */ 5237 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5238 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5239 if (!pcbddc->current_level) { 5240 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5241 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5242 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5243 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5244 } else { 5245 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 5246 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5247 len -= 15; /* remove "pc_bddc_coarse_" */ 5248 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5249 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5250 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5251 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5252 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5253 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5254 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5255 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5256 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5257 } 5258 5259 /* DIRICHLET PROBLEM */ 5260 if (dirichlet) { 5261 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5262 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5263 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5264 if (pcbddc->dbg_flag) { 5265 Mat A_IIn; 5266 5267 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5268 PetscCall(MatDestroy(&pcis->A_II)); 5269 pcis->A_II = A_IIn; 5270 } 5271 } 5272 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5273 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5274 5275 /* Matrix for Dirichlet problem is pcis->A_II */ 5276 n_D = pcis->n - pcis->n_B; 5277 opts = PETSC_FALSE; 5278 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5279 opts = PETSC_TRUE; 5280 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5281 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5282 /* default */ 5283 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5284 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5285 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5286 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5287 if (issbaij) { 5288 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5289 } else { 5290 PetscCall(PCSetType(pc_temp, PCLU)); 5291 } 5292 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5293 } 5294 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5295 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5296 /* Allow user's customization */ 5297 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5298 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5299 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5300 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5301 } 5302 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5303 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5304 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5305 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5306 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5307 const PetscInt *idxs; 5308 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5309 5310 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5311 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5312 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5313 for (i = 0; i < nl; i++) { 5314 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5315 } 5316 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5317 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5318 PetscCall(PetscFree(scoords)); 5319 } 5320 if (sub_schurs && sub_schurs->reuse_solver) { 5321 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5322 5323 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5324 } 5325 5326 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5327 if (!n_D) { 5328 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5329 PetscCall(PCSetType(pc_temp, PCNONE)); 5330 } 5331 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5332 /* set ksp_D into pcis data */ 5333 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5334 PetscCall(KSPDestroy(&pcis->ksp_D)); 5335 pcis->ksp_D = pcbddc->ksp_D; 5336 } 5337 5338 /* NEUMANN PROBLEM */ 5339 A_RR = NULL; 5340 if (neumann) { 5341 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5342 PetscInt ibs, mbs; 5343 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5344 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5345 5346 reuse_neumann_solver = PETSC_FALSE; 5347 if (sub_schurs && sub_schurs->reuse_solver) { 5348 IS iP; 5349 5350 reuse_neumann_solver = PETSC_TRUE; 5351 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5352 if (iP) reuse_neumann_solver = PETSC_FALSE; 5353 } 5354 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5355 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5356 if (pcbddc->ksp_R) { /* already created ksp */ 5357 PetscInt nn_R; 5358 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5359 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5360 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5361 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5362 PetscCall(KSPReset(pcbddc->ksp_R)); 5363 PetscCall(MatDestroy(&A_RR)); 5364 reuse = MAT_INITIAL_MATRIX; 5365 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5366 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5367 PetscCall(MatDestroy(&A_RR)); 5368 reuse = MAT_INITIAL_MATRIX; 5369 } else { /* safe to reuse the matrix */ 5370 reuse = MAT_REUSE_MATRIX; 5371 } 5372 } 5373 /* last check */ 5374 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5375 PetscCall(MatDestroy(&A_RR)); 5376 reuse = MAT_INITIAL_MATRIX; 5377 } 5378 } else { /* first time, so we need to create the matrix */ 5379 reuse = MAT_INITIAL_MATRIX; 5380 } 5381 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5382 TODO: Get Rid of these conversions */ 5383 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5384 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5385 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5386 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5387 if (matis->A == pcbddc->local_mat) { 5388 PetscCall(MatDestroy(&pcbddc->local_mat)); 5389 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5390 } else { 5391 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5392 } 5393 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5394 if (matis->A == pcbddc->local_mat) { 5395 PetscCall(MatDestroy(&pcbddc->local_mat)); 5396 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5397 } else { 5398 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5399 } 5400 } 5401 /* extract A_RR */ 5402 if (reuse_neumann_solver) { 5403 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5404 5405 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5406 PetscCall(MatDestroy(&A_RR)); 5407 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5408 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5409 } else { 5410 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5411 } 5412 } else { 5413 PetscCall(MatDestroy(&A_RR)); 5414 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5415 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5416 } 5417 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5418 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5419 } 5420 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5421 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5422 opts = PETSC_FALSE; 5423 if (!pcbddc->ksp_R) { /* create object if not present */ 5424 opts = PETSC_TRUE; 5425 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5426 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5427 /* default */ 5428 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5429 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5430 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5431 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5432 if (issbaij) { 5433 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5434 } else { 5435 PetscCall(PCSetType(pc_temp, PCLU)); 5436 } 5437 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5438 } 5439 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5440 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5441 if (opts) { /* Allow user's customization once */ 5442 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5443 } 5444 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5445 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5446 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5447 } 5448 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5449 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5450 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5451 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5452 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5453 const PetscInt *idxs; 5454 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5455 5456 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 5457 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 5458 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5459 for (i = 0; i < nl; i++) { 5460 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5461 } 5462 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 5463 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5464 PetscCall(PetscFree(scoords)); 5465 } 5466 5467 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5468 if (!n_R) { 5469 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5470 PetscCall(PCSetType(pc_temp, PCNONE)); 5471 } 5472 /* Reuse solver if it is present */ 5473 if (reuse_neumann_solver) { 5474 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5475 5476 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 5477 } 5478 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5479 } 5480 5481 if (pcbddc->dbg_flag) { 5482 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5483 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5484 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5485 } 5486 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5487 5488 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5489 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 5490 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 5491 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 5492 /* check Dirichlet and Neumann solvers */ 5493 if (pcbddc->dbg_flag) { 5494 if (dirichlet) { /* Dirichlet */ 5495 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 5496 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 5497 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 5498 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 5499 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 5500 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 5501 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value)); 5502 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5503 } 5504 if (neumann) { /* Neumann */ 5505 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 5506 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 5507 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 5508 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5509 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 5510 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 5511 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value)); 5512 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5513 } 5514 } 5515 /* free Neumann problem's matrix */ 5516 PetscCall(MatDestroy(&A_RR)); 5517 PetscFunctionReturn(PETSC_SUCCESS); 5518 } 5519 5520 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5521 { 5522 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5523 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5524 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5525 5526 PetscFunctionBegin; 5527 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 5528 if (!pcbddc->switch_static) { 5529 if (applytranspose && pcbddc->local_auxmat1) { 5530 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 5531 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5532 } 5533 if (!reuse_solver) { 5534 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5535 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5536 } else { 5537 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5538 5539 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5540 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5541 } 5542 } else { 5543 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5544 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5545 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5546 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5547 if (applytranspose && pcbddc->local_auxmat1) { 5548 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 5549 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5550 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5551 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5552 } 5553 } 5554 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5555 if (!reuse_solver || pcbddc->switch_static) { 5556 if (applytranspose) { 5557 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5558 } else { 5559 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5560 } 5561 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 5562 } else { 5563 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5564 5565 if (applytranspose) { 5566 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5567 } else { 5568 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5569 } 5570 } 5571 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5572 PetscCall(VecSet(inout_B, 0.)); 5573 if (!pcbddc->switch_static) { 5574 if (!reuse_solver) { 5575 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5576 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5577 } else { 5578 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5579 5580 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5581 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5582 } 5583 if (!applytranspose && pcbddc->local_auxmat1) { 5584 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5585 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 5586 } 5587 } else { 5588 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5589 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5590 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5591 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5592 if (!applytranspose && pcbddc->local_auxmat1) { 5593 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5594 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 5595 } 5596 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5597 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5598 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5599 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5600 } 5601 PetscFunctionReturn(PETSC_SUCCESS); 5602 } 5603 5604 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5605 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5606 { 5607 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5608 PC_IS *pcis = (PC_IS *)(pc->data); 5609 const PetscScalar zero = 0.0; 5610 5611 PetscFunctionBegin; 5612 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5613 if (!pcbddc->benign_apply_coarse_only) { 5614 if (applytranspose) { 5615 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 5616 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5617 } else { 5618 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 5619 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5620 } 5621 } else { 5622 PetscCall(VecSet(pcbddc->vec1_P, zero)); 5623 } 5624 5625 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5626 if (pcbddc->benign_n) { 5627 PetscScalar *array; 5628 PetscInt j; 5629 5630 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5631 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 5632 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5633 } 5634 5635 /* start communications from local primal nodes to rhs of coarse solver */ 5636 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 5637 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 5638 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 5639 5640 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5641 if (pcbddc->coarse_ksp) { 5642 Mat coarse_mat; 5643 Vec rhs, sol; 5644 MatNullSpace nullsp; 5645 PetscBool isbddc = PETSC_FALSE; 5646 5647 if (pcbddc->benign_have_null) { 5648 PC coarse_pc; 5649 5650 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5651 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 5652 /* we need to propagate to coarser levels the need for a possible benign correction */ 5653 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5654 PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5655 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5656 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5657 } 5658 } 5659 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 5660 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 5661 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 5662 if (applytranspose) { 5663 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 5664 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5665 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 5666 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5667 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5668 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 5669 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5670 } else { 5671 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 5672 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5673 PC coarse_pc; 5674 5675 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 5676 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5677 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 5678 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 5679 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 5680 } else { 5681 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5682 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 5683 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5684 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5685 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5686 } 5687 } 5688 /* we don't need the benign correction at coarser levels anymore */ 5689 if (pcbddc->benign_have_null && isbddc) { 5690 PC coarse_pc; 5691 PC_BDDC *coarsepcbddc; 5692 5693 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5694 coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5695 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5696 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5697 } 5698 } 5699 5700 /* Local solution on R nodes */ 5701 if (pcis->n && !pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 5702 /* communications from coarse sol to local primal nodes */ 5703 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 5704 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 5705 5706 /* Sum contributions from the two levels */ 5707 if (!pcbddc->benign_apply_coarse_only) { 5708 if (applytranspose) { 5709 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5710 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5711 } else { 5712 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5713 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5714 } 5715 /* store p0 */ 5716 if (pcbddc->benign_n) { 5717 PetscScalar *array; 5718 PetscInt j; 5719 5720 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5721 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 5722 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5723 } 5724 } else { /* expand the coarse solution */ 5725 if (applytranspose) { 5726 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 5727 } else { 5728 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 5729 } 5730 } 5731 PetscFunctionReturn(PETSC_SUCCESS); 5732 } 5733 5734 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 5735 { 5736 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5737 Vec from, to; 5738 const PetscScalar *array; 5739 5740 PetscFunctionBegin; 5741 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5742 from = pcbddc->coarse_vec; 5743 to = pcbddc->vec1_P; 5744 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5745 Vec tvec; 5746 5747 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5748 PetscCall(VecResetArray(tvec)); 5749 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 5750 PetscCall(VecGetArrayRead(tvec, &array)); 5751 PetscCall(VecPlaceArray(from, array)); 5752 PetscCall(VecRestoreArrayRead(tvec, &array)); 5753 } 5754 } else { /* from local to global -> put data in coarse right hand side */ 5755 from = pcbddc->vec1_P; 5756 to = pcbddc->coarse_vec; 5757 } 5758 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5759 PetscFunctionReturn(PETSC_SUCCESS); 5760 } 5761 5762 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5763 { 5764 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5765 Vec from, to; 5766 const PetscScalar *array; 5767 5768 PetscFunctionBegin; 5769 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5770 from = pcbddc->coarse_vec; 5771 to = pcbddc->vec1_P; 5772 } else { /* from local to global -> put data in coarse right hand side */ 5773 from = pcbddc->vec1_P; 5774 to = pcbddc->coarse_vec; 5775 } 5776 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5777 if (smode == SCATTER_FORWARD) { 5778 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5779 Vec tvec; 5780 5781 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5782 PetscCall(VecGetArrayRead(to, &array)); 5783 PetscCall(VecPlaceArray(tvec, array)); 5784 PetscCall(VecRestoreArrayRead(to, &array)); 5785 } 5786 } else { 5787 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5788 PetscCall(VecResetArray(from)); 5789 } 5790 } 5791 PetscFunctionReturn(PETSC_SUCCESS); 5792 } 5793 5794 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5795 { 5796 PC_IS *pcis = (PC_IS *)(pc->data); 5797 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5798 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5799 /* one and zero */ 5800 PetscScalar one = 1.0, zero = 0.0; 5801 /* space to store constraints and their local indices */ 5802 PetscScalar *constraints_data; 5803 PetscInt *constraints_idxs, *constraints_idxs_B; 5804 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 5805 PetscInt *constraints_n; 5806 /* iterators */ 5807 PetscInt i, j, k, total_counts, total_counts_cc, cum; 5808 /* BLAS integers */ 5809 PetscBLASInt lwork, lierr; 5810 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 5811 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 5812 /* reuse */ 5813 PetscInt olocal_primal_size, olocal_primal_size_cc; 5814 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 5815 /* change of basis */ 5816 PetscBool qr_needed; 5817 PetscBT change_basis, qr_needed_idx; 5818 /* auxiliary stuff */ 5819 PetscInt *nnz, *is_indices; 5820 PetscInt ncc; 5821 /* some quantities */ 5822 PetscInt n_vertices, total_primal_vertices, valid_constraints; 5823 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 5824 PetscReal tol; /* tolerance for retaining eigenmodes */ 5825 5826 PetscFunctionBegin; 5827 tol = PetscSqrtReal(PETSC_SMALL); 5828 /* Destroy Mat objects computed previously */ 5829 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 5830 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 5831 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 5832 /* save info on constraints from previous setup (if any) */ 5833 olocal_primal_size = pcbddc->local_primal_size; 5834 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5835 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 5836 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 5837 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 5838 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 5839 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 5840 5841 if (!pcbddc->adaptive_selection) { 5842 IS ISForVertices, *ISForFaces, *ISForEdges; 5843 MatNullSpace nearnullsp; 5844 const Vec *nearnullvecs; 5845 Vec *localnearnullsp; 5846 PetscScalar *array; 5847 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 5848 PetscBool nnsp_has_cnst; 5849 /* LAPACK working arrays for SVD or POD */ 5850 PetscBool skip_lapack, boolforchange; 5851 PetscScalar *work; 5852 PetscReal *singular_vals; 5853 #if defined(PETSC_USE_COMPLEX) 5854 PetscReal *rwork; 5855 #endif 5856 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 5857 PetscBLASInt dummy_int = 1; 5858 PetscScalar dummy_scalar = 1.; 5859 PetscBool use_pod = PETSC_FALSE; 5860 5861 /* MKL SVD with same input gives different results on different processes! */ 5862 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 5863 use_pod = PETSC_TRUE; 5864 #endif 5865 /* Get index sets for faces, edges and vertices from graph */ 5866 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 5867 o_nf = n_ISForFaces; 5868 o_ne = n_ISForEdges; 5869 n_vertices = 0; 5870 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 5871 /* print some info */ 5872 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5873 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 5874 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 5875 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5876 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 5877 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 5878 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 5879 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 5880 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5881 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 5882 } 5883 5884 if (!pcbddc->use_vertices) n_vertices = 0; 5885 if (!pcbddc->use_edges) n_ISForEdges = 0; 5886 if (!pcbddc->use_faces) n_ISForFaces = 0; 5887 5888 /* check if near null space is attached to global mat */ 5889 if (pcbddc->use_nnsp) { 5890 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 5891 } else nearnullsp = NULL; 5892 5893 if (nearnullsp) { 5894 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 5895 /* remove any stored info */ 5896 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 5897 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 5898 /* store information for BDDC solver reuse */ 5899 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 5900 pcbddc->onearnullspace = nearnullsp; 5901 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 5902 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 5903 } else { /* if near null space is not provided BDDC uses constants by default */ 5904 nnsp_size = 0; 5905 nnsp_has_cnst = PETSC_TRUE; 5906 } 5907 /* get max number of constraints on a single cc */ 5908 max_constraints = nnsp_size; 5909 if (nnsp_has_cnst) max_constraints++; 5910 5911 /* 5912 Evaluate maximum storage size needed by the procedure 5913 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5914 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5915 There can be multiple constraints per connected component 5916 */ 5917 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 5918 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 5919 5920 total_counts = n_ISForFaces + n_ISForEdges; 5921 total_counts *= max_constraints; 5922 total_counts += n_vertices; 5923 PetscCall(PetscBTCreate(total_counts, &change_basis)); 5924 5925 total_counts = 0; 5926 max_size_of_constraint = 0; 5927 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 5928 IS used_is; 5929 if (i < n_ISForEdges) { 5930 used_is = ISForEdges[i]; 5931 } else { 5932 used_is = ISForFaces[i - n_ISForEdges]; 5933 } 5934 PetscCall(ISGetSize(used_is, &j)); 5935 total_counts += j; 5936 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 5937 } 5938 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 5939 5940 /* get local part of global near null space vectors */ 5941 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 5942 for (k = 0; k < nnsp_size; k++) { 5943 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 5944 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5945 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5946 } 5947 5948 /* whether or not to skip lapack calls */ 5949 skip_lapack = PETSC_TRUE; 5950 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5951 5952 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5953 if (!skip_lapack) { 5954 PetscScalar temp_work; 5955 5956 if (use_pod) { 5957 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5958 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 5959 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 5960 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 5961 #if defined(PETSC_USE_COMPLEX) 5962 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 5963 #endif 5964 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5965 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 5966 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 5967 lwork = -1; 5968 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5969 #if !defined(PETSC_USE_COMPLEX) 5970 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 5971 #else 5972 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 5973 #endif 5974 PetscCall(PetscFPTrapPop()); 5975 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 5976 } else { 5977 #if !defined(PETSC_MISSING_LAPACK_GESVD) 5978 /* SVD */ 5979 PetscInt max_n, min_n; 5980 max_n = max_size_of_constraint; 5981 min_n = max_constraints; 5982 if (max_size_of_constraint < max_constraints) { 5983 min_n = max_size_of_constraint; 5984 max_n = max_constraints; 5985 } 5986 PetscCall(PetscMalloc1(min_n, &singular_vals)); 5987 #if defined(PETSC_USE_COMPLEX) 5988 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 5989 #endif 5990 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5991 lwork = -1; 5992 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 5993 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 5994 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 5995 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5996 #if !defined(PETSC_USE_COMPLEX) 5997 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 5998 #else 5999 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6000 #endif 6001 PetscCall(PetscFPTrapPop()); 6002 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6003 #else 6004 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6005 #endif /* on missing GESVD */ 6006 } 6007 /* Allocate optimal workspace */ 6008 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6009 PetscCall(PetscMalloc1(lwork, &work)); 6010 } 6011 /* Now we can loop on constraining sets */ 6012 total_counts = 0; 6013 constraints_idxs_ptr[0] = 0; 6014 constraints_data_ptr[0] = 0; 6015 /* vertices */ 6016 if (n_vertices) { 6017 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6018 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6019 for (i = 0; i < n_vertices; i++) { 6020 constraints_n[total_counts] = 1; 6021 constraints_data[total_counts] = 1.0; 6022 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6023 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6024 total_counts++; 6025 } 6026 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6027 } 6028 6029 /* edges and faces */ 6030 total_counts_cc = total_counts; 6031 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6032 IS used_is; 6033 PetscBool idxs_copied = PETSC_FALSE; 6034 6035 if (ncc < n_ISForEdges) { 6036 used_is = ISForEdges[ncc]; 6037 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6038 } else { 6039 used_is = ISForFaces[ncc - n_ISForEdges]; 6040 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6041 } 6042 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6043 6044 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6045 if (!size_of_constraint) continue; 6046 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6047 /* change of basis should not be performed on local periodic nodes */ 6048 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6049 if (nnsp_has_cnst) { 6050 PetscScalar quad_value; 6051 6052 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6053 idxs_copied = PETSC_TRUE; 6054 6055 if (!pcbddc->use_nnsp_true) { 6056 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6057 } else { 6058 quad_value = 1.0; 6059 } 6060 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6061 temp_constraints++; 6062 total_counts++; 6063 } 6064 for (k = 0; k < nnsp_size; k++) { 6065 PetscReal real_value; 6066 PetscScalar *ptr_to_data; 6067 6068 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6069 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6070 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6071 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6072 /* check if array is null on the connected component */ 6073 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6074 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6075 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6076 temp_constraints++; 6077 total_counts++; 6078 if (!idxs_copied) { 6079 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6080 idxs_copied = PETSC_TRUE; 6081 } 6082 } 6083 } 6084 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6085 valid_constraints = temp_constraints; 6086 if (!pcbddc->use_nnsp_true && temp_constraints) { 6087 if (temp_constraints == 1) { /* just normalize the constraint */ 6088 PetscScalar norm, *ptr_to_data; 6089 6090 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6091 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6092 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6093 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6094 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6095 } else { /* perform SVD */ 6096 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6097 6098 if (use_pod) { 6099 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6100 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6101 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6102 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6103 from that computed using LAPACKgesvd 6104 -> This is due to a different computation of eigenvectors in LAPACKheev 6105 -> The quality of the POD-computed basis will be the same */ 6106 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6107 /* Store upper triangular part of correlation matrix */ 6108 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6109 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6110 for (j = 0; j < temp_constraints; j++) { 6111 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6112 } 6113 /* compute eigenvalues and eigenvectors of correlation matrix */ 6114 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6115 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6116 #if !defined(PETSC_USE_COMPLEX) 6117 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6118 #else 6119 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6120 #endif 6121 PetscCall(PetscFPTrapPop()); 6122 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6123 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6124 j = 0; 6125 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6126 total_counts = total_counts - j; 6127 valid_constraints = temp_constraints - j; 6128 /* scale and copy POD basis into used quadrature memory */ 6129 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6130 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6131 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6132 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6133 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6134 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6135 if (j < temp_constraints) { 6136 PetscInt ii; 6137 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6138 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6139 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6140 PetscCall(PetscFPTrapPop()); 6141 for (k = 0; k < temp_constraints - j; k++) { 6142 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6143 } 6144 } 6145 } else { 6146 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6147 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6148 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6149 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6150 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6151 #if !defined(PETSC_USE_COMPLEX) 6152 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6153 #else 6154 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6155 #endif 6156 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6157 PetscCall(PetscFPTrapPop()); 6158 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6159 k = temp_constraints; 6160 if (k > size_of_constraint) k = size_of_constraint; 6161 j = 0; 6162 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6163 valid_constraints = k - j; 6164 total_counts = total_counts - temp_constraints + valid_constraints; 6165 #else 6166 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6167 #endif /* on missing GESVD */ 6168 } 6169 } 6170 } 6171 /* update pointers information */ 6172 if (valid_constraints) { 6173 constraints_n[total_counts_cc] = valid_constraints; 6174 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6175 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6176 /* set change_of_basis flag */ 6177 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6178 total_counts_cc++; 6179 } 6180 } 6181 /* free workspace */ 6182 if (!skip_lapack) { 6183 PetscCall(PetscFree(work)); 6184 #if defined(PETSC_USE_COMPLEX) 6185 PetscCall(PetscFree(rwork)); 6186 #endif 6187 PetscCall(PetscFree(singular_vals)); 6188 PetscCall(PetscFree(correlation_mat)); 6189 PetscCall(PetscFree(temp_basis)); 6190 } 6191 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6192 PetscCall(PetscFree(localnearnullsp)); 6193 /* free index sets of faces, edges and vertices */ 6194 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6195 } else { 6196 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6197 6198 total_counts = 0; 6199 n_vertices = 0; 6200 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6201 max_constraints = 0; 6202 total_counts_cc = 0; 6203 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6204 total_counts += pcbddc->adaptive_constraints_n[i]; 6205 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6206 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6207 } 6208 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6209 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6210 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6211 constraints_data = pcbddc->adaptive_constraints_data; 6212 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6213 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6214 total_counts_cc = 0; 6215 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6216 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6217 } 6218 6219 max_size_of_constraint = 0; 6220 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6221 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6222 /* Change of basis */ 6223 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6224 if (pcbddc->use_change_of_basis) { 6225 for (i = 0; i < sub_schurs->n_subs; i++) { 6226 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6227 } 6228 } 6229 } 6230 pcbddc->local_primal_size = total_counts; 6231 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6232 6233 /* map constraints_idxs in boundary numbering */ 6234 if (pcbddc->use_change_of_basis) { 6235 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6236 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6237 } 6238 6239 /* Create constraint matrix */ 6240 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6241 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6242 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6243 6244 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6245 /* determine if a QR strategy is needed for change of basis */ 6246 qr_needed = pcbddc->use_qr_single; 6247 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6248 total_primal_vertices = 0; 6249 pcbddc->local_primal_size_cc = 0; 6250 for (i = 0; i < total_counts_cc; i++) { 6251 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6252 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6253 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6254 pcbddc->local_primal_size_cc += 1; 6255 } else if (PetscBTLookup(change_basis, i)) { 6256 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6257 pcbddc->local_primal_size_cc += constraints_n[i]; 6258 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6259 PetscCall(PetscBTSet(qr_needed_idx, i)); 6260 qr_needed = PETSC_TRUE; 6261 } 6262 } else { 6263 pcbddc->local_primal_size_cc += 1; 6264 } 6265 } 6266 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6267 pcbddc->n_vertices = total_primal_vertices; 6268 /* permute indices in order to have a sorted set of vertices */ 6269 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6270 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6271 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6272 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6273 6274 /* nonzero structure of constraint matrix */ 6275 /* and get reference dof for local constraints */ 6276 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6277 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6278 6279 j = total_primal_vertices; 6280 total_counts = total_primal_vertices; 6281 cum = total_primal_vertices; 6282 for (i = n_vertices; i < total_counts_cc; i++) { 6283 if (!PetscBTLookup(change_basis, i)) { 6284 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6285 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6286 cum++; 6287 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6288 for (k = 0; k < constraints_n[i]; k++) { 6289 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6290 nnz[j + k] = size_of_constraint; 6291 } 6292 j += constraints_n[i]; 6293 } 6294 } 6295 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6296 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6297 PetscCall(PetscFree(nnz)); 6298 6299 /* set values in constraint matrix */ 6300 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6301 total_counts = total_primal_vertices; 6302 for (i = n_vertices; i < total_counts_cc; i++) { 6303 if (!PetscBTLookup(change_basis, i)) { 6304 PetscInt *cols; 6305 6306 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6307 cols = constraints_idxs + constraints_idxs_ptr[i]; 6308 for (k = 0; k < constraints_n[i]; k++) { 6309 PetscInt row = total_counts + k; 6310 PetscScalar *vals; 6311 6312 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6313 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6314 } 6315 total_counts += constraints_n[i]; 6316 } 6317 } 6318 /* assembling */ 6319 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6320 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6321 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6322 6323 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6324 if (pcbddc->use_change_of_basis) { 6325 /* dual and primal dofs on a single cc */ 6326 PetscInt dual_dofs, primal_dofs; 6327 /* working stuff for GEQRF */ 6328 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6329 PetscBLASInt lqr_work; 6330 /* working stuff for UNGQR */ 6331 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6332 PetscBLASInt lgqr_work; 6333 /* working stuff for TRTRS */ 6334 PetscScalar *trs_rhs = NULL; 6335 PetscBLASInt Blas_NRHS; 6336 /* pointers for values insertion into change of basis matrix */ 6337 PetscInt *start_rows, *start_cols; 6338 PetscScalar *start_vals; 6339 /* working stuff for values insertion */ 6340 PetscBT is_primal; 6341 PetscInt *aux_primal_numbering_B; 6342 /* matrix sizes */ 6343 PetscInt global_size, local_size; 6344 /* temporary change of basis */ 6345 Mat localChangeOfBasisMatrix; 6346 /* extra space for debugging */ 6347 PetscScalar *dbg_work = NULL; 6348 6349 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6350 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6351 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6352 /* nonzeros for local mat */ 6353 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6354 if (!pcbddc->benign_change || pcbddc->fake_change) { 6355 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6356 } else { 6357 const PetscInt *ii; 6358 PetscInt n; 6359 PetscBool flg_row; 6360 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6361 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6362 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6363 } 6364 for (i = n_vertices; i < total_counts_cc; i++) { 6365 if (PetscBTLookup(change_basis, i)) { 6366 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6367 if (PetscBTLookup(qr_needed_idx, i)) { 6368 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6369 } else { 6370 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6371 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6372 } 6373 } 6374 } 6375 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6376 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6377 PetscCall(PetscFree(nnz)); 6378 /* Set interior change in the matrix */ 6379 if (!pcbddc->benign_change || pcbddc->fake_change) { 6380 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6381 } else { 6382 const PetscInt *ii, *jj; 6383 PetscScalar *aa; 6384 PetscInt n; 6385 PetscBool flg_row; 6386 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6387 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6388 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6389 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6390 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6391 } 6392 6393 if (pcbddc->dbg_flag) { 6394 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6395 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6396 } 6397 6398 /* Now we loop on the constraints which need a change of basis */ 6399 /* 6400 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6401 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6402 6403 Basic blocks of change of basis matrix T computed: 6404 6405 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6406 6407 | 1 0 ... 0 s_1/S | 6408 | 0 1 ... 0 s_2/S | 6409 | ... | 6410 | 0 ... 1 s_{n-1}/S | 6411 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6412 6413 with S = \sum_{i=1}^n s_i^2 6414 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6415 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6416 6417 - QR decomposition of constraints otherwise 6418 */ 6419 if (qr_needed && max_size_of_constraint) { 6420 /* space to store Q */ 6421 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6422 /* array to store scaling factors for reflectors */ 6423 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6424 /* first we issue queries for optimal work */ 6425 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6426 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6427 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6428 lqr_work = -1; 6429 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6430 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6431 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6432 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 6433 lgqr_work = -1; 6434 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6435 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6436 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6437 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6438 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6439 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6440 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6441 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6442 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 6443 /* array to store rhs and solution of triangular solver */ 6444 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6445 /* allocating workspace for check */ 6446 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 6447 } 6448 /* array to store whether a node is primal or not */ 6449 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6450 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6451 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6452 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6453 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 6454 PetscCall(PetscFree(aux_primal_numbering_B)); 6455 6456 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6457 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 6458 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 6459 if (PetscBTLookup(change_basis, total_counts)) { 6460 /* get constraint info */ 6461 primal_dofs = constraints_n[total_counts]; 6462 dual_dofs = size_of_constraint - primal_dofs; 6463 6464 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 6465 6466 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 6467 6468 /* copy quadrature constraints for change of basis check */ 6469 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6470 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6471 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6472 6473 /* compute QR decomposition of constraints */ 6474 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6475 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6476 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6477 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6478 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 6479 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 6480 PetscCall(PetscFPTrapPop()); 6481 6482 /* explicitly compute R^-T */ 6483 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 6484 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 6485 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6486 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 6487 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6488 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6489 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6490 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 6491 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 6492 PetscCall(PetscFPTrapPop()); 6493 6494 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6495 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6496 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6497 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6498 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6499 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6500 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 6501 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 6502 PetscCall(PetscFPTrapPop()); 6503 6504 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6505 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6506 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6507 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6508 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6509 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6510 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6511 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6512 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6513 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6514 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 6515 PetscCall(PetscFPTrapPop()); 6516 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6517 6518 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6519 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6520 /* insert cols for primal dofs */ 6521 for (j = 0; j < primal_dofs; j++) { 6522 start_vals = &qr_basis[j * size_of_constraint]; 6523 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6524 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6525 } 6526 /* insert cols for dual dofs */ 6527 for (j = 0, k = 0; j < dual_dofs; k++) { 6528 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 6529 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 6530 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6531 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6532 j++; 6533 } 6534 } 6535 6536 /* check change of basis */ 6537 if (pcbddc->dbg_flag) { 6538 PetscInt ii, jj; 6539 PetscBool valid_qr = PETSC_TRUE; 6540 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 6541 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6542 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 6543 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6544 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 6545 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 6546 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6547 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 6548 PetscCall(PetscFPTrapPop()); 6549 for (jj = 0; jj < size_of_constraint; jj++) { 6550 for (ii = 0; ii < primal_dofs; ii++) { 6551 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6552 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6553 } 6554 } 6555 if (!valid_qr) { 6556 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 6557 for (jj = 0; jj < size_of_constraint; jj++) { 6558 for (ii = 0; ii < primal_dofs; ii++) { 6559 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 6560 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6561 } 6562 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 6563 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6564 } 6565 } 6566 } 6567 } else { 6568 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 6569 } 6570 } 6571 } else { /* simple transformation block */ 6572 PetscInt row, col; 6573 PetscScalar val, norm; 6574 6575 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6576 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 6577 for (j = 0; j < size_of_constraint; j++) { 6578 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 6579 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6580 if (!PetscBTLookup(is_primal, row_B)) { 6581 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6582 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 6583 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 6584 } else { 6585 for (k = 0; k < size_of_constraint; k++) { 6586 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6587 if (row != col) { 6588 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 6589 } else { 6590 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 6591 } 6592 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 6593 } 6594 } 6595 } 6596 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 6597 } 6598 } else { 6599 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 6600 } 6601 } 6602 6603 /* free workspace */ 6604 if (qr_needed) { 6605 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6606 PetscCall(PetscFree(trs_rhs)); 6607 PetscCall(PetscFree(qr_tau)); 6608 PetscCall(PetscFree(qr_work)); 6609 PetscCall(PetscFree(gqr_work)); 6610 PetscCall(PetscFree(qr_basis)); 6611 } 6612 PetscCall(PetscBTDestroy(&is_primal)); 6613 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6614 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6615 6616 /* assembling of global change of variable */ 6617 if (!pcbddc->fake_change) { 6618 Mat tmat; 6619 PetscInt bs; 6620 6621 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 6622 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 6623 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 6624 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 6625 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 6626 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 6627 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 6628 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 6629 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 6630 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 6631 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 6632 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 6633 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 6634 PetscCall(MatDestroy(&tmat)); 6635 PetscCall(VecSet(pcis->vec1_global, 0.0)); 6636 PetscCall(VecSet(pcis->vec1_N, 1.0)); 6637 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6638 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6639 PetscCall(VecReciprocal(pcis->vec1_global)); 6640 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 6641 6642 /* check */ 6643 if (pcbddc->dbg_flag) { 6644 PetscReal error; 6645 Vec x, x_change; 6646 6647 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 6648 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 6649 PetscCall(VecSetRandom(x, NULL)); 6650 PetscCall(VecCopy(x, pcis->vec1_global)); 6651 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6652 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6653 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 6654 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6655 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6656 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 6657 PetscCall(VecAXPY(x, -1.0, x_change)); 6658 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 6659 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 6660 PetscCall(VecDestroy(&x)); 6661 PetscCall(VecDestroy(&x_change)); 6662 } 6663 /* adapt sub_schurs computed (if any) */ 6664 if (pcbddc->use_deluxe_scaling) { 6665 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6666 6667 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6668 if (sub_schurs && sub_schurs->S_Ej_all) { 6669 Mat S_new, tmat; 6670 IS is_all_N, is_V_Sall = NULL; 6671 6672 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 6673 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 6674 if (pcbddc->deluxe_zerorows) { 6675 ISLocalToGlobalMapping NtoSall; 6676 IS is_V; 6677 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 6678 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 6679 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 6680 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6681 PetscCall(ISDestroy(&is_V)); 6682 } 6683 PetscCall(ISDestroy(&is_all_N)); 6684 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6685 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6686 PetscCall(PetscObjectReference((PetscObject)S_new)); 6687 if (pcbddc->deluxe_zerorows) { 6688 const PetscScalar *array; 6689 const PetscInt *idxs_V, *idxs_all; 6690 PetscInt i, n_V; 6691 6692 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6693 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 6694 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 6695 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 6696 PetscCall(VecGetArrayRead(pcis->D, &array)); 6697 for (i = 0; i < n_V; i++) { 6698 PetscScalar val; 6699 PetscInt idx; 6700 6701 idx = idxs_V[i]; 6702 val = array[idxs_all[idxs_V[i]]]; 6703 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 6704 } 6705 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 6706 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 6707 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 6708 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 6709 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 6710 } 6711 sub_schurs->S_Ej_all = S_new; 6712 PetscCall(MatDestroy(&S_new)); 6713 if (sub_schurs->sum_S_Ej_all) { 6714 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6715 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6716 PetscCall(PetscObjectReference((PetscObject)S_new)); 6717 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6718 sub_schurs->sum_S_Ej_all = S_new; 6719 PetscCall(MatDestroy(&S_new)); 6720 } 6721 PetscCall(ISDestroy(&is_V_Sall)); 6722 PetscCall(MatDestroy(&tmat)); 6723 } 6724 /* destroy any change of basis context in sub_schurs */ 6725 if (sub_schurs && sub_schurs->change) { 6726 PetscInt i; 6727 6728 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 6729 PetscCall(PetscFree(sub_schurs->change)); 6730 } 6731 } 6732 if (pcbddc->switch_static) { /* need to save the local change */ 6733 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6734 } else { 6735 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6736 } 6737 /* determine if any process has changed the pressures locally */ 6738 pcbddc->change_interior = pcbddc->benign_have_null; 6739 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6740 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6741 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6742 pcbddc->use_qr_single = qr_needed; 6743 } 6744 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6745 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6746 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 6747 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6748 } else { 6749 Mat benign_global = NULL; 6750 if (pcbddc->benign_have_null) { 6751 Mat M; 6752 6753 pcbddc->change_interior = PETSC_TRUE; 6754 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 6755 PetscCall(VecReciprocal(pcis->vec1_N)); 6756 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 6757 if (pcbddc->benign_change) { 6758 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 6759 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 6760 } else { 6761 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 6762 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 6763 } 6764 PetscCall(MatISSetLocalMat(benign_global, M)); 6765 PetscCall(MatDestroy(&M)); 6766 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 6767 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 6768 } 6769 if (pcbddc->user_ChangeOfBasisMatrix) { 6770 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 6771 PetscCall(MatDestroy(&benign_global)); 6772 } else if (pcbddc->benign_have_null) { 6773 pcbddc->ChangeOfBasisMatrix = benign_global; 6774 } 6775 } 6776 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6777 IS is_global; 6778 const PetscInt *gidxs; 6779 6780 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 6781 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 6782 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 6783 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 6784 PetscCall(ISDestroy(&is_global)); 6785 } 6786 } 6787 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 6788 6789 if (!pcbddc->fake_change) { 6790 /* add pressure dofs to set of primal nodes for numbering purposes */ 6791 for (i = 0; i < pcbddc->benign_n; i++) { 6792 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6793 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6794 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6795 pcbddc->local_primal_size_cc++; 6796 pcbddc->local_primal_size++; 6797 } 6798 6799 /* check if a new primal space has been introduced (also take into account benign trick) */ 6800 pcbddc->new_primal_space_local = PETSC_TRUE; 6801 if (olocal_primal_size == pcbddc->local_primal_size) { 6802 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6803 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6804 if (!pcbddc->new_primal_space_local) { 6805 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6806 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6807 } 6808 } 6809 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6810 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 6811 } 6812 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 6813 6814 /* flush dbg viewer */ 6815 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6816 6817 /* free workspace */ 6818 PetscCall(PetscBTDestroy(&qr_needed_idx)); 6819 PetscCall(PetscBTDestroy(&change_basis)); 6820 if (!pcbddc->adaptive_selection) { 6821 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 6822 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 6823 } else { 6824 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 6825 PetscCall(PetscFree(constraints_n)); 6826 PetscCall(PetscFree(constraints_idxs_B)); 6827 } 6828 PetscFunctionReturn(PETSC_SUCCESS); 6829 } 6830 6831 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6832 { 6833 ISLocalToGlobalMapping map; 6834 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6835 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6836 PetscInt i, N; 6837 PetscBool rcsr = PETSC_FALSE; 6838 6839 PetscFunctionBegin; 6840 if (pcbddc->recompute_topography) { 6841 pcbddc->graphanalyzed = PETSC_FALSE; 6842 /* Reset previously computed graph */ 6843 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 6844 /* Init local Graph struct */ 6845 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 6846 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 6847 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 6848 6849 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 6850 /* Check validity of the csr graph passed in by the user */ 6851 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 6852 pcbddc->mat_graph->nvtxs); 6853 6854 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6855 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6856 PetscInt *xadj, *adjncy; 6857 PetscInt nvtxs; 6858 PetscBool flg_row = PETSC_FALSE; 6859 6860 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6861 if (flg_row) { 6862 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 6863 pcbddc->computed_rowadj = PETSC_TRUE; 6864 } 6865 PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6866 rcsr = PETSC_TRUE; 6867 } 6868 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6869 6870 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6871 PetscReal *lcoords; 6872 PetscInt n; 6873 MPI_Datatype dimrealtype; 6874 6875 /* TODO: support for blocked */ 6876 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 6877 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6878 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 6879 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 6880 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 6881 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6882 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6883 PetscCallMPI(MPI_Type_free(&dimrealtype)); 6884 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 6885 6886 pcbddc->mat_graph->coords = lcoords; 6887 pcbddc->mat_graph->cloc = PETSC_TRUE; 6888 pcbddc->mat_graph->cnloc = n; 6889 } 6890 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 6891 pcbddc->mat_graph->nvtxs); 6892 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 6893 6894 /* Setup of Graph */ 6895 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6896 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 6897 6898 /* attach info on disconnected subdomains if present */ 6899 if (pcbddc->n_local_subs) { 6900 PetscInt *local_subs, n, totn; 6901 6902 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6903 PetscCall(PetscMalloc1(n, &local_subs)); 6904 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 6905 for (i = 0; i < pcbddc->n_local_subs; i++) { 6906 const PetscInt *idxs; 6907 PetscInt nl, j; 6908 6909 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 6910 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 6911 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 6912 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 6913 } 6914 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 6915 pcbddc->mat_graph->n_local_subs = totn + 1; 6916 pcbddc->mat_graph->local_subs = local_subs; 6917 } 6918 } 6919 6920 if (!pcbddc->graphanalyzed) { 6921 /* Graph's connected components analysis */ 6922 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 6923 pcbddc->graphanalyzed = PETSC_TRUE; 6924 pcbddc->corner_selected = pcbddc->corner_selection; 6925 } 6926 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6927 PetscFunctionReturn(PETSC_SUCCESS); 6928 } 6929 6930 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 6931 { 6932 PetscInt i, j, n; 6933 PetscScalar *alphas; 6934 PetscReal norm, *onorms; 6935 6936 PetscFunctionBegin; 6937 n = *nio; 6938 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 6939 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 6940 PetscCall(VecNormalize(vecs[0], &norm)); 6941 if (norm < PETSC_SMALL) { 6942 onorms[0] = 0.0; 6943 PetscCall(VecSet(vecs[0], 0.0)); 6944 } else { 6945 onorms[0] = norm; 6946 } 6947 6948 for (i = 1; i < n; i++) { 6949 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 6950 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 6951 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 6952 PetscCall(VecNormalize(vecs[i], &norm)); 6953 if (norm < PETSC_SMALL) { 6954 onorms[i] = 0.0; 6955 PetscCall(VecSet(vecs[i], 0.0)); 6956 } else { 6957 onorms[i] = norm; 6958 } 6959 } 6960 /* push nonzero vectors at the beginning */ 6961 for (i = 0; i < n; i++) { 6962 if (onorms[i] == 0.0) { 6963 for (j = i + 1; j < n; j++) { 6964 if (onorms[j] != 0.0) { 6965 PetscCall(VecCopy(vecs[j], vecs[i])); 6966 onorms[j] = 0.0; 6967 } 6968 } 6969 } 6970 } 6971 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 6972 PetscCall(PetscFree2(alphas, onorms)); 6973 PetscFunctionReturn(PETSC_SUCCESS); 6974 } 6975 6976 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 6977 { 6978 ISLocalToGlobalMapping mapping; 6979 Mat A; 6980 PetscInt n_neighs, *neighs, *n_shared, **shared; 6981 PetscMPIInt size, rank, color; 6982 PetscInt *xadj, *adjncy; 6983 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 6984 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 6985 PetscInt void_procs, *procs_candidates = NULL; 6986 PetscInt xadj_count, *count; 6987 PetscBool ismatis, use_vwgt = PETSC_FALSE; 6988 PetscSubcomm psubcomm; 6989 MPI_Comm subcomm; 6990 6991 PetscFunctionBegin; 6992 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 6993 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 6994 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 6995 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 6996 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 6997 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 6998 6999 if (have_void) *have_void = PETSC_FALSE; 7000 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7001 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7002 PetscCall(MatISGetLocalMat(mat, &A)); 7003 PetscCall(MatGetLocalSize(A, &n, NULL)); 7004 im_active = !!n; 7005 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7006 void_procs = size - active_procs; 7007 /* get ranks of of non-active processes in mat communicator */ 7008 if (void_procs) { 7009 PetscInt ncand; 7010 7011 if (have_void) *have_void = PETSC_TRUE; 7012 PetscCall(PetscMalloc1(size, &procs_candidates)); 7013 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7014 for (i = 0, ncand = 0; i < size; i++) { 7015 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7016 } 7017 /* force n_subdomains to be not greater that the number of non-active processes */ 7018 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7019 } 7020 7021 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7022 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7023 PetscCall(MatGetSize(mat, &N, NULL)); 7024 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7025 PetscInt issize, isidx, dest; 7026 if (*n_subdomains == 1) dest = 0; 7027 else dest = rank; 7028 if (im_active) { 7029 issize = 1; 7030 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7031 isidx = procs_candidates[dest]; 7032 } else { 7033 isidx = dest; 7034 } 7035 } else { 7036 issize = 0; 7037 isidx = -1; 7038 } 7039 if (*n_subdomains != 1) *n_subdomains = active_procs; 7040 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7041 PetscCall(PetscFree(procs_candidates)); 7042 PetscFunctionReturn(PETSC_SUCCESS); 7043 } 7044 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL)); 7045 PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL)); 7046 threshold = PetscMax(threshold, 2); 7047 7048 /* Get info on mapping */ 7049 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7050 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7051 7052 /* build local CSR graph of subdomains' connectivity */ 7053 PetscCall(PetscMalloc1(2, &xadj)); 7054 xadj[0] = 0; 7055 xadj[1] = PetscMax(n_neighs - 1, 0); 7056 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7057 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7058 PetscCall(PetscCalloc1(n, &count)); 7059 for (i = 1; i < n_neighs; i++) 7060 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7061 7062 xadj_count = 0; 7063 for (i = 1; i < n_neighs; i++) { 7064 for (j = 0; j < n_shared[i]; j++) { 7065 if (count[shared[i][j]] < threshold) { 7066 adjncy[xadj_count] = neighs[i]; 7067 adjncy_wgt[xadj_count] = n_shared[i]; 7068 xadj_count++; 7069 break; 7070 } 7071 } 7072 } 7073 xadj[1] = xadj_count; 7074 PetscCall(PetscFree(count)); 7075 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7076 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7077 7078 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7079 7080 /* Restrict work on active processes only */ 7081 PetscCall(PetscMPIIntCast(im_active, &color)); 7082 if (void_procs) { 7083 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7084 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7085 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7086 subcomm = PetscSubcommChild(psubcomm); 7087 } else { 7088 psubcomm = NULL; 7089 subcomm = PetscObjectComm((PetscObject)mat); 7090 } 7091 7092 v_wgt = NULL; 7093 if (!color) { 7094 PetscCall(PetscFree(xadj)); 7095 PetscCall(PetscFree(adjncy)); 7096 PetscCall(PetscFree(adjncy_wgt)); 7097 } else { 7098 Mat subdomain_adj; 7099 IS new_ranks, new_ranks_contig; 7100 MatPartitioning partitioner; 7101 PetscInt rstart = 0, rend = 0; 7102 PetscInt *is_indices, *oldranks; 7103 PetscMPIInt size; 7104 PetscBool aggregate; 7105 7106 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7107 if (void_procs) { 7108 PetscInt prank = rank; 7109 PetscCall(PetscMalloc1(size, &oldranks)); 7110 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7111 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7112 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7113 } else { 7114 oldranks = NULL; 7115 } 7116 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7117 if (aggregate) { /* TODO: all this part could be made more efficient */ 7118 PetscInt lrows, row, ncols, *cols; 7119 PetscMPIInt nrank; 7120 PetscScalar *vals; 7121 7122 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7123 lrows = 0; 7124 if (nrank < redprocs) { 7125 lrows = size / redprocs; 7126 if (nrank < size % redprocs) lrows++; 7127 } 7128 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7129 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7130 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7131 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7132 row = nrank; 7133 ncols = xadj[1] - xadj[0]; 7134 cols = adjncy; 7135 PetscCall(PetscMalloc1(ncols, &vals)); 7136 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7137 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7138 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7139 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7140 PetscCall(PetscFree(xadj)); 7141 PetscCall(PetscFree(adjncy)); 7142 PetscCall(PetscFree(adjncy_wgt)); 7143 PetscCall(PetscFree(vals)); 7144 if (use_vwgt) { 7145 Vec v; 7146 const PetscScalar *array; 7147 PetscInt nl; 7148 7149 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7150 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7151 PetscCall(VecAssemblyBegin(v)); 7152 PetscCall(VecAssemblyEnd(v)); 7153 PetscCall(VecGetLocalSize(v, &nl)); 7154 PetscCall(VecGetArrayRead(v, &array)); 7155 PetscCall(PetscMalloc1(nl, &v_wgt)); 7156 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7157 PetscCall(VecRestoreArrayRead(v, &array)); 7158 PetscCall(VecDestroy(&v)); 7159 } 7160 } else { 7161 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7162 if (use_vwgt) { 7163 PetscCall(PetscMalloc1(1, &v_wgt)); 7164 v_wgt[0] = n; 7165 } 7166 } 7167 /* PetscCall(MatView(subdomain_adj,0)); */ 7168 7169 /* Partition */ 7170 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7171 #if defined(PETSC_HAVE_PTSCOTCH) 7172 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7173 #elif defined(PETSC_HAVE_PARMETIS) 7174 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7175 #else 7176 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7177 #endif 7178 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7179 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7180 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7181 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7182 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7183 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7184 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7185 7186 /* renumber new_ranks to avoid "holes" in new set of processors */ 7187 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7188 PetscCall(ISDestroy(&new_ranks)); 7189 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7190 if (!aggregate) { 7191 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7192 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7193 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7194 } else if (oldranks) { 7195 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7196 } else { 7197 ranks_send_to_idx[0] = is_indices[0]; 7198 } 7199 } else { 7200 PetscInt idx = 0; 7201 PetscMPIInt tag; 7202 MPI_Request *reqs; 7203 7204 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7205 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7206 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7207 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7208 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7209 PetscCall(PetscFree(reqs)); 7210 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7211 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7212 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7213 } else if (oldranks) { 7214 ranks_send_to_idx[0] = oldranks[idx]; 7215 } else { 7216 ranks_send_to_idx[0] = idx; 7217 } 7218 } 7219 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7220 /* clean up */ 7221 PetscCall(PetscFree(oldranks)); 7222 PetscCall(ISDestroy(&new_ranks_contig)); 7223 PetscCall(MatDestroy(&subdomain_adj)); 7224 PetscCall(MatPartitioningDestroy(&partitioner)); 7225 } 7226 PetscCall(PetscSubcommDestroy(&psubcomm)); 7227 PetscCall(PetscFree(procs_candidates)); 7228 7229 /* assemble parallel IS for sends */ 7230 i = 1; 7231 if (!color) i = 0; 7232 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7233 PetscFunctionReturn(PETSC_SUCCESS); 7234 } 7235 7236 typedef enum { 7237 MATDENSE_PRIVATE = 0, 7238 MATAIJ_PRIVATE, 7239 MATBAIJ_PRIVATE, 7240 MATSBAIJ_PRIVATE 7241 } MatTypePrivate; 7242 7243 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7244 { 7245 Mat local_mat; 7246 IS is_sends_internal; 7247 PetscInt rows, cols, new_local_rows; 7248 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7249 PetscBool ismatis, isdense, newisdense, destroy_mat; 7250 ISLocalToGlobalMapping l2gmap; 7251 PetscInt *l2gmap_indices; 7252 const PetscInt *is_indices; 7253 MatType new_local_type; 7254 /* buffers */ 7255 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7256 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7257 PetscInt *recv_buffer_idxs_local; 7258 PetscScalar *ptr_vals, *recv_buffer_vals; 7259 const PetscScalar *send_buffer_vals; 7260 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7261 /* MPI */ 7262 MPI_Comm comm, comm_n; 7263 PetscSubcomm subcomm; 7264 PetscMPIInt n_sends, n_recvs, size; 7265 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7266 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7267 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7268 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7269 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7270 7271 PetscFunctionBegin; 7272 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7273 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7274 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7275 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7276 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7277 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7278 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7279 PetscValidLogicalCollectiveInt(mat, nis, 8); 7280 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7281 if (nvecs) { 7282 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7283 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7284 } 7285 /* further checks */ 7286 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7287 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7288 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7289 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7290 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7291 if (reuse && *mat_n) { 7292 PetscInt mrows, mcols, mnrows, mncols; 7293 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7294 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7295 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7296 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7297 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7298 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7299 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7300 } 7301 PetscCall(MatGetBlockSize(local_mat, &bs)); 7302 PetscValidLogicalCollectiveInt(mat, bs, 1); 7303 7304 /* prepare IS for sending if not provided */ 7305 if (!is_sends) { 7306 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7307 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7308 } else { 7309 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7310 is_sends_internal = is_sends; 7311 } 7312 7313 /* get comm */ 7314 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7315 7316 /* compute number of sends */ 7317 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7318 PetscCall(PetscMPIIntCast(i, &n_sends)); 7319 7320 /* compute number of receives */ 7321 PetscCallMPI(MPI_Comm_size(comm, &size)); 7322 PetscCall(PetscMalloc1(size, &iflags)); 7323 PetscCall(PetscArrayzero(iflags, size)); 7324 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7325 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7326 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7327 PetscCall(PetscFree(iflags)); 7328 7329 /* restrict comm if requested */ 7330 subcomm = NULL; 7331 destroy_mat = PETSC_FALSE; 7332 if (restrict_comm) { 7333 PetscMPIInt color, subcommsize; 7334 7335 color = 0; 7336 if (restrict_full) { 7337 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7338 } else { 7339 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7340 } 7341 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7342 subcommsize = size - subcommsize; 7343 /* check if reuse has been requested */ 7344 if (reuse) { 7345 if (*mat_n) { 7346 PetscMPIInt subcommsize2; 7347 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7348 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7349 comm_n = PetscObjectComm((PetscObject)*mat_n); 7350 } else { 7351 comm_n = PETSC_COMM_SELF; 7352 } 7353 } else { /* MAT_INITIAL_MATRIX */ 7354 PetscMPIInt rank; 7355 7356 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7357 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7358 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7359 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7360 comm_n = PetscSubcommChild(subcomm); 7361 } 7362 /* flag to destroy *mat_n if not significative */ 7363 if (color) destroy_mat = PETSC_TRUE; 7364 } else { 7365 comm_n = comm; 7366 } 7367 7368 /* prepare send/receive buffers */ 7369 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7370 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7371 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7372 PetscCall(PetscArrayzero(ilengths_vals, size)); 7373 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 7374 7375 /* Get data from local matrices */ 7376 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7377 /* TODO: See below some guidelines on how to prepare the local buffers */ 7378 /* 7379 send_buffer_vals should contain the raw values of the local matrix 7380 send_buffer_idxs should contain: 7381 - MatType_PRIVATE type 7382 - PetscInt size_of_l2gmap 7383 - PetscInt global_row_indices[size_of_l2gmap] 7384 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7385 */ 7386 { 7387 ISLocalToGlobalMapping mapping; 7388 7389 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7390 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7391 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7392 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7393 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7394 send_buffer_idxs[1] = i; 7395 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7396 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7397 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7398 PetscCall(PetscMPIIntCast(i, &len)); 7399 for (i = 0; i < n_sends; i++) { 7400 ilengths_vals[is_indices[i]] = len * len; 7401 ilengths_idxs[is_indices[i]] = len + 2; 7402 } 7403 } 7404 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7405 /* additional is (if any) */ 7406 if (nis) { 7407 PetscMPIInt psum; 7408 PetscInt j; 7409 for (j = 0, psum = 0; j < nis; j++) { 7410 PetscInt plen; 7411 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7412 PetscCall(PetscMPIIntCast(plen, &len)); 7413 psum += len + 1; /* indices + length */ 7414 } 7415 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7416 for (j = 0, psum = 0; j < nis; j++) { 7417 PetscInt plen; 7418 const PetscInt *is_array_idxs; 7419 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7420 send_buffer_idxs_is[psum] = plen; 7421 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7422 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7423 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7424 psum += plen + 1; /* indices + length */ 7425 } 7426 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 7427 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7428 } 7429 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7430 7431 buf_size_idxs = 0; 7432 buf_size_vals = 0; 7433 buf_size_idxs_is = 0; 7434 buf_size_vecs = 0; 7435 for (i = 0; i < n_recvs; i++) { 7436 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7437 buf_size_vals += (PetscInt)olengths_vals[i]; 7438 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7439 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7440 } 7441 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 7442 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 7443 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 7444 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 7445 7446 /* get new tags for clean communications */ 7447 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 7448 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 7449 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 7450 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 7451 7452 /* allocate for requests */ 7453 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 7454 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 7455 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 7456 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 7457 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 7458 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 7459 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 7460 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 7461 7462 /* communications */ 7463 ptr_idxs = recv_buffer_idxs; 7464 ptr_vals = recv_buffer_vals; 7465 ptr_idxs_is = recv_buffer_idxs_is; 7466 ptr_vecs = recv_buffer_vecs; 7467 for (i = 0; i < n_recvs; i++) { 7468 source_dest = onodes[i]; 7469 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 7470 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 7471 ptr_idxs += olengths_idxs[i]; 7472 ptr_vals += olengths_vals[i]; 7473 if (nis) { 7474 source_dest = onodes_is[i]; 7475 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 7476 ptr_idxs_is += olengths_idxs_is[i]; 7477 } 7478 if (nvecs) { 7479 source_dest = onodes[i]; 7480 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 7481 ptr_vecs += olengths_idxs[i] - 2; 7482 } 7483 } 7484 for (i = 0; i < n_sends; i++) { 7485 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 7486 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 7487 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 7488 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 7489 if (nvecs) { 7490 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7491 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 7492 } 7493 } 7494 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 7495 PetscCall(ISDestroy(&is_sends_internal)); 7496 7497 /* assemble new l2g map */ 7498 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 7499 ptr_idxs = recv_buffer_idxs; 7500 new_local_rows = 0; 7501 for (i = 0; i < n_recvs; i++) { 7502 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7503 ptr_idxs += olengths_idxs[i]; 7504 } 7505 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 7506 ptr_idxs = recv_buffer_idxs; 7507 new_local_rows = 0; 7508 for (i = 0; i < n_recvs; i++) { 7509 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 7510 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7511 ptr_idxs += olengths_idxs[i]; 7512 } 7513 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 7514 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 7515 PetscCall(PetscFree(l2gmap_indices)); 7516 7517 /* infer new local matrix type from received local matrices type */ 7518 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7519 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7520 if (n_recvs) { 7521 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7522 ptr_idxs = recv_buffer_idxs; 7523 for (i = 0; i < n_recvs; i++) { 7524 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7525 new_local_type_private = MATAIJ_PRIVATE; 7526 break; 7527 } 7528 ptr_idxs += olengths_idxs[i]; 7529 } 7530 switch (new_local_type_private) { 7531 case MATDENSE_PRIVATE: 7532 new_local_type = MATSEQAIJ; 7533 bs = 1; 7534 break; 7535 case MATAIJ_PRIVATE: 7536 new_local_type = MATSEQAIJ; 7537 bs = 1; 7538 break; 7539 case MATBAIJ_PRIVATE: 7540 new_local_type = MATSEQBAIJ; 7541 break; 7542 case MATSBAIJ_PRIVATE: 7543 new_local_type = MATSEQSBAIJ; 7544 break; 7545 default: 7546 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 7547 } 7548 } else { /* by default, new_local_type is seqaij */ 7549 new_local_type = MATSEQAIJ; 7550 bs = 1; 7551 } 7552 7553 /* create MATIS object if needed */ 7554 if (!reuse) { 7555 PetscCall(MatGetSize(mat, &rows, &cols)); 7556 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7557 } else { 7558 /* it also destroys the local matrices */ 7559 if (*mat_n) { 7560 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 7561 } else { /* this is a fake object */ 7562 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7563 } 7564 } 7565 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 7566 PetscCall(MatSetType(local_mat, new_local_type)); 7567 7568 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 7569 7570 /* Global to local map of received indices */ 7571 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 7572 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 7573 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7574 7575 /* restore attributes -> type of incoming data and its size */ 7576 buf_size_idxs = 0; 7577 for (i = 0; i < n_recvs; i++) { 7578 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7579 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 7580 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7581 } 7582 PetscCall(PetscFree(recv_buffer_idxs)); 7583 7584 /* set preallocation */ 7585 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 7586 if (!newisdense) { 7587 PetscInt *new_local_nnz = NULL; 7588 7589 ptr_idxs = recv_buffer_idxs_local; 7590 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 7591 for (i = 0; i < n_recvs; i++) { 7592 PetscInt j; 7593 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7594 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 7595 } else { 7596 /* TODO */ 7597 } 7598 ptr_idxs += olengths_idxs[i]; 7599 } 7600 if (new_local_nnz) { 7601 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 7602 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 7603 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 7604 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7605 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 7606 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7607 } else { 7608 PetscCall(MatSetUp(local_mat)); 7609 } 7610 PetscCall(PetscFree(new_local_nnz)); 7611 } else { 7612 PetscCall(MatSetUp(local_mat)); 7613 } 7614 7615 /* set values */ 7616 ptr_vals = recv_buffer_vals; 7617 ptr_idxs = recv_buffer_idxs_local; 7618 for (i = 0; i < n_recvs; i++) { 7619 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7620 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 7621 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 7622 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 7623 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 7624 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 7625 } else { 7626 /* TODO */ 7627 } 7628 ptr_idxs += olengths_idxs[i]; 7629 ptr_vals += olengths_vals[i]; 7630 } 7631 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 7632 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 7633 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 7634 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 7635 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 7636 PetscCall(PetscFree(recv_buffer_vals)); 7637 7638 #if 0 7639 if (!restrict_comm) { /* check */ 7640 Vec lvec,rvec; 7641 PetscReal infty_error; 7642 7643 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7644 PetscCall(VecSetRandom(rvec,NULL)); 7645 PetscCall(MatMult(mat,rvec,lvec)); 7646 PetscCall(VecScale(lvec,-1.0)); 7647 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7648 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7649 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7650 PetscCall(VecDestroy(&rvec)); 7651 PetscCall(VecDestroy(&lvec)); 7652 } 7653 #endif 7654 7655 /* assemble new additional is (if any) */ 7656 if (nis) { 7657 PetscInt **temp_idxs, *count_is, j, psum; 7658 7659 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 7660 PetscCall(PetscCalloc1(nis, &count_is)); 7661 ptr_idxs = recv_buffer_idxs_is; 7662 psum = 0; 7663 for (i = 0; i < n_recvs; i++) { 7664 for (j = 0; j < nis; j++) { 7665 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7666 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7667 psum += plen; 7668 ptr_idxs += plen + 1; /* shift pointer to received data */ 7669 } 7670 } 7671 PetscCall(PetscMalloc1(nis, &temp_idxs)); 7672 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 7673 for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1]; 7674 PetscCall(PetscArrayzero(count_is, nis)); 7675 ptr_idxs = recv_buffer_idxs_is; 7676 for (i = 0; i < n_recvs; i++) { 7677 for (j = 0; j < nis; j++) { 7678 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7679 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 7680 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7681 ptr_idxs += plen + 1; /* shift pointer to received data */ 7682 } 7683 } 7684 for (i = 0; i < nis; i++) { 7685 PetscCall(ISDestroy(&isarray[i])); 7686 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 7687 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 7688 } 7689 PetscCall(PetscFree(count_is)); 7690 PetscCall(PetscFree(temp_idxs[0])); 7691 PetscCall(PetscFree(temp_idxs)); 7692 } 7693 /* free workspace */ 7694 PetscCall(PetscFree(recv_buffer_idxs_is)); 7695 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 7696 PetscCall(PetscFree(send_buffer_idxs)); 7697 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 7698 if (isdense) { 7699 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7700 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 7701 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7702 } else { 7703 /* PetscCall(PetscFree(send_buffer_vals)); */ 7704 } 7705 if (nis) { 7706 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 7707 PetscCall(PetscFree(send_buffer_idxs_is)); 7708 } 7709 7710 if (nvecs) { 7711 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 7712 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 7713 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7714 PetscCall(VecDestroy(&nnsp_vec[0])); 7715 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 7716 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 7717 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 7718 /* set values */ 7719 ptr_vals = recv_buffer_vecs; 7720 ptr_idxs = recv_buffer_idxs_local; 7721 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7722 for (i = 0; i < n_recvs; i++) { 7723 PetscInt j; 7724 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 7725 ptr_idxs += olengths_idxs[i]; 7726 ptr_vals += olengths_idxs[i] - 2; 7727 } 7728 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7729 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 7730 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 7731 } 7732 7733 PetscCall(PetscFree(recv_buffer_vecs)); 7734 PetscCall(PetscFree(recv_buffer_idxs_local)); 7735 PetscCall(PetscFree(recv_req_idxs)); 7736 PetscCall(PetscFree(recv_req_vals)); 7737 PetscCall(PetscFree(recv_req_vecs)); 7738 PetscCall(PetscFree(recv_req_idxs_is)); 7739 PetscCall(PetscFree(send_req_idxs)); 7740 PetscCall(PetscFree(send_req_vals)); 7741 PetscCall(PetscFree(send_req_vecs)); 7742 PetscCall(PetscFree(send_req_idxs_is)); 7743 PetscCall(PetscFree(ilengths_vals)); 7744 PetscCall(PetscFree(ilengths_idxs)); 7745 PetscCall(PetscFree(olengths_vals)); 7746 PetscCall(PetscFree(olengths_idxs)); 7747 PetscCall(PetscFree(onodes)); 7748 if (nis) { 7749 PetscCall(PetscFree(ilengths_idxs_is)); 7750 PetscCall(PetscFree(olengths_idxs_is)); 7751 PetscCall(PetscFree(onodes_is)); 7752 } 7753 PetscCall(PetscSubcommDestroy(&subcomm)); 7754 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 7755 PetscCall(MatDestroy(mat_n)); 7756 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 7757 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7758 PetscCall(VecDestroy(&nnsp_vec[0])); 7759 } 7760 *mat_n = NULL; 7761 } 7762 PetscFunctionReturn(PETSC_SUCCESS); 7763 } 7764 7765 /* temporary hack into ksp private data structure */ 7766 #include <petsc/private/kspimpl.h> 7767 7768 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals) 7769 { 7770 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7771 PC_IS *pcis = (PC_IS *)pc->data; 7772 Mat coarse_mat, coarse_mat_is, coarse_submat_dense; 7773 Mat coarsedivudotp = NULL; 7774 Mat coarseG, t_coarse_mat_is; 7775 MatNullSpace CoarseNullSpace = NULL; 7776 ISLocalToGlobalMapping coarse_islg; 7777 IS coarse_is, *isarray, corners; 7778 PetscInt i, im_active = -1, active_procs = -1; 7779 PetscInt nis, nisdofs, nisneu, nisvert; 7780 PetscInt coarse_eqs_per_proc; 7781 PC pc_temp; 7782 PCType coarse_pc_type; 7783 KSPType coarse_ksp_type; 7784 PetscBool multilevel_requested, multilevel_allowed; 7785 PetscBool coarse_reuse; 7786 PetscInt ncoarse, nedcfield; 7787 PetscBool compute_vecs = PETSC_FALSE; 7788 PetscScalar *array; 7789 MatReuse coarse_mat_reuse; 7790 PetscBool restr, full_restr, have_void; 7791 PetscMPIInt size; 7792 7793 PetscFunctionBegin; 7794 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 7795 /* Assign global numbering to coarse dofs */ 7796 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7797 PetscInt ocoarse_size; 7798 compute_vecs = PETSC_TRUE; 7799 7800 pcbddc->new_primal_space = PETSC_TRUE; 7801 ocoarse_size = pcbddc->coarse_size; 7802 PetscCall(PetscFree(pcbddc->global_primal_indices)); 7803 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 7804 /* see if we can avoid some work */ 7805 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7806 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7807 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7808 PetscCall(KSPReset(pcbddc->coarse_ksp)); 7809 coarse_reuse = PETSC_FALSE; 7810 } else { /* we can safely reuse already computed coarse matrix */ 7811 coarse_reuse = PETSC_TRUE; 7812 } 7813 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7814 coarse_reuse = PETSC_FALSE; 7815 } 7816 /* reset any subassembling information */ 7817 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 7818 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7819 coarse_reuse = PETSC_TRUE; 7820 } 7821 if (coarse_reuse && pcbddc->coarse_ksp) { 7822 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 7823 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 7824 coarse_mat_reuse = MAT_REUSE_MATRIX; 7825 } else { 7826 coarse_mat = NULL; 7827 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7828 } 7829 7830 /* creates temporary l2gmap and IS for coarse indexes */ 7831 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 7832 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 7833 7834 /* creates temporary MATIS object for coarse matrix */ 7835 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense)); 7836 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is)); 7837 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense)); 7838 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7839 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7840 PetscCall(MatDestroy(&coarse_submat_dense)); 7841 7842 /* count "active" (i.e. with positive local size) and "void" processes */ 7843 im_active = !!(pcis->n); 7844 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7845 7846 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7847 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 7848 /* full_restr : just use the receivers from the subassembling pattern */ 7849 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 7850 coarse_mat_is = NULL; 7851 multilevel_allowed = PETSC_FALSE; 7852 multilevel_requested = PETSC_FALSE; 7853 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 7854 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 7855 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7856 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7857 if (multilevel_requested) { 7858 ncoarse = active_procs / pcbddc->coarsening_ratio; 7859 restr = PETSC_FALSE; 7860 full_restr = PETSC_FALSE; 7861 } else { 7862 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 7863 restr = PETSC_TRUE; 7864 full_restr = PETSC_TRUE; 7865 } 7866 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7867 ncoarse = PetscMax(1, ncoarse); 7868 if (!pcbddc->coarse_subassembling) { 7869 if (pcbddc->coarsening_ratio > 1) { 7870 if (multilevel_requested) { 7871 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7872 } else { 7873 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7874 } 7875 } else { 7876 PetscMPIInt rank; 7877 7878 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 7879 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7880 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 7881 } 7882 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7883 PetscInt psum; 7884 if (pcbddc->coarse_ksp) psum = 1; 7885 else psum = 0; 7886 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7887 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7888 } 7889 /* determine if we can go multilevel */ 7890 if (multilevel_requested) { 7891 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7892 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7893 } 7894 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7895 7896 /* dump subassembling pattern */ 7897 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 7898 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7899 nedcfield = -1; 7900 corners = NULL; 7901 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 7902 PetscInt *tidxs, *tidxs2, nout, tsize, i; 7903 const PetscInt *idxs; 7904 ISLocalToGlobalMapping tmap; 7905 7906 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7907 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 7908 /* allocate space for temporary storage */ 7909 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 7910 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 7911 /* allocate for IS array */ 7912 nisdofs = pcbddc->n_ISForDofsLocal; 7913 if (pcbddc->nedclocal) { 7914 if (pcbddc->nedfield > -1) { 7915 nedcfield = pcbddc->nedfield; 7916 } else { 7917 nedcfield = 0; 7918 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 7919 nisdofs = 1; 7920 } 7921 } 7922 nisneu = !!pcbddc->NeumannBoundariesLocal; 7923 nisvert = 0; /* nisvert is not used */ 7924 nis = nisdofs + nisneu + nisvert; 7925 PetscCall(PetscMalloc1(nis, &isarray)); 7926 /* dofs splitting */ 7927 for (i = 0; i < nisdofs; i++) { 7928 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 7929 if (nedcfield != i) { 7930 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 7931 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7932 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7933 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7934 } else { 7935 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 7936 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 7937 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7938 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7939 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 7940 } 7941 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7942 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 7943 /* PetscCall(ISView(isarray[i],0)); */ 7944 } 7945 /* neumann boundaries */ 7946 if (pcbddc->NeumannBoundariesLocal) { 7947 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 7948 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 7949 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7950 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7951 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7952 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7953 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 7954 /* PetscCall(ISView(isarray[nisdofs],0)); */ 7955 } 7956 /* coordinates */ 7957 if (pcbddc->corner_selected) { 7958 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7959 PetscCall(ISGetLocalSize(corners, &tsize)); 7960 PetscCall(ISGetIndices(corners, &idxs)); 7961 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7962 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7963 PetscCall(ISRestoreIndices(corners, &idxs)); 7964 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7965 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7966 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 7967 } 7968 PetscCall(PetscFree(tidxs)); 7969 PetscCall(PetscFree(tidxs2)); 7970 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 7971 } else { 7972 nis = 0; 7973 nisdofs = 0; 7974 nisneu = 0; 7975 nisvert = 0; 7976 isarray = NULL; 7977 } 7978 /* destroy no longer needed map */ 7979 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 7980 7981 /* subassemble */ 7982 if (multilevel_allowed) { 7983 Vec vp[1]; 7984 PetscInt nvecs = 0; 7985 PetscBool reuse, reuser; 7986 7987 if (coarse_mat) reuse = PETSC_TRUE; 7988 else reuse = PETSC_FALSE; 7989 PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7990 vp[0] = NULL; 7991 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7992 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 7993 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 7994 PetscCall(VecSetType(vp[0], VECSTANDARD)); 7995 nvecs = 1; 7996 7997 if (pcbddc->divudotp) { 7998 Mat B, loc_divudotp; 7999 Vec v, p; 8000 IS dummy; 8001 PetscInt np; 8002 8003 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8004 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8005 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8006 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8007 PetscCall(MatCreateVecs(B, &v, &p)); 8008 PetscCall(VecSet(p, 1.)); 8009 PetscCall(MatMultTranspose(B, p, v)); 8010 PetscCall(VecDestroy(&p)); 8011 PetscCall(MatDestroy(&B)); 8012 PetscCall(VecGetArray(vp[0], &array)); 8013 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8014 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8015 PetscCall(VecResetArray(pcbddc->vec1_P)); 8016 PetscCall(VecRestoreArray(vp[0], &array)); 8017 PetscCall(ISDestroy(&dummy)); 8018 PetscCall(VecDestroy(&v)); 8019 } 8020 } 8021 if (reuser) { 8022 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8023 } else { 8024 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8025 } 8026 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8027 PetscScalar *arraym; 8028 const PetscScalar *arrayv; 8029 PetscInt nl; 8030 PetscCall(VecGetLocalSize(vp[0], &nl)); 8031 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8032 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8033 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8034 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8035 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8036 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8037 PetscCall(VecDestroy(&vp[0])); 8038 } else { 8039 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8040 } 8041 } else { 8042 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8043 } 8044 if (coarse_mat_is || coarse_mat) { 8045 if (!multilevel_allowed) { 8046 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8047 } else { 8048 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8049 if (coarse_mat_is) { 8050 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8051 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8052 coarse_mat = coarse_mat_is; 8053 } 8054 } 8055 } 8056 PetscCall(MatDestroy(&t_coarse_mat_is)); 8057 PetscCall(MatDestroy(&coarse_mat_is)); 8058 8059 /* create local to global scatters for coarse problem */ 8060 if (compute_vecs) { 8061 PetscInt lrows; 8062 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8063 if (coarse_mat) { 8064 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8065 } else { 8066 lrows = 0; 8067 } 8068 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8069 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8070 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8071 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8072 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8073 } 8074 PetscCall(ISDestroy(&coarse_is)); 8075 8076 /* set defaults for coarse KSP and PC */ 8077 if (multilevel_allowed) { 8078 coarse_ksp_type = KSPRICHARDSON; 8079 coarse_pc_type = PCBDDC; 8080 } else { 8081 coarse_ksp_type = KSPPREONLY; 8082 coarse_pc_type = PCREDUNDANT; 8083 } 8084 8085 /* print some info if requested */ 8086 if (pcbddc->dbg_flag) { 8087 if (!multilevel_allowed) { 8088 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8089 if (multilevel_requested) { 8090 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio)); 8091 } else if (pcbddc->max_levels) { 8092 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8093 } 8094 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8095 } 8096 } 8097 8098 /* communicate coarse discrete gradient */ 8099 coarseG = NULL; 8100 if (pcbddc->nedcG && multilevel_allowed) { 8101 MPI_Comm ccomm; 8102 if (coarse_mat) { 8103 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8104 } else { 8105 ccomm = MPI_COMM_NULL; 8106 } 8107 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8108 } 8109 8110 /* create the coarse KSP object only once with defaults */ 8111 if (coarse_mat) { 8112 PetscBool isredundant, isbddc, force, valid; 8113 PetscViewer dbg_viewer = NULL; 8114 PetscBool isset, issym, isher, isspd; 8115 8116 if (pcbddc->dbg_flag) { 8117 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8118 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8119 } 8120 if (!pcbddc->coarse_ksp) { 8121 char prefix[256], str_level[16]; 8122 size_t len; 8123 8124 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8125 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8126 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8127 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8128 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8129 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8130 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8131 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8132 /* TODO is this logic correct? should check for coarse_mat type */ 8133 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8134 /* prefix */ 8135 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8136 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8137 if (!pcbddc->current_level) { 8138 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8139 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8140 } else { 8141 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8142 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8143 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8144 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8145 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8146 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 8147 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8148 } 8149 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8150 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8151 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8152 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8153 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8154 /* allow user customization */ 8155 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8156 /* get some info after set from options */ 8157 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8158 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8159 force = PETSC_FALSE; 8160 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8161 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8162 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8163 if (multilevel_allowed && !force && !valid) { 8164 isbddc = PETSC_TRUE; 8165 PetscCall(PCSetType(pc_temp, PCBDDC)); 8166 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8167 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8168 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8169 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8170 PetscObjectOptionsBegin((PetscObject)pc_temp); 8171 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8172 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8173 PetscOptionsEnd(); 8174 pc_temp->setfromoptionscalled++; 8175 } 8176 } 8177 } 8178 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8179 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8180 if (nisdofs) { 8181 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8182 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8183 } 8184 if (nisneu) { 8185 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8186 PetscCall(ISDestroy(&isarray[nisdofs])); 8187 } 8188 if (nisvert) { 8189 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8190 PetscCall(ISDestroy(&isarray[nis - 1])); 8191 } 8192 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8193 8194 /* get some info after set from options */ 8195 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8196 8197 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8198 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8199 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8200 force = PETSC_FALSE; 8201 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8202 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8203 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8204 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8205 if (isredundant) { 8206 KSP inner_ksp; 8207 PC inner_pc; 8208 8209 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8210 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8211 } 8212 8213 /* parameters which miss an API */ 8214 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8215 if (isbddc) { 8216 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8217 8218 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8219 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8220 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8221 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8222 if (pcbddc_coarse->benign_saddle_point) { 8223 Mat coarsedivudotp_is; 8224 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8225 IS row, col; 8226 const PetscInt *gidxs; 8227 PetscInt n, st, M, N; 8228 8229 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8230 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8231 st = st - n; 8232 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8233 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8234 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8235 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8236 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8237 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8238 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8239 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8240 PetscCall(ISGetSize(row, &M)); 8241 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8242 PetscCall(ISDestroy(&row)); 8243 PetscCall(ISDestroy(&col)); 8244 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8245 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8246 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8247 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8248 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8249 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8250 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8251 PetscCall(MatDestroy(&coarsedivudotp)); 8252 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8253 PetscCall(MatDestroy(&coarsedivudotp_is)); 8254 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8255 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8256 } 8257 } 8258 8259 /* propagate symmetry info of coarse matrix */ 8260 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8261 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8262 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8263 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8264 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8265 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8266 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8267 8268 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8269 /* set operators */ 8270 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8271 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8272 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8273 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8274 } 8275 PetscCall(MatDestroy(&coarseG)); 8276 PetscCall(PetscFree(isarray)); 8277 #if 0 8278 { 8279 PetscViewer viewer; 8280 char filename[256]; 8281 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8282 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8283 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8284 PetscCall(MatView(coarse_mat,viewer)); 8285 PetscCall(PetscViewerPopFormat(viewer)); 8286 PetscCall(PetscViewerDestroy(&viewer)); 8287 } 8288 #endif 8289 8290 if (corners) { 8291 Vec gv; 8292 IS is; 8293 const PetscInt *idxs; 8294 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8295 PetscScalar *coords; 8296 8297 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8298 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8299 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8300 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8301 PetscCall(VecSetBlockSize(gv, cdim)); 8302 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8303 PetscCall(VecSetType(gv, VECSTANDARD)); 8304 PetscCall(VecSetFromOptions(gv)); 8305 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8306 8307 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8308 PetscCall(ISGetLocalSize(is, &n)); 8309 PetscCall(ISGetIndices(is, &idxs)); 8310 PetscCall(PetscMalloc1(n * cdim, &coords)); 8311 for (i = 0; i < n; i++) { 8312 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8313 } 8314 PetscCall(ISRestoreIndices(is, &idxs)); 8315 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8316 8317 PetscCall(ISGetLocalSize(corners, &n)); 8318 PetscCall(ISGetIndices(corners, &idxs)); 8319 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8320 PetscCall(ISRestoreIndices(corners, &idxs)); 8321 PetscCall(PetscFree(coords)); 8322 PetscCall(VecAssemblyBegin(gv)); 8323 PetscCall(VecAssemblyEnd(gv)); 8324 PetscCall(VecGetArray(gv, &coords)); 8325 if (pcbddc->coarse_ksp) { 8326 PC coarse_pc; 8327 PetscBool isbddc; 8328 8329 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8330 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8331 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8332 PetscReal *realcoords; 8333 8334 PetscCall(VecGetLocalSize(gv, &n)); 8335 #if defined(PETSC_USE_COMPLEX) 8336 PetscCall(PetscMalloc1(n, &realcoords)); 8337 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8338 #else 8339 realcoords = coords; 8340 #endif 8341 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8342 #if defined(PETSC_USE_COMPLEX) 8343 PetscCall(PetscFree(realcoords)); 8344 #endif 8345 } 8346 } 8347 PetscCall(VecRestoreArray(gv, &coords)); 8348 PetscCall(VecDestroy(&gv)); 8349 } 8350 PetscCall(ISDestroy(&corners)); 8351 8352 if (pcbddc->coarse_ksp) { 8353 Vec crhs, csol; 8354 8355 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8356 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8357 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL)); 8358 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs))); 8359 } 8360 PetscCall(MatDestroy(&coarsedivudotp)); 8361 8362 /* compute null space for coarse solver if the benign trick has been requested */ 8363 if (pcbddc->benign_null) { 8364 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8365 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 8366 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8367 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8368 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8369 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8370 if (coarse_mat) { 8371 Vec nullv; 8372 PetscScalar *array, *array2; 8373 PetscInt nl; 8374 8375 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8376 PetscCall(VecGetLocalSize(nullv, &nl)); 8377 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8378 PetscCall(VecGetArray(nullv, &array2)); 8379 PetscCall(PetscArraycpy(array2, array, nl)); 8380 PetscCall(VecRestoreArray(nullv, &array2)); 8381 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8382 PetscCall(VecNormalize(nullv, NULL)); 8383 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8384 PetscCall(VecDestroy(&nullv)); 8385 } 8386 } 8387 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8388 8389 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8390 if (pcbddc->coarse_ksp) { 8391 PetscBool ispreonly; 8392 8393 if (CoarseNullSpace) { 8394 PetscBool isnull; 8395 8396 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8397 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8398 /* TODO: add local nullspaces (if any) */ 8399 } 8400 /* setup coarse ksp */ 8401 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8402 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8403 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8404 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8405 KSP check_ksp; 8406 KSPType check_ksp_type; 8407 PC check_pc; 8408 Vec check_vec, coarse_vec; 8409 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8410 PetscInt its; 8411 PetscBool compute_eigs; 8412 PetscReal *eigs_r, *eigs_c; 8413 PetscInt neigs; 8414 const char *prefix; 8415 8416 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8417 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 8418 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 8419 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 8420 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 8421 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 8422 /* prevent from setup unneeded object */ 8423 PetscCall(KSPGetPC(check_ksp, &check_pc)); 8424 PetscCall(PCSetType(check_pc, PCNONE)); 8425 if (ispreonly) { 8426 check_ksp_type = KSPPREONLY; 8427 compute_eigs = PETSC_FALSE; 8428 } else { 8429 check_ksp_type = KSPGMRES; 8430 compute_eigs = PETSC_TRUE; 8431 } 8432 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 8433 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 8434 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 8435 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 8436 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 8437 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 8438 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 8439 PetscCall(KSPSetFromOptions(check_ksp)); 8440 PetscCall(KSPSetUp(check_ksp)); 8441 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 8442 PetscCall(KSPSetPC(check_ksp, check_pc)); 8443 /* create random vec */ 8444 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 8445 PetscCall(VecSetRandom(check_vec, NULL)); 8446 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8447 /* solve coarse problem */ 8448 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 8449 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 8450 /* set eigenvalue estimation if preonly has not been requested */ 8451 if (compute_eigs) { 8452 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 8453 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 8454 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 8455 if (neigs) { 8456 lambda_max = eigs_r[neigs - 1]; 8457 lambda_min = eigs_r[0]; 8458 if (pcbddc->use_coarse_estimates) { 8459 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8460 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 8461 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 8462 } 8463 } 8464 } 8465 } 8466 8467 /* check coarse problem residual error */ 8468 if (pcbddc->dbg_flag) { 8469 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8470 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8471 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 8472 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 8473 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8474 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 8475 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 8476 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer)); 8477 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 8478 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 8479 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 8480 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 8481 if (compute_eigs) { 8482 PetscReal lambda_max_s, lambda_min_s; 8483 KSPConvergedReason reason; 8484 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 8485 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 8486 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 8487 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 8488 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 8489 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 8490 } 8491 PetscCall(PetscViewerFlush(dbg_viewer)); 8492 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8493 } 8494 PetscCall(VecDestroy(&check_vec)); 8495 PetscCall(VecDestroy(&coarse_vec)); 8496 PetscCall(KSPDestroy(&check_ksp)); 8497 if (compute_eigs) { 8498 PetscCall(PetscFree(eigs_r)); 8499 PetscCall(PetscFree(eigs_c)); 8500 } 8501 } 8502 } 8503 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8504 /* print additional info */ 8505 if (pcbddc->dbg_flag) { 8506 /* waits until all processes reaches this point */ 8507 PetscCall(PetscBarrier((PetscObject)pc)); 8508 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 8509 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8510 } 8511 8512 /* free memory */ 8513 PetscCall(MatDestroy(&coarse_mat)); 8514 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8515 PetscFunctionReturn(PETSC_SUCCESS); 8516 } 8517 8518 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 8519 { 8520 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8521 PC_IS *pcis = (PC_IS *)pc->data; 8522 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 8523 IS subset, subset_mult, subset_n; 8524 PetscInt local_size, coarse_size = 0; 8525 PetscInt *local_primal_indices = NULL; 8526 const PetscInt *t_local_primal_indices; 8527 8528 PetscFunctionBegin; 8529 /* Compute global number of coarse dofs */ 8530 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 8531 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 8532 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 8533 PetscCall(ISDestroy(&subset_n)); 8534 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 8535 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 8536 PetscCall(ISDestroy(&subset)); 8537 PetscCall(ISDestroy(&subset_mult)); 8538 PetscCall(ISGetLocalSize(subset_n, &local_size)); 8539 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 8540 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 8541 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 8542 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 8543 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 8544 PetscCall(ISDestroy(&subset_n)); 8545 8546 /* check numbering */ 8547 if (pcbddc->dbg_flag) { 8548 PetscScalar coarsesum, *array, *array2; 8549 PetscInt i; 8550 PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE; 8551 8552 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8553 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8554 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n")); 8555 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8556 /* counter */ 8557 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8558 PetscCall(VecSet(pcis->vec1_N, 1.0)); 8559 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8560 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8561 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8562 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8563 PetscCall(VecSet(pcis->vec1_N, 0.0)); 8564 for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES)); 8565 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8566 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8567 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8568 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8569 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8570 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8571 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8572 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8573 PetscCall(VecGetArray(pcis->vec2_N, &array2)); 8574 for (i = 0; i < pcis->n; i++) { 8575 if (array[i] != 0.0 && array[i] != array2[i]) { 8576 PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi; 8577 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8578 set_error = PETSC_TRUE; 8579 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi)); 8580 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh)); 8581 } 8582 } 8583 PetscCall(VecRestoreArray(pcis->vec2_N, &array2)); 8584 PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8585 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8586 for (i = 0; i < pcis->n; i++) { 8587 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]); 8588 } 8589 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8590 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8591 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8592 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8593 PetscCall(VecSum(pcis->vec1_global, &coarsesum)); 8594 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum))); 8595 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8596 PetscInt *gidxs; 8597 8598 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs)); 8599 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs)); 8600 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n")); 8601 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8602 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank)); 8603 for (i = 0; i < pcbddc->local_primal_size; i++) { 8604 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i])); 8605 } 8606 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8607 PetscCall(PetscFree(gidxs)); 8608 } 8609 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8610 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8611 PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed"); 8612 } 8613 8614 /* get back data */ 8615 *coarse_size_n = coarse_size; 8616 *local_primal_indices_n = local_primal_indices; 8617 PetscFunctionReturn(PETSC_SUCCESS); 8618 } 8619 8620 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 8621 { 8622 IS localis_t; 8623 PetscInt i, lsize, *idxs, n; 8624 PetscScalar *vals; 8625 8626 PetscFunctionBegin; 8627 /* get indices in local ordering exploiting local to global map */ 8628 PetscCall(ISGetLocalSize(globalis, &lsize)); 8629 PetscCall(PetscMalloc1(lsize, &vals)); 8630 for (i = 0; i < lsize; i++) vals[i] = 1.0; 8631 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 8632 PetscCall(VecSet(gwork, 0.0)); 8633 PetscCall(VecSet(lwork, 0.0)); 8634 if (idxs) { /* multilevel guard */ 8635 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 8636 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 8637 } 8638 PetscCall(VecAssemblyBegin(gwork)); 8639 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 8640 PetscCall(PetscFree(vals)); 8641 PetscCall(VecAssemblyEnd(gwork)); 8642 /* now compute set in local ordering */ 8643 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8644 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8645 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 8646 PetscCall(VecGetSize(lwork, &n)); 8647 for (i = 0, lsize = 0; i < n; i++) { 8648 if (PetscRealPart(vals[i]) > 0.5) lsize++; 8649 } 8650 PetscCall(PetscMalloc1(lsize, &idxs)); 8651 for (i = 0, lsize = 0; i < n; i++) { 8652 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 8653 } 8654 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 8655 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 8656 *localis = localis_t; 8657 PetscFunctionReturn(PETSC_SUCCESS); 8658 } 8659 8660 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8661 { 8662 PC_IS *pcis = (PC_IS *)pc->data; 8663 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8664 PC_IS *pcisf; 8665 PC_BDDC *pcbddcf; 8666 PC pcf; 8667 8668 PetscFunctionBegin; 8669 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 8670 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 8671 PetscCall(PCSetType(pcf, PCBDDC)); 8672 8673 pcisf = (PC_IS *)pcf->data; 8674 pcbddcf = (PC_BDDC *)pcf->data; 8675 8676 pcisf->is_B_local = pcis->is_B_local; 8677 pcisf->vec1_N = pcis->vec1_N; 8678 pcisf->BtoNmap = pcis->BtoNmap; 8679 pcisf->n = pcis->n; 8680 pcisf->n_B = pcis->n_B; 8681 8682 PetscCall(PetscFree(pcbddcf->mat_graph)); 8683 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8684 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8685 pcbddcf->sub_schurs = schurs; 8686 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8687 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8688 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8689 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8690 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 8691 pcbddcf->use_faces = PETSC_TRUE; 8692 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 8693 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 8694 pcbddcf->use_qr_single = (PetscBool)!constraints; 8695 pcbddcf->fake_change = PETSC_TRUE; 8696 pcbddcf->dbg_flag = pcbddc->dbg_flag; 8697 8698 PetscCall(PCBDDCAdaptiveSelection(pcf)); 8699 PetscCall(PCBDDCConstraintsSetUp(pcf)); 8700 8701 *change = pcbddcf->ConstraintMatrix; 8702 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 8703 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 8704 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 8705 8706 if (schurs) pcbddcf->sub_schurs = NULL; 8707 pcbddcf->ConstraintMatrix = NULL; 8708 pcbddcf->mat_graph = NULL; 8709 pcisf->is_B_local = NULL; 8710 pcisf->vec1_N = NULL; 8711 pcisf->BtoNmap = NULL; 8712 PetscCall(PCDestroy(&pcf)); 8713 PetscFunctionReturn(PETSC_SUCCESS); 8714 } 8715 8716 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8717 { 8718 PC_IS *pcis = (PC_IS *)pc->data; 8719 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8720 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 8721 Mat S_j; 8722 PetscInt *used_xadj, *used_adjncy; 8723 PetscBool free_used_adj; 8724 8725 PetscFunctionBegin; 8726 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8727 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8728 free_used_adj = PETSC_FALSE; 8729 if (pcbddc->sub_schurs_layers == -1) { 8730 used_xadj = NULL; 8731 used_adjncy = NULL; 8732 } else { 8733 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8734 used_xadj = pcbddc->mat_graph->xadj; 8735 used_adjncy = pcbddc->mat_graph->adjncy; 8736 } else if (pcbddc->computed_rowadj) { 8737 used_xadj = pcbddc->mat_graph->xadj; 8738 used_adjncy = pcbddc->mat_graph->adjncy; 8739 } else { 8740 PetscBool flg_row = PETSC_FALSE; 8741 const PetscInt *xadj, *adjncy; 8742 PetscInt nvtxs; 8743 8744 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8745 if (flg_row) { 8746 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 8747 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 8748 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 8749 free_used_adj = PETSC_TRUE; 8750 } else { 8751 pcbddc->sub_schurs_layers = -1; 8752 used_xadj = NULL; 8753 used_adjncy = NULL; 8754 } 8755 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8756 } 8757 } 8758 8759 /* setup sub_schurs data */ 8760 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8761 if (!sub_schurs->schur_explicit) { 8762 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8763 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8764 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 8765 } else { 8766 Mat change = NULL; 8767 Vec scaling = NULL; 8768 IS change_primal = NULL, iP; 8769 PetscInt benign_n; 8770 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8771 PetscBool need_change = PETSC_FALSE; 8772 PetscBool discrete_harmonic = PETSC_FALSE; 8773 8774 if (!pcbddc->use_vertices && reuse_solvers) { 8775 PetscInt n_vertices; 8776 8777 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 8778 reuse_solvers = (PetscBool)!n_vertices; 8779 } 8780 if (!pcbddc->benign_change_explicit) { 8781 benign_n = pcbddc->benign_n; 8782 } else { 8783 benign_n = 0; 8784 } 8785 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8786 We need a global reduction to avoid possible deadlocks. 8787 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8788 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8789 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8790 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8791 need_change = (PetscBool)(!need_change); 8792 } 8793 /* If the user defines additional constraints, we import them here */ 8794 if (need_change) { 8795 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 8796 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 8797 } 8798 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8799 8800 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 8801 if (iP) { 8802 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 8803 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 8804 PetscOptionsEnd(); 8805 } 8806 if (discrete_harmonic) { 8807 Mat A; 8808 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 8809 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 8810 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 8811 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 8812 pcbddc->benign_zerodiag_subs, change, change_primal)); 8813 PetscCall(MatDestroy(&A)); 8814 } else { 8815 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 8816 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 8817 } 8818 PetscCall(MatDestroy(&change)); 8819 PetscCall(ISDestroy(&change_primal)); 8820 } 8821 PetscCall(MatDestroy(&S_j)); 8822 8823 /* free adjacency */ 8824 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 8825 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8826 PetscFunctionReturn(PETSC_SUCCESS); 8827 } 8828 8829 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8830 { 8831 PC_IS *pcis = (PC_IS *)pc->data; 8832 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8833 PCBDDCGraph graph; 8834 8835 PetscFunctionBegin; 8836 /* attach interface graph for determining subsets */ 8837 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8838 IS verticesIS, verticescomm; 8839 PetscInt vsize, *idxs; 8840 8841 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8842 PetscCall(ISGetSize(verticesIS, &vsize)); 8843 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 8844 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 8845 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 8846 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8847 PetscCall(PCBDDCGraphCreate(&graph)); 8848 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 8849 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 8850 PetscCall(ISDestroy(&verticescomm)); 8851 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 8852 } else { 8853 graph = pcbddc->mat_graph; 8854 } 8855 /* print some info */ 8856 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8857 IS vertices; 8858 PetscInt nv, nedges, nfaces; 8859 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 8860 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8861 PetscCall(ISGetSize(vertices, &nv)); 8862 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8863 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 8864 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 8865 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 8866 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 8867 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8868 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 8869 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8870 } 8871 8872 /* sub_schurs init */ 8873 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 8874 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 8875 8876 /* free graph struct */ 8877 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 8878 PetscFunctionReturn(PETSC_SUCCESS); 8879 } 8880 8881 PetscErrorCode PCBDDCCheckOperator(PC pc) 8882 { 8883 PC_IS *pcis = (PC_IS *)pc->data; 8884 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8885 8886 PetscFunctionBegin; 8887 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8888 IS zerodiag = NULL; 8889 Mat S_j, B0_B = NULL; 8890 Vec dummy_vec = NULL, vec_check_B, vec_scale_P; 8891 PetscScalar *p0_check, *array, *array2; 8892 PetscReal norm; 8893 PetscInt i; 8894 8895 /* B0 and B0_B */ 8896 if (zerodiag) { 8897 IS dummy; 8898 8899 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy)); 8900 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 8901 PetscCall(MatCreateVecs(B0_B, NULL, &dummy_vec)); 8902 PetscCall(ISDestroy(&dummy)); 8903 } 8904 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8905 PetscCall(VecDuplicate(pcbddc->vec1_P, &vec_scale_P)); 8906 PetscCall(VecSet(pcbddc->vec1_P, 1.0)); 8907 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8908 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8909 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE)); 8910 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE)); 8911 PetscCall(VecReciprocal(vec_scale_P)); 8912 /* S_j */ 8913 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8914 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8915 8916 /* mimic vector in \widetilde{W}_\Gamma */ 8917 PetscCall(VecSetRandom(pcis->vec1_N, NULL)); 8918 /* continuous in primal space */ 8919 PetscCall(VecSetRandom(pcbddc->coarse_vec, NULL)); 8920 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8921 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8922 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 8923 PetscCall(PetscCalloc1(pcbddc->benign_n, &p0_check)); 8924 for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i]; 8925 PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES)); 8926 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 8927 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8928 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8929 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD)); 8930 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD)); 8931 PetscCall(VecDuplicate(pcis->vec2_B, &vec_check_B)); 8932 PetscCall(VecCopy(pcis->vec2_B, vec_check_B)); 8933 8934 /* assemble rhs for coarse problem */ 8935 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8936 /* local with Schur */ 8937 PetscCall(MatMult(S_j, pcis->vec2_B, pcis->vec1_B)); 8938 if (zerodiag) { 8939 PetscCall(VecGetArray(dummy_vec, &array)); 8940 for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i]; 8941 PetscCall(VecRestoreArray(dummy_vec, &array)); 8942 PetscCall(MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B)); 8943 } 8944 /* sum on primal nodes the local contributions */ 8945 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE)); 8946 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE)); 8947 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8948 PetscCall(VecGetArray(pcbddc->vec1_P, &array2)); 8949 for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8950 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array2)); 8951 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8952 PetscCall(VecSet(pcbddc->coarse_vec, 0.)); 8953 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8954 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD)); 8955 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8956 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE)); 8957 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 8958 /* scale primal nodes (BDDC sums contibutions) */ 8959 PetscCall(VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P)); 8960 PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES)); 8961 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 8962 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8963 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8964 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 8965 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 8966 /* global: \widetilde{B0}_B w_\Gamma */ 8967 if (zerodiag) { 8968 PetscCall(MatMult(B0_B, pcis->vec2_B, dummy_vec)); 8969 PetscCall(VecGetArray(dummy_vec, &array)); 8970 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i]; 8971 PetscCall(VecRestoreArray(dummy_vec, &array)); 8972 } 8973 /* BDDC */ 8974 PetscCall(VecSet(pcis->vec1_D, 0.)); 8975 PetscCall(PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE)); 8976 8977 PetscCall(VecCopy(pcis->vec1_B, pcis->vec2_B)); 8978 PetscCall(VecAXPY(pcis->vec1_B, -1.0, vec_check_B)); 8979 PetscCall(VecNorm(pcis->vec1_B, NORM_INFINITY, &norm)); 8980 PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm)); 8981 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i]))); 8982 PetscCall(PetscFree(p0_check)); 8983 PetscCall(VecDestroy(&vec_scale_P)); 8984 PetscCall(VecDestroy(&vec_check_B)); 8985 PetscCall(VecDestroy(&dummy_vec)); 8986 PetscCall(MatDestroy(&S_j)); 8987 PetscCall(MatDestroy(&B0_B)); 8988 } 8989 PetscFunctionReturn(PETSC_SUCCESS); 8990 } 8991 8992 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8993 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8994 { 8995 Mat At; 8996 IS rows; 8997 PetscInt rst, ren; 8998 PetscLayout rmap; 8999 9000 PetscFunctionBegin; 9001 rst = ren = 0; 9002 if (ccomm != MPI_COMM_NULL) { 9003 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9004 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9005 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9006 PetscCall(PetscLayoutSetUp(rmap)); 9007 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9008 } 9009 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9010 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9011 PetscCall(ISDestroy(&rows)); 9012 9013 if (ccomm != MPI_COMM_NULL) { 9014 Mat_MPIAIJ *a, *b; 9015 IS from, to; 9016 Vec gvec; 9017 PetscInt lsize; 9018 9019 PetscCall(MatCreate(ccomm, B)); 9020 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9021 PetscCall(MatSetType(*B, MATAIJ)); 9022 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9023 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9024 a = (Mat_MPIAIJ *)At->data; 9025 b = (Mat_MPIAIJ *)(*B)->data; 9026 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9027 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9028 PetscCall(PetscObjectReference((PetscObject)a->A)); 9029 PetscCall(PetscObjectReference((PetscObject)a->B)); 9030 b->A = a->A; 9031 b->B = a->B; 9032 9033 b->donotstash = a->donotstash; 9034 b->roworiented = a->roworiented; 9035 b->rowindices = NULL; 9036 b->rowvalues = NULL; 9037 b->getrowactive = PETSC_FALSE; 9038 9039 (*B)->rmap = rmap; 9040 (*B)->factortype = A->factortype; 9041 (*B)->assembled = PETSC_TRUE; 9042 (*B)->insertmode = NOT_SET_VALUES; 9043 (*B)->preallocated = PETSC_TRUE; 9044 9045 if (a->colmap) { 9046 #if defined(PETSC_USE_CTABLE) 9047 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9048 #else 9049 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9050 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9051 #endif 9052 } else b->colmap = NULL; 9053 if (a->garray) { 9054 PetscInt len; 9055 len = a->B->cmap->n; 9056 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9057 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9058 } else b->garray = NULL; 9059 9060 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9061 b->lvec = a->lvec; 9062 9063 /* cannot use VecScatterCopy */ 9064 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9065 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9066 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9067 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9068 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9069 PetscCall(ISDestroy(&from)); 9070 PetscCall(ISDestroy(&to)); 9071 PetscCall(VecDestroy(&gvec)); 9072 } 9073 PetscCall(MatDestroy(&At)); 9074 PetscFunctionReturn(PETSC_SUCCESS); 9075 } 9076