1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork, *data, *U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM, bN, lwork, lierr, di = 1; 20 PetscInt ulw, i, nr, nc, n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A, &nr, &nc)); 27 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 32 PetscCall(PetscMalloc1(ulw, &uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr, nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n, &sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr * nr, &U)); 46 PetscCall(PetscBLASIntCast(nr, &bM)); 47 PetscCall(PetscBLASIntCast(nc, &bN)); 48 PetscCall(PetscBLASIntCast(ulw, &lwork)); 49 PetscCall(MatDenseGetArray(A, &data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 53 #else 54 PetscCall(PetscMalloc1(5 * n, &rwork2)); 55 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 60 PetscCall(MatDenseRestoreArray(A, &data)); 61 for (i = 0; i < n; i++) 62 if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) PetscCall(PetscFree(sing)); 64 if (!work) PetscCall(PetscFree(uwork)); 65 /* create B */ 66 if (!range) { 67 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 68 PetscCall(MatDenseGetArray(*B, &data)); 69 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 70 } else { 71 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 72 PetscCall(MatDenseGetArray(*B, &data)); 73 PetscCall(PetscArraycpy(data, U, i * nr)); 74 } 75 PetscCall(MatDenseRestoreArray(*B, &data)); 76 PetscCall(PetscFree(U)); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 Mat GE, GEd; 89 PetscInt rsize, csize, esize; 90 PetscScalar *ptr; 91 92 PetscFunctionBegin; 93 PetscCall(ISGetSize(edge, &esize)); 94 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 95 PetscCall(ISGetSize(extrow, &rsize)); 96 PetscCall(ISGetSize(extcol, &csize)); 97 98 /* gradients */ 99 ptr = work + 5 * esize; 100 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 101 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 102 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 103 PetscCall(MatDestroy(&GE)); 104 105 /* constants */ 106 ptr += rsize * csize; 107 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 108 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 109 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 110 PetscCall(MatDestroy(&GE)); 111 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 112 PetscCall(MatDestroy(&GEd)); 113 114 if (corners) { 115 Mat GEc; 116 const PetscScalar *vals; 117 PetscScalar v; 118 119 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 120 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 121 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 122 /* v = PetscAbsScalar(vals[0]); */ 123 v = 1.; 124 cvals[0] = vals[0] / v; 125 cvals[1] = vals[1] / v; 126 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 127 PetscCall(MatScale(*GKins, 1. / v)); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 133 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 134 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 135 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 136 PetscCall(MatView(GEc, viewer)); 137 PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK")); 138 PetscCall(MatView(*GKins, viewer)); 139 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 140 PetscCall(MatView(GEd, viewer)); 141 PetscCall(PetscViewerDestroy(&viewer)); 142 } 143 #endif 144 PetscCall(MatDestroy(&GEd)); 145 PetscCall(MatDestroy(&GEc)); 146 } 147 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 154 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 155 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 159 MPI_Comm comm; 160 IS lned, primals, allprimals, nedfieldlocal; 161 IS *eedges, *extrows, *extcols, *alleedges; 162 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 163 PetscScalar *vals, *work; 164 PetscReal *rwork; 165 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 166 PetscInt ne, nv, Lv, order, n, field; 167 PetscInt n_neigh, *neigh, *n_shared, **shared; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 singular = PETSC_FALSE; 187 188 /* Command line customization */ 189 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 190 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 191 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 192 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 193 /* print debug info TODO: to be removed */ 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 195 PetscOptionsEnd(); 196 197 /* Return if there are no edges in the decomposition and the problem is not singular */ 198 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 199 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 200 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 201 if (!singular) { 202 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 203 lrc[0] = PETSC_FALSE; 204 for (i = 0; i < n; i++) { 205 if (PetscRealPart(vals[i]) > 2.) { 206 lrc[0] = PETSC_TRUE; 207 break; 208 } 209 } 210 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 211 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 212 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 /* Get Nedelec field */ 216 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 217 if (pcbddc->n_ISForDofsLocal && field >= 0) { 218 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 219 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 220 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 221 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 222 ne = n; 223 nedfieldlocal = NULL; 224 global = PETSC_TRUE; 225 } else if (field == PETSC_DECIDE) { 226 PetscInt rst, ren, *idx; 227 228 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 229 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 230 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 231 for (i = rst; i < ren; i++) { 232 PetscInt nc; 233 234 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 235 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 236 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 237 } 238 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 239 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 240 PetscCall(PetscMalloc1(n, &idx)); 241 for (i = 0, ne = 0; i < n; i++) 242 if (matis->sf_leafdata[i]) idx[ne++] = i; 243 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 244 } else { 245 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 /* Sanity checks */ 249 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 250 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 251 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 252 253 /* Just set primal dofs and return */ 254 if (setprimal) { 255 IS enedfieldlocal; 256 PetscInt *eidxs; 257 258 PetscCall(PetscMalloc1(ne, &eidxs)); 259 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 260 if (nedfieldlocal) { 261 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 262 for (i = 0, cum = 0; i < ne; i++) { 263 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 264 } 265 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 266 } else { 267 for (i = 0, cum = 0; i < ne; i++) { 268 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 269 } 270 } 271 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 272 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 273 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 274 PetscCall(PetscFree(eidxs)); 275 PetscCall(ISDestroy(&nedfieldlocal)); 276 PetscCall(ISDestroy(&enedfieldlocal)); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 /* Compute some l2g maps */ 281 if (nedfieldlocal) { 282 IS is; 283 284 /* need to map from the local Nedelec field to local numbering */ 285 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 286 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 287 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 288 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 289 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 290 if (global) { 291 PetscCall(PetscObjectReference((PetscObject)al2g)); 292 el2g = al2g; 293 } else { 294 IS gis; 295 296 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 297 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 298 PetscCall(ISDestroy(&gis)); 299 } 300 PetscCall(ISDestroy(&is)); 301 } else { 302 /* restore default */ 303 pcbddc->nedfield = -1; 304 /* one ref for the destruction of al2g, one for el2g */ 305 PetscCall(PetscObjectReference((PetscObject)al2g)); 306 PetscCall(PetscObjectReference((PetscObject)al2g)); 307 el2g = al2g; 308 fl2g = NULL; 309 } 310 311 /* Start communication to drop connections for interior edges (for cc analysis only) */ 312 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 313 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 314 if (nedfieldlocal) { 315 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 316 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 317 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 318 } else { 319 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 320 } 321 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 322 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 323 324 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 325 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 326 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 327 if (global) { 328 PetscInt rst; 329 330 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 331 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 332 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 333 } 334 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 335 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 336 } else { 337 PetscInt *tbz; 338 339 PetscCall(PetscMalloc1(ne, &tbz)); 340 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 342 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 343 for (i = 0, cum = 0; i < ne; i++) 344 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 345 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 346 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 347 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 348 PetscCall(PetscFree(tbz)); 349 } 350 } else { /* we need the entire G to infer the nullspace */ 351 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 352 G = pcbddc->discretegradient; 353 } 354 355 /* Extract subdomain relevant rows of G */ 356 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 357 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 358 PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); 359 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 360 PetscCall(ISDestroy(&lned)); 361 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 362 PetscCall(MatDestroy(&lGall)); 363 PetscCall(MatISGetLocalMat(lGis, &lG)); 364 365 /* SF for nodal dofs communications */ 366 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 367 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 368 PetscCall(PetscObjectReference((PetscObject)vl2g)); 369 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 370 PetscCall(PetscSFCreate(comm, &sfv)); 371 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 372 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 373 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 374 i = singular ? 2 : 1; 375 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 376 377 /* Destroy temporary G created in MATIS format and modified G */ 378 PetscCall(PetscObjectReference((PetscObject)lG)); 379 PetscCall(MatDestroy(&lGis)); 380 PetscCall(MatDestroy(&G)); 381 382 if (print) { 383 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 384 PetscCall(MatView(lG, NULL)); 385 } 386 387 /* Save lG for values insertion in change of basis */ 388 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 389 390 /* Analyze the edge-nodes connections (duplicate lG) */ 391 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 392 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 393 PetscCall(PetscBTCreate(nv, &btv)); 394 PetscCall(PetscBTCreate(ne, &bte)); 395 PetscCall(PetscBTCreate(ne, &btb)); 396 PetscCall(PetscBTCreate(ne, &btbd)); 397 PetscCall(PetscBTCreate(nv, &btvcand)); 398 /* need to import the boundary specification to ensure the 399 proper detection of coarse edges' endpoints */ 400 if (pcbddc->DirichletBoundariesLocal) { 401 IS is; 402 403 if (fl2g) { 404 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 405 } else { 406 is = pcbddc->DirichletBoundariesLocal; 407 } 408 PetscCall(ISGetLocalSize(is, &cum)); 409 PetscCall(ISGetIndices(is, &idxs)); 410 for (i = 0; i < cum; i++) { 411 if (idxs[i] >= 0) { 412 PetscCall(PetscBTSet(btb, idxs[i])); 413 PetscCall(PetscBTSet(btbd, idxs[i])); 414 } 415 } 416 PetscCall(ISRestoreIndices(is, &idxs)); 417 if (fl2g) PetscCall(ISDestroy(&is)); 418 } 419 if (pcbddc->NeumannBoundariesLocal) { 420 IS is; 421 422 if (fl2g) { 423 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 424 } else { 425 is = pcbddc->NeumannBoundariesLocal; 426 } 427 PetscCall(ISGetLocalSize(is, &cum)); 428 PetscCall(ISGetIndices(is, &idxs)); 429 for (i = 0; i < cum; i++) { 430 if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i])); 431 } 432 PetscCall(ISRestoreIndices(is, &idxs)); 433 if (fl2g) PetscCall(ISDestroy(&is)); 434 } 435 436 /* Count neighs per dof */ 437 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs)); 438 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs)); 439 440 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 441 for proper detection of coarse edges' endpoints */ 442 PetscCall(PetscBTCreate(ne, &btee)); 443 for (i = 0; i < ne; i++) { 444 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 445 } 446 PetscCall(PetscMalloc1(ne, &marks)); 447 if (!conforming) { 448 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 449 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 450 } 451 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 452 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 453 cum = 0; 454 for (i = 0; i < ne; i++) { 455 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 456 if (!PetscBTLookup(btee, i)) { 457 marks[cum++] = i; 458 continue; 459 } 460 /* set badly connected edge dofs as primal */ 461 if (!conforming) { 462 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 463 marks[cum++] = i; 464 PetscCall(PetscBTSet(bte, i)); 465 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 466 } else { 467 /* every edge dofs should be connected through a certain number of nodal dofs 468 to other edge dofs belonging to coarse edges 469 - at most 2 endpoints 470 - order-1 interior nodal dofs 471 - no undefined nodal dofs (nconn < order) 472 */ 473 PetscInt ends = 0, ints = 0, undef = 0; 474 for (j = ii[i]; j < ii[i + 1]; j++) { 475 PetscInt v = jj[j], k; 476 PetscInt nconn = iit[v + 1] - iit[v]; 477 for (k = iit[v]; k < iit[v + 1]; k++) 478 if (!PetscBTLookup(btee, jjt[k])) nconn--; 479 if (nconn > order) ends++; 480 else if (nconn == order) ints++; 481 else undef++; 482 } 483 if (undef || ends > 2 || ints != order - 1) { 484 marks[cum++] = i; 485 PetscCall(PetscBTSet(bte, i)); 486 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 487 } 488 } 489 } 490 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 491 if (!order && ii[i + 1] != ii[i]) { 492 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 493 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 494 } 495 } 496 PetscCall(PetscBTDestroy(&btee)); 497 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 498 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 499 if (!conforming) { 500 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 501 PetscCall(MatDestroy(&lGt)); 502 } 503 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 504 505 /* identify splitpoints and corner candidates */ 506 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 507 if (print) { 508 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 509 PetscCall(MatView(lGe, NULL)); 510 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 511 PetscCall(MatView(lGt, NULL)); 512 } 513 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 514 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 515 for (i = 0; i < nv; i++) { 516 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 517 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 518 if (!order) { /* variable order */ 519 PetscReal vorder = 0.; 520 521 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 522 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 523 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 524 ord = 1; 525 } 526 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 527 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 528 if (PetscBTLookup(btbd, jj[j])) { 529 bdir = PETSC_TRUE; 530 break; 531 } 532 if (vc != ecount[jj[j]]) { 533 sneighs = PETSC_FALSE; 534 } else { 535 PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]]; 536 for (k = 0; k < vc; k++) { 537 if (vn[k] != en[k]) { 538 sneighs = PETSC_FALSE; 539 break; 540 } 541 } 542 } 543 } 544 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 545 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 546 PetscCall(PetscBTSet(btv, i)); 547 } else if (test == ord) { 548 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 549 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 550 PetscCall(PetscBTSet(btv, i)); 551 } else { 552 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 553 PetscCall(PetscBTSet(btvcand, i)); 554 } 555 } 556 } 557 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 558 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 559 PetscCall(PetscBTDestroy(&btbd)); 560 561 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 562 if (order != 1) { 563 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 564 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 565 for (i = 0; i < nv; i++) { 566 if (PetscBTLookup(btvcand, i)) { 567 PetscBool found = PETSC_FALSE; 568 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 569 PetscInt k, e = jj[j]; 570 if (PetscBTLookup(bte, e)) continue; 571 for (k = iit[e]; k < iit[e + 1]; k++) { 572 PetscInt v = jjt[k]; 573 if (v != i && PetscBTLookup(btvcand, v)) { 574 found = PETSC_TRUE; 575 break; 576 } 577 } 578 } 579 if (!found) { 580 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 581 PetscCall(PetscBTClear(btvcand, i)); 582 } else { 583 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 584 } 585 } 586 } 587 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 588 } 589 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 590 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 591 PetscCall(MatDestroy(&lGe)); 592 593 /* Get the local G^T explicitly */ 594 PetscCall(MatDestroy(&lGt)); 595 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 596 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 597 598 /* Mark interior nodal dofs */ 599 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 600 PetscCall(PetscBTCreate(nv, &btvi)); 601 for (i = 1; i < n_neigh; i++) { 602 for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j])); 603 } 604 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 605 606 /* communicate corners and splitpoints */ 607 PetscCall(PetscMalloc1(nv, &vmarks)); 608 PetscCall(PetscArrayzero(sfvleaves, nv)); 609 PetscCall(PetscArrayzero(sfvroots, Lv)); 610 for (i = 0; i < nv; i++) 611 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 612 613 if (print) { 614 IS tbz; 615 616 cum = 0; 617 for (i = 0; i < nv; i++) 618 if (sfvleaves[i]) vmarks[cum++] = i; 619 620 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 621 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 622 PetscCall(ISView(tbz, NULL)); 623 PetscCall(ISDestroy(&tbz)); 624 } 625 626 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 627 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 628 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 629 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 630 631 /* Zero rows of lGt corresponding to identified corners 632 and interior nodal dofs */ 633 cum = 0; 634 for (i = 0; i < nv; i++) { 635 if (sfvleaves[i]) { 636 vmarks[cum++] = i; 637 PetscCall(PetscBTSet(btv, i)); 638 } 639 if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 640 } 641 PetscCall(PetscBTDestroy(&btvi)); 642 if (print) { 643 IS tbz; 644 645 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 646 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 647 PetscCall(ISView(tbz, NULL)); 648 PetscCall(ISDestroy(&tbz)); 649 } 650 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 651 PetscCall(PetscFree(vmarks)); 652 PetscCall(PetscSFDestroy(&sfv)); 653 PetscCall(PetscFree2(sfvleaves, sfvroots)); 654 655 /* Recompute G */ 656 PetscCall(MatDestroy(&lG)); 657 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 658 if (print) { 659 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 660 PetscCall(MatView(lG, NULL)); 661 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 662 PetscCall(MatView(lGt, NULL)); 663 } 664 665 /* Get primal dofs (if any) */ 666 cum = 0; 667 for (i = 0; i < ne; i++) { 668 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 669 } 670 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 671 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 672 if (print) { 673 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 674 PetscCall(ISView(primals, NULL)); 675 } 676 PetscCall(PetscBTDestroy(&bte)); 677 /* TODO: what if the user passed in some of them ? */ 678 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 679 PetscCall(ISDestroy(&primals)); 680 681 /* Compute edge connectivity */ 682 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 683 684 /* Symbolic conn = lG*lGt */ 685 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 686 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 687 PetscCall(MatProductSetAlgorithm(conn, "default")); 688 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 689 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 690 PetscCall(MatProductSetFromOptions(conn)); 691 PetscCall(MatProductSymbolic(conn)); 692 693 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 694 if (fl2g) { 695 PetscBT btf; 696 PetscInt *iia, *jja, *iiu, *jju; 697 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 698 699 /* create CSR for all local dofs */ 700 PetscCall(PetscMalloc1(n + 1, &iia)); 701 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 702 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 703 iiu = pcbddc->mat_graph->xadj; 704 jju = pcbddc->mat_graph->adjncy; 705 } else if (pcbddc->use_local_adj) { 706 rest = PETSC_TRUE; 707 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 708 } else { 709 free = PETSC_TRUE; 710 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 711 iiu[0] = 0; 712 for (i = 0; i < n; i++) { 713 iiu[i + 1] = i + 1; 714 jju[i] = -1; 715 } 716 } 717 718 /* import sizes of CSR */ 719 iia[0] = 0; 720 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 721 722 /* overwrite entries corresponding to the Nedelec field */ 723 PetscCall(PetscBTCreate(n, &btf)); 724 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 725 for (i = 0; i < ne; i++) { 726 PetscCall(PetscBTSet(btf, idxs[i])); 727 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 728 } 729 730 /* iia in CSR */ 731 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 732 733 /* jja in CSR */ 734 PetscCall(PetscMalloc1(iia[n], &jja)); 735 for (i = 0; i < n; i++) 736 if (!PetscBTLookup(btf, i)) 737 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 738 739 /* map edge dofs connectivity */ 740 if (jj) { 741 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 742 for (i = 0; i < ne; i++) { 743 PetscInt e = idxs[i]; 744 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 745 } 746 } 747 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 748 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 749 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 750 if (free) PetscCall(PetscFree2(iiu, jju)); 751 PetscCall(PetscBTDestroy(&btf)); 752 } else { 753 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 754 } 755 756 /* Analyze interface for edge dofs */ 757 PetscCall(PCBDDCAnalyzeInterface(pc)); 758 pcbddc->mat_graph->twodim = PETSC_FALSE; 759 760 /* Get coarse edges in the edge space */ 761 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 762 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 763 764 if (fl2g) { 765 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 766 PetscCall(PetscMalloc1(nee, &eedges)); 767 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 768 } else { 769 eedges = alleedges; 770 primals = allprimals; 771 } 772 773 /* Mark fine edge dofs with their coarse edge id */ 774 PetscCall(PetscArrayzero(marks, ne)); 775 PetscCall(ISGetLocalSize(primals, &cum)); 776 PetscCall(ISGetIndices(primals, &idxs)); 777 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 778 PetscCall(ISRestoreIndices(primals, &idxs)); 779 if (print) { 780 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 781 PetscCall(ISView(primals, NULL)); 782 } 783 784 maxsize = 0; 785 for (i = 0; i < nee; i++) { 786 PetscInt size, mark = i + 1; 787 788 PetscCall(ISGetLocalSize(eedges[i], &size)); 789 PetscCall(ISGetIndices(eedges[i], &idxs)); 790 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 791 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 792 maxsize = PetscMax(maxsize, size); 793 } 794 795 /* Find coarse edge endpoints */ 796 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 797 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 798 for (i = 0; i < nee; i++) { 799 PetscInt mark = i + 1, size; 800 801 PetscCall(ISGetLocalSize(eedges[i], &size)); 802 if (!size && nedfieldlocal) continue; 803 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 804 PetscCall(ISGetIndices(eedges[i], &idxs)); 805 if (print) { 806 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 807 PetscCall(ISView(eedges[i], NULL)); 808 } 809 for (j = 0; j < size; j++) { 810 PetscInt k, ee = idxs[j]; 811 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 812 for (k = ii[ee]; k < ii[ee + 1]; k++) { 813 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 814 if (PetscBTLookup(btv, jj[k])) { 815 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 816 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 817 PetscInt k2; 818 PetscBool corner = PETSC_FALSE; 819 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 820 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 821 /* it's a corner if either is connected with an edge dof belonging to a different cc or 822 if the edge dof lie on the natural part of the boundary */ 823 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 824 corner = PETSC_TRUE; 825 break; 826 } 827 } 828 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 829 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 830 PetscCall(PetscBTSet(btv, jj[k])); 831 } else { 832 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 833 } 834 } 835 } 836 } 837 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 838 } 839 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 840 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 841 PetscCall(PetscBTDestroy(&btb)); 842 843 /* Reset marked primal dofs */ 844 PetscCall(ISGetLocalSize(primals, &cum)); 845 PetscCall(ISGetIndices(primals, &idxs)); 846 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 847 PetscCall(ISRestoreIndices(primals, &idxs)); 848 849 /* Now use the initial lG */ 850 PetscCall(MatDestroy(&lG)); 851 PetscCall(MatDestroy(&lGt)); 852 lG = lGinit; 853 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 854 855 /* Compute extended cols indices */ 856 PetscCall(PetscBTCreate(nv, &btvc)); 857 PetscCall(PetscBTCreate(nee, &bter)); 858 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 859 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 860 i *= maxsize; 861 PetscCall(PetscCalloc1(nee, &extcols)); 862 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 863 eerr = PETSC_FALSE; 864 for (i = 0; i < nee; i++) { 865 PetscInt size, found = 0; 866 867 cum = 0; 868 PetscCall(ISGetLocalSize(eedges[i], &size)); 869 if (!size && nedfieldlocal) continue; 870 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 871 PetscCall(ISGetIndices(eedges[i], &idxs)); 872 PetscCall(PetscBTMemzero(nv, btvc)); 873 for (j = 0; j < size; j++) { 874 PetscInt k, ee = idxs[j]; 875 for (k = ii[ee]; k < ii[ee + 1]; k++) { 876 PetscInt vv = jj[k]; 877 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 878 else if (!PetscBTLookupSet(btvc, vv)) found++; 879 } 880 } 881 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 882 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 883 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 884 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 885 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 886 /* it may happen that endpoints are not defined at this point 887 if it is the case, mark this edge for a second pass */ 888 if (cum != size - 1 || found != 2) { 889 PetscCall(PetscBTSet(bter, i)); 890 if (print) { 891 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 892 PetscCall(ISView(eedges[i], NULL)); 893 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 894 PetscCall(ISView(extcols[i], NULL)); 895 } 896 eerr = PETSC_TRUE; 897 } 898 } 899 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 900 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 901 if (done) { 902 PetscInt *newprimals; 903 904 PetscCall(PetscMalloc1(ne, &newprimals)); 905 PetscCall(ISGetLocalSize(primals, &cum)); 906 PetscCall(ISGetIndices(primals, &idxs)); 907 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 908 PetscCall(ISRestoreIndices(primals, &idxs)); 909 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 910 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 911 for (i = 0; i < nee; i++) { 912 PetscBool has_candidates = PETSC_FALSE; 913 if (PetscBTLookup(bter, i)) { 914 PetscInt size, mark = i + 1; 915 916 PetscCall(ISGetLocalSize(eedges[i], &size)); 917 PetscCall(ISGetIndices(eedges[i], &idxs)); 918 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 919 for (j = 0; j < size; j++) { 920 PetscInt k, ee = idxs[j]; 921 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 922 for (k = ii[ee]; k < ii[ee + 1]; k++) { 923 /* set all candidates located on the edge as corners */ 924 if (PetscBTLookup(btvcand, jj[k])) { 925 PetscInt k2, vv = jj[k]; 926 has_candidates = PETSC_TRUE; 927 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 928 PetscCall(PetscBTSet(btv, vv)); 929 /* set all edge dofs connected to candidate as primals */ 930 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 931 if (marks[jjt[k2]] == mark) { 932 PetscInt k3, ee2 = jjt[k2]; 933 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 934 newprimals[cum++] = ee2; 935 /* finally set the new corners */ 936 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 937 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 938 PetscCall(PetscBTSet(btv, jj[k3])); 939 } 940 } 941 } 942 } else { 943 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 944 } 945 } 946 } 947 if (!has_candidates) { /* circular edge */ 948 PetscInt k, ee = idxs[0], *tmarks; 949 950 PetscCall(PetscCalloc1(ne, &tmarks)); 951 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 952 for (k = ii[ee]; k < ii[ee + 1]; k++) { 953 PetscInt k2; 954 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 955 PetscCall(PetscBTSet(btv, jj[k])); 956 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 957 } 958 for (j = 0; j < size; j++) { 959 if (tmarks[idxs[j]] > 1) { 960 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 961 newprimals[cum++] = idxs[j]; 962 } 963 } 964 PetscCall(PetscFree(tmarks)); 965 } 966 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 967 } 968 PetscCall(ISDestroy(&extcols[i])); 969 } 970 PetscCall(PetscFree(extcols)); 971 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 972 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 973 if (fl2g) { 974 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 975 PetscCall(ISDestroy(&primals)); 976 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 977 PetscCall(PetscFree(eedges)); 978 } 979 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 980 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 981 PetscCall(PetscFree(newprimals)); 982 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 983 PetscCall(ISDestroy(&primals)); 984 PetscCall(PCBDDCAnalyzeInterface(pc)); 985 pcbddc->mat_graph->twodim = PETSC_FALSE; 986 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 987 if (fl2g) { 988 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 989 PetscCall(PetscMalloc1(nee, &eedges)); 990 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 991 } else { 992 eedges = alleedges; 993 primals = allprimals; 994 } 995 PetscCall(PetscCalloc1(nee, &extcols)); 996 997 /* Mark again */ 998 PetscCall(PetscArrayzero(marks, ne)); 999 for (i = 0; i < nee; i++) { 1000 PetscInt size, mark = i + 1; 1001 1002 PetscCall(ISGetLocalSize(eedges[i], &size)); 1003 PetscCall(ISGetIndices(eedges[i], &idxs)); 1004 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1005 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1006 } 1007 if (print) { 1008 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1009 PetscCall(ISView(primals, NULL)); 1010 } 1011 1012 /* Recompute extended cols */ 1013 eerr = PETSC_FALSE; 1014 for (i = 0; i < nee; i++) { 1015 PetscInt size; 1016 1017 cum = 0; 1018 PetscCall(ISGetLocalSize(eedges[i], &size)); 1019 if (!size && nedfieldlocal) continue; 1020 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1021 PetscCall(ISGetIndices(eedges[i], &idxs)); 1022 for (j = 0; j < size; j++) { 1023 PetscInt k, ee = idxs[j]; 1024 for (k = ii[ee]; k < ii[ee + 1]; k++) 1025 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1026 } 1027 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1028 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1029 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1030 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1031 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1032 if (cum != size - 1) { 1033 if (print) { 1034 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1035 PetscCall(ISView(eedges[i], NULL)); 1036 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1037 PetscCall(ISView(extcols[i], NULL)); 1038 } 1039 eerr = PETSC_TRUE; 1040 } 1041 } 1042 } 1043 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1044 PetscCall(PetscFree2(extrow, gidxs)); 1045 PetscCall(PetscBTDestroy(&bter)); 1046 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1047 /* an error should not occur at this point */ 1048 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1049 1050 /* Check the number of endpoints */ 1051 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1052 PetscCall(PetscMalloc1(2 * nee, &corners)); 1053 PetscCall(PetscMalloc1(nee, &cedges)); 1054 for (i = 0; i < nee; i++) { 1055 PetscInt size, found = 0, gc[2]; 1056 1057 /* init with defaults */ 1058 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1059 PetscCall(ISGetLocalSize(eedges[i], &size)); 1060 if (!size && nedfieldlocal) continue; 1061 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1062 PetscCall(ISGetIndices(eedges[i], &idxs)); 1063 PetscCall(PetscBTMemzero(nv, btvc)); 1064 for (j = 0; j < size; j++) { 1065 PetscInt k, ee = idxs[j]; 1066 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1067 PetscInt vv = jj[k]; 1068 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1069 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more then two corners for edge %" PetscInt_FMT, i); 1070 corners[i * 2 + found++] = vv; 1071 } 1072 } 1073 } 1074 if (found != 2) { 1075 PetscInt e; 1076 if (fl2g) { 1077 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1078 } else { 1079 e = idxs[0]; 1080 } 1081 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1082 } 1083 1084 /* get primal dof index on this coarse edge */ 1085 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1086 if (gc[0] > gc[1]) { 1087 PetscInt swap = corners[2 * i]; 1088 corners[2 * i] = corners[2 * i + 1]; 1089 corners[2 * i + 1] = swap; 1090 } 1091 cedges[i] = idxs[size - 1]; 1092 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1093 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1094 } 1095 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1096 PetscCall(PetscBTDestroy(&btvc)); 1097 1098 if (PetscDefined(USE_DEBUG)) { 1099 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1100 not interfere with neighbouring coarse edges */ 1101 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1102 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1103 for (i = 0; i < nv; i++) { 1104 PetscInt emax = 0, eemax = 0; 1105 1106 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1107 PetscCall(PetscArrayzero(emarks, nee + 1)); 1108 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1109 for (j = 1; j < nee + 1; j++) { 1110 if (emax < emarks[j]) { 1111 emax = emarks[j]; 1112 eemax = j; 1113 } 1114 } 1115 /* not relevant for edges */ 1116 if (!eemax) continue; 1117 1118 for (j = ii[i]; j < ii[i + 1]; j++) { 1119 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1120 } 1121 } 1122 PetscCall(PetscFree(emarks)); 1123 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1124 } 1125 1126 /* Compute extended rows indices for edge blocks of the change of basis */ 1127 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1128 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1129 extmem *= maxsize; 1130 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1131 PetscCall(PetscMalloc1(nee, &extrows)); 1132 PetscCall(PetscCalloc1(nee, &extrowcum)); 1133 for (i = 0; i < nv; i++) { 1134 PetscInt mark = 0, size, start; 1135 1136 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1137 for (j = ii[i]; j < ii[i + 1]; j++) 1138 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1139 1140 /* not relevant */ 1141 if (!mark) continue; 1142 1143 /* import extended row */ 1144 mark--; 1145 start = mark * extmem + extrowcum[mark]; 1146 size = ii[i + 1] - ii[i]; 1147 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1148 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1149 extrowcum[mark] += size; 1150 } 1151 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1152 PetscCall(MatDestroy(&lGt)); 1153 PetscCall(PetscFree(marks)); 1154 1155 /* Compress extrows */ 1156 cum = 0; 1157 for (i = 0; i < nee; i++) { 1158 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1159 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1161 cum = PetscMax(cum, size); 1162 } 1163 PetscCall(PetscFree(extrowcum)); 1164 PetscCall(PetscBTDestroy(&btv)); 1165 PetscCall(PetscBTDestroy(&btvcand)); 1166 1167 /* Workspace for lapack inner calls and VecSetValues */ 1168 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1169 1170 /* Create change of basis matrix (preallocation can be improved) */ 1171 PetscCall(MatCreate(comm, &T)); 1172 PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N)); 1173 PetscCall(MatSetType(T, MATAIJ)); 1174 PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL)); 1175 PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL)); 1176 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1177 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1178 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1179 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1180 1181 /* Defaults to identity */ 1182 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 1183 PetscCall(VecSet(tvec, 1.0)); 1184 PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES)); 1185 PetscCall(VecDestroy(&tvec)); 1186 1187 /* Create discrete gradient for the coarser level if needed */ 1188 PetscCall(MatDestroy(&pcbddc->nedcG)); 1189 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1190 if (pcbddc->current_level < pcbddc->max_levels) { 1191 ISLocalToGlobalMapping cel2g, cvl2g; 1192 IS wis, gwis; 1193 PetscInt cnv, cne; 1194 1195 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1196 if (fl2g) { 1197 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1198 } else { 1199 PetscCall(PetscObjectReference((PetscObject)wis)); 1200 pcbddc->nedclocal = wis; 1201 } 1202 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1203 PetscCall(ISDestroy(&wis)); 1204 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1205 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1206 PetscCall(ISDestroy(&wis)); 1207 PetscCall(ISDestroy(&gwis)); 1208 1209 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1210 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1211 PetscCall(ISDestroy(&wis)); 1212 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1213 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1214 PetscCall(ISDestroy(&wis)); 1215 PetscCall(ISDestroy(&gwis)); 1216 1217 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1218 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1219 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1220 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1221 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1222 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1223 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1224 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1225 } 1226 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1227 1228 #if defined(PRINT_GDET) 1229 inc = 0; 1230 lev = pcbddc->current_level; 1231 #endif 1232 1233 /* Insert values in the change of basis matrix */ 1234 for (i = 0; i < nee; i++) { 1235 Mat Gins = NULL, GKins = NULL; 1236 IS cornersis = NULL; 1237 PetscScalar cvals[2]; 1238 1239 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1240 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1241 if (Gins && GKins) { 1242 const PetscScalar *data; 1243 const PetscInt *rows, *cols; 1244 PetscInt nrh, nch, nrc, ncc; 1245 1246 PetscCall(ISGetIndices(eedges[i], &cols)); 1247 /* H1 */ 1248 PetscCall(ISGetIndices(extrows[i], &rows)); 1249 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1250 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1251 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1252 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1253 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1254 /* complement */ 1255 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1256 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1257 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1258 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1259 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1260 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1261 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1262 1263 /* coarse discrete gradient */ 1264 if (pcbddc->nedcG) { 1265 PetscInt cols[2]; 1266 1267 cols[0] = 2 * i; 1268 cols[1] = 2 * i + 1; 1269 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1270 } 1271 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1272 } 1273 PetscCall(ISDestroy(&extrows[i])); 1274 PetscCall(ISDestroy(&extcols[i])); 1275 PetscCall(ISDestroy(&cornersis)); 1276 PetscCall(MatDestroy(&Gins)); 1277 PetscCall(MatDestroy(&GKins)); 1278 } 1279 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1280 1281 /* Start assembling */ 1282 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1283 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1284 1285 /* Free */ 1286 if (fl2g) { 1287 PetscCall(ISDestroy(&primals)); 1288 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1289 PetscCall(PetscFree(eedges)); 1290 } 1291 1292 /* hack mat_graph with primal dofs on the coarse edges */ 1293 { 1294 PCBDDCGraph graph = pcbddc->mat_graph; 1295 PetscInt *oqueue = graph->queue; 1296 PetscInt *ocptr = graph->cptr; 1297 PetscInt ncc, *idxs; 1298 1299 /* find first primal edge */ 1300 if (pcbddc->nedclocal) { 1301 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1302 } else { 1303 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1304 idxs = cedges; 1305 } 1306 cum = 0; 1307 while (cum < nee && cedges[cum] < 0) cum++; 1308 1309 /* adapt connected components */ 1310 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1311 graph->cptr[0] = 0; 1312 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1313 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1314 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1315 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1316 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1317 ncc++; 1318 lc--; 1319 cum++; 1320 while (cum < nee && cedges[cum] < 0) cum++; 1321 } 1322 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1323 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1324 ncc++; 1325 } 1326 graph->ncc = ncc; 1327 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1328 PetscCall(PetscFree2(ocptr, oqueue)); 1329 } 1330 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1331 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1332 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1333 PetscCall(MatDestroy(&conn)); 1334 1335 PetscCall(ISDestroy(&nedfieldlocal)); 1336 PetscCall(PetscFree(extrow)); 1337 PetscCall(PetscFree2(work, rwork)); 1338 PetscCall(PetscFree(corners)); 1339 PetscCall(PetscFree(cedges)); 1340 PetscCall(PetscFree(extrows)); 1341 PetscCall(PetscFree(extcols)); 1342 PetscCall(MatDestroy(&lG)); 1343 1344 /* Complete assembling */ 1345 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1346 if (pcbddc->nedcG) { 1347 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1348 #if 0 1349 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1350 PetscCall(MatView(pcbddc->nedcG,NULL)); 1351 #endif 1352 } 1353 1354 /* set change of basis */ 1355 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1356 PetscCall(MatDestroy(&T)); 1357 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 /* the near-null space of BDDC carries information on quadrature weights, 1362 and these can be collinear -> so cheat with MatNullSpaceCreate 1363 and create a suitable set of basis vectors first */ 1364 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1365 { 1366 PetscInt i; 1367 1368 PetscFunctionBegin; 1369 for (i = 0; i < nvecs; i++) { 1370 PetscInt first, last; 1371 1372 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1373 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1374 if (i >= first && i < last) { 1375 PetscScalar *data; 1376 PetscCall(VecGetArray(quad_vecs[i], &data)); 1377 if (!has_const) { 1378 data[i - first] = 1.; 1379 } else { 1380 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1381 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1382 } 1383 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1384 } 1385 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1386 } 1387 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1388 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1389 PetscInt first, last; 1390 PetscCall(VecLockReadPop(quad_vecs[i])); 1391 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1392 if (i >= first && i < last) { 1393 PetscScalar *data; 1394 PetscCall(VecGetArray(quad_vecs[i], &data)); 1395 if (!has_const) { 1396 data[i - first] = 0.; 1397 } else { 1398 data[2 * i - first] = 0.; 1399 data[2 * i - first + 1] = 0.; 1400 } 1401 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1402 } 1403 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1404 PetscCall(VecLockReadPush(quad_vecs[i])); 1405 } 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1410 { 1411 Mat loc_divudotp; 1412 Vec p, v, vins, quad_vec, *quad_vecs; 1413 ISLocalToGlobalMapping map; 1414 PetscScalar *vals; 1415 const PetscScalar *array; 1416 PetscInt i, maxneighs = 0, maxsize, *gidxs; 1417 PetscInt n_neigh, *neigh, *n_shared, **shared; 1418 PetscMPIInt rank; 1419 1420 PetscFunctionBegin; 1421 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1422 for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs); 1423 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 1424 if (!maxneighs) { 1425 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1426 *nnsp = NULL; 1427 PetscFunctionReturn(PETSC_SUCCESS); 1428 } 1429 maxsize = 0; 1430 for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize); 1431 PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals)); 1432 /* create vectors to hold quadrature weights */ 1433 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1434 if (!transpose) { 1435 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1436 } else { 1437 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1438 } 1439 PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs)); 1440 PetscCall(VecDestroy(&quad_vec)); 1441 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp)); 1442 for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i])); 1443 1444 /* compute local quad vec */ 1445 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1446 if (!transpose) { 1447 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1448 } else { 1449 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1450 } 1451 PetscCall(VecSet(p, 1.)); 1452 if (!transpose) { 1453 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1454 } else { 1455 PetscCall(MatMult(loc_divudotp, p, v)); 1456 } 1457 if (vl2l) { 1458 Mat lA; 1459 VecScatter sc; 1460 1461 PetscCall(MatISGetLocalMat(A, &lA)); 1462 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1463 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1464 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1466 PetscCall(VecScatterDestroy(&sc)); 1467 } else { 1468 vins = v; 1469 } 1470 PetscCall(VecGetArrayRead(vins, &array)); 1471 PetscCall(VecDestroy(&p)); 1472 1473 /* insert in global quadrature vecs */ 1474 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1475 for (i = 1; i < n_neigh; i++) { 1476 const PetscInt *idxs; 1477 PetscInt idx, nn, j; 1478 1479 idxs = shared[i]; 1480 nn = n_shared[i]; 1481 for (j = 0; j < nn; j++) vals[j] = array[idxs[j]]; 1482 PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx)); 1483 idx = -(idx + 1); 1484 PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs); 1485 PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs)); 1486 PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES)); 1487 } 1488 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1489 PetscCall(VecRestoreArrayRead(vins, &array)); 1490 if (vl2l) PetscCall(VecDestroy(&vins)); 1491 PetscCall(VecDestroy(&v)); 1492 PetscCall(PetscFree2(gidxs, vals)); 1493 1494 /* assemble near null space */ 1495 for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i])); 1496 for (i = 0; i < maxneighs; i++) { 1497 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1498 PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view")); 1499 PetscCall(VecLockReadPush(quad_vecs[i])); 1500 } 1501 PetscCall(VecDestroyVecs(maxneighs, &quad_vecs)); 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1506 { 1507 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1508 1509 PetscFunctionBegin; 1510 if (primalv) { 1511 if (pcbddc->user_primal_vertices_local) { 1512 IS list[2], newp; 1513 1514 list[0] = primalv; 1515 list[1] = pcbddc->user_primal_vertices_local; 1516 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1517 PetscCall(ISSortRemoveDups(newp)); 1518 PetscCall(ISDestroy(&list[1])); 1519 pcbddc->user_primal_vertices_local = newp; 1520 } else { 1521 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1522 } 1523 } 1524 PetscFunctionReturn(PETSC_SUCCESS); 1525 } 1526 1527 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1528 { 1529 PetscInt f, *comp = (PetscInt *)ctx; 1530 1531 PetscFunctionBegin; 1532 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1533 PetscFunctionReturn(PETSC_SUCCESS); 1534 } 1535 1536 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1537 { 1538 Vec local, global; 1539 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1540 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1541 PetscBool monolithic = PETSC_FALSE; 1542 1543 PetscFunctionBegin; 1544 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1545 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1546 PetscOptionsEnd(); 1547 /* need to convert from global to local topology information and remove references to information in global ordering */ 1548 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1549 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1550 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1551 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1552 if (monolithic) { /* just get block size to properly compute vertices */ 1553 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1554 goto boundary; 1555 } 1556 1557 if (pcbddc->user_provided_isfordofs) { 1558 if (pcbddc->n_ISForDofs) { 1559 PetscInt i; 1560 1561 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1562 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1563 PetscInt bs; 1564 1565 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1566 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1567 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1568 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1569 } 1570 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1571 pcbddc->n_ISForDofs = 0; 1572 PetscCall(PetscFree(pcbddc->ISForDofs)); 1573 } 1574 } else { 1575 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1576 DM dm; 1577 1578 PetscCall(MatGetDM(pc->pmat, &dm)); 1579 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1580 if (dm) { 1581 IS *fields; 1582 PetscInt nf, i; 1583 1584 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1585 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1586 for (i = 0; i < nf; i++) { 1587 PetscInt bs; 1588 1589 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1590 PetscCall(ISGetBlockSize(fields[i], &bs)); 1591 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1592 PetscCall(ISDestroy(&fields[i])); 1593 } 1594 PetscCall(PetscFree(fields)); 1595 pcbddc->n_ISForDofsLocal = nf; 1596 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1597 PetscContainer c; 1598 1599 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1600 if (c) { 1601 MatISLocalFields lf; 1602 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1603 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1604 } else { /* fallback, create the default fields if bs > 1 */ 1605 PetscInt i, n = matis->A->rmap->n; 1606 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1607 if (i > 1) { 1608 pcbddc->n_ISForDofsLocal = i; 1609 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1610 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1611 } 1612 } 1613 } 1614 } else { 1615 PetscInt i; 1616 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1617 } 1618 } 1619 1620 boundary: 1621 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1622 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1623 } else if (pcbddc->DirichletBoundariesLocal) { 1624 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1625 } 1626 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1627 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1628 } else if (pcbddc->NeumannBoundariesLocal) { 1629 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1630 } 1631 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1632 PetscCall(VecDestroy(&global)); 1633 PetscCall(VecDestroy(&local)); 1634 /* detect local disconnected subdomains if requested (use matis->A) */ 1635 if (pcbddc->detect_disconnected) { 1636 IS primalv = NULL; 1637 PetscInt i; 1638 PetscBool filter = pcbddc->detect_disconnected_filter; 1639 1640 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1641 PetscCall(PetscFree(pcbddc->local_subs)); 1642 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1643 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1644 PetscCall(ISDestroy(&primalv)); 1645 } 1646 /* early stage corner detection */ 1647 { 1648 DM dm; 1649 1650 PetscCall(MatGetDM(pc->pmat, &dm)); 1651 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1652 if (dm) { 1653 PetscBool isda; 1654 1655 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1656 if (isda) { 1657 ISLocalToGlobalMapping l2l; 1658 IS corners; 1659 Mat lA; 1660 PetscBool gl, lo; 1661 1662 { 1663 Vec cvec; 1664 const PetscScalar *coords; 1665 PetscInt dof, n, cdim; 1666 PetscBool memc = PETSC_TRUE; 1667 1668 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1669 PetscCall(DMGetCoordinates(dm, &cvec)); 1670 PetscCall(VecGetLocalSize(cvec, &n)); 1671 PetscCall(VecGetBlockSize(cvec, &cdim)); 1672 n /= cdim; 1673 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1674 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1675 PetscCall(VecGetArrayRead(cvec, &coords)); 1676 #if defined(PETSC_USE_COMPLEX) 1677 memc = PETSC_FALSE; 1678 #endif 1679 if (dof != 1) memc = PETSC_FALSE; 1680 if (memc) { 1681 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1682 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1683 PetscReal *bcoords = pcbddc->mat_graph->coords; 1684 PetscInt i, b, d; 1685 1686 for (i = 0; i < n; i++) { 1687 for (b = 0; b < dof; b++) { 1688 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1689 } 1690 } 1691 } 1692 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1693 pcbddc->mat_graph->cdim = cdim; 1694 pcbddc->mat_graph->cnloc = dof * n; 1695 pcbddc->mat_graph->cloc = PETSC_FALSE; 1696 } 1697 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1698 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1699 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1700 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1701 lo = (PetscBool)(l2l && corners); 1702 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1703 if (gl) { /* From PETSc's DMDA */ 1704 const PetscInt *idx; 1705 PetscInt dof, bs, *idxout, n; 1706 1707 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1708 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1709 PetscCall(ISGetLocalSize(corners, &n)); 1710 PetscCall(ISGetIndices(corners, &idx)); 1711 if (bs == dof) { 1712 PetscCall(PetscMalloc1(n, &idxout)); 1713 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1714 } else { /* the original DMDA local-to-local map have been modified */ 1715 PetscInt i, d; 1716 1717 PetscCall(PetscMalloc1(dof * n, &idxout)); 1718 for (i = 0; i < n; i++) 1719 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1720 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1721 1722 bs = 1; 1723 n *= dof; 1724 } 1725 PetscCall(ISRestoreIndices(corners, &idx)); 1726 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1727 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1728 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1729 PetscCall(ISDestroy(&corners)); 1730 pcbddc->corner_selected = PETSC_TRUE; 1731 pcbddc->corner_selection = PETSC_TRUE; 1732 } 1733 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1734 } 1735 } 1736 } 1737 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1738 DM dm; 1739 1740 PetscCall(MatGetDM(pc->pmat, &dm)); 1741 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1742 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1743 Vec vcoords; 1744 PetscSection section; 1745 PetscReal *coords; 1746 PetscInt d, cdim, nl, nf, **ctxs; 1747 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1748 /* debug coordinates */ 1749 PetscViewer viewer; 1750 PetscBool flg; 1751 PetscViewerFormat format; 1752 const char *prefix; 1753 1754 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1755 PetscCall(DMGetLocalSection(dm, §ion)); 1756 PetscCall(PetscSectionGetNumFields(section, &nf)); 1757 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1758 PetscCall(VecGetLocalSize(vcoords, &nl)); 1759 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1760 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1761 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1762 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1763 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1764 1765 /* debug coordinates */ 1766 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1767 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1768 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1769 for (d = 0; d < cdim; d++) { 1770 PetscInt i; 1771 const PetscScalar *v; 1772 char name[16]; 1773 1774 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1775 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1776 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1777 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1778 if (flg) PetscCall(VecView(vcoords, viewer)); 1779 PetscCall(VecGetArrayRead(vcoords, &v)); 1780 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1781 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1782 } 1783 PetscCall(VecDestroy(&vcoords)); 1784 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1785 PetscCall(PetscFree(coords)); 1786 PetscCall(PetscFree(ctxs[0])); 1787 PetscCall(PetscFree2(funcs, ctxs)); 1788 if (flg) { 1789 PetscCall(PetscViewerPopFormat(viewer)); 1790 PetscCall(PetscViewerDestroy(&viewer)); 1791 } 1792 } 1793 } 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1798 { 1799 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 1800 IS nis; 1801 const PetscInt *idxs; 1802 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1803 1804 PetscFunctionBegin; 1805 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1806 if (mop == MPI_LAND) { 1807 /* init rootdata with true */ 1808 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1809 } else { 1810 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1811 } 1812 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1813 PetscCall(ISGetLocalSize(*is, &nd)); 1814 PetscCall(ISGetIndices(*is, &idxs)); 1815 for (i = 0; i < nd; i++) 1816 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1817 PetscCall(ISRestoreIndices(*is, &idxs)); 1818 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1819 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1820 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1821 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1822 if (mop == MPI_LAND) { 1823 PetscCall(PetscMalloc1(nd, &nidxs)); 1824 } else { 1825 PetscCall(PetscMalloc1(n, &nidxs)); 1826 } 1827 for (i = 0, nnd = 0; i < n; i++) 1828 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1829 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1830 PetscCall(ISDestroy(is)); 1831 *is = nis; 1832 PetscFunctionReturn(PETSC_SUCCESS); 1833 } 1834 1835 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 1836 { 1837 PC_IS *pcis = (PC_IS *)(pc->data); 1838 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 1839 1840 PetscFunctionBegin; 1841 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 1842 if (pcbddc->ChangeOfBasisMatrix) { 1843 Vec swap; 1844 1845 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1846 swap = pcbddc->work_change; 1847 pcbddc->work_change = r; 1848 r = swap; 1849 } 1850 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1851 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1852 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1853 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1854 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1855 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1856 PetscCall(VecSet(z, 0.)); 1857 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1858 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1859 if (pcbddc->ChangeOfBasisMatrix) { 1860 pcbddc->work_change = r; 1861 PetscCall(VecCopy(z, pcbddc->work_change)); 1862 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1863 } 1864 PetscFunctionReturn(PETSC_SUCCESS); 1865 } 1866 1867 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1868 { 1869 PCBDDCBenignMatMult_ctx ctx; 1870 PetscBool apply_right, apply_left, reset_x; 1871 1872 PetscFunctionBegin; 1873 PetscCall(MatShellGetContext(A, &ctx)); 1874 if (transpose) { 1875 apply_right = ctx->apply_left; 1876 apply_left = ctx->apply_right; 1877 } else { 1878 apply_right = ctx->apply_right; 1879 apply_left = ctx->apply_left; 1880 } 1881 reset_x = PETSC_FALSE; 1882 if (apply_right) { 1883 const PetscScalar *ax; 1884 PetscInt nl, i; 1885 1886 PetscCall(VecGetLocalSize(x, &nl)); 1887 PetscCall(VecGetArrayRead(x, &ax)); 1888 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 1889 PetscCall(VecRestoreArrayRead(x, &ax)); 1890 for (i = 0; i < ctx->benign_n; i++) { 1891 PetscScalar sum, val; 1892 const PetscInt *idxs; 1893 PetscInt nz, j; 1894 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1895 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1896 sum = 0.; 1897 if (ctx->apply_p0) { 1898 val = ctx->work[idxs[nz - 1]]; 1899 for (j = 0; j < nz - 1; j++) { 1900 sum += ctx->work[idxs[j]]; 1901 ctx->work[idxs[j]] += val; 1902 } 1903 } else { 1904 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 1905 } 1906 ctx->work[idxs[nz - 1]] -= sum; 1907 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1908 } 1909 PetscCall(VecPlaceArray(x, ctx->work)); 1910 reset_x = PETSC_TRUE; 1911 } 1912 if (transpose) { 1913 PetscCall(MatMultTranspose(ctx->A, x, y)); 1914 } else { 1915 PetscCall(MatMult(ctx->A, x, y)); 1916 } 1917 if (reset_x) PetscCall(VecResetArray(x)); 1918 if (apply_left) { 1919 PetscScalar *ay; 1920 PetscInt i; 1921 1922 PetscCall(VecGetArray(y, &ay)); 1923 for (i = 0; i < ctx->benign_n; i++) { 1924 PetscScalar sum, val; 1925 const PetscInt *idxs; 1926 PetscInt nz, j; 1927 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1928 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1929 val = -ay[idxs[nz - 1]]; 1930 if (ctx->apply_p0) { 1931 sum = 0.; 1932 for (j = 0; j < nz - 1; j++) { 1933 sum += ay[idxs[j]]; 1934 ay[idxs[j]] += val; 1935 } 1936 ay[idxs[nz - 1]] += sum; 1937 } else { 1938 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 1939 ay[idxs[nz - 1]] = 0.; 1940 } 1941 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1942 } 1943 PetscCall(VecRestoreArray(y, &ay)); 1944 } 1945 PetscFunctionReturn(PETSC_SUCCESS); 1946 } 1947 1948 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1949 { 1950 PetscFunctionBegin; 1951 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 1952 PetscFunctionReturn(PETSC_SUCCESS); 1953 } 1954 1955 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1956 { 1957 PetscFunctionBegin; 1958 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 1959 PetscFunctionReturn(PETSC_SUCCESS); 1960 } 1961 1962 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1963 { 1964 PC_IS *pcis = (PC_IS *)pc->data; 1965 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1966 PCBDDCBenignMatMult_ctx ctx; 1967 1968 PetscFunctionBegin; 1969 if (!restore) { 1970 Mat A_IB, A_BI; 1971 PetscScalar *work; 1972 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1973 1974 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 1975 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 1976 PetscCall(PetscMalloc1(pcis->n, &work)); 1977 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 1978 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 1979 PetscCall(MatSetType(A_IB, MATSHELL)); 1980 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 1981 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 1982 PetscCall(PetscNew(&ctx)); 1983 PetscCall(MatShellSetContext(A_IB, ctx)); 1984 ctx->apply_left = PETSC_TRUE; 1985 ctx->apply_right = PETSC_FALSE; 1986 ctx->apply_p0 = PETSC_FALSE; 1987 ctx->benign_n = pcbddc->benign_n; 1988 if (reuse) { 1989 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1990 ctx->free = PETSC_FALSE; 1991 } else { /* TODO: could be optimized for successive solves */ 1992 ISLocalToGlobalMapping N_to_D; 1993 PetscInt i; 1994 1995 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 1996 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 1997 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 1998 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 1999 ctx->free = PETSC_TRUE; 2000 } 2001 ctx->A = pcis->A_IB; 2002 ctx->work = work; 2003 PetscCall(MatSetUp(A_IB)); 2004 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2005 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2006 pcis->A_IB = A_IB; 2007 2008 /* A_BI as A_IB^T */ 2009 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2010 pcbddc->benign_original_mat = pcis->A_BI; 2011 pcis->A_BI = A_BI; 2012 } else { 2013 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2014 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2015 PetscCall(MatDestroy(&pcis->A_IB)); 2016 pcis->A_IB = ctx->A; 2017 ctx->A = NULL; 2018 PetscCall(MatDestroy(&pcis->A_BI)); 2019 pcis->A_BI = pcbddc->benign_original_mat; 2020 pcbddc->benign_original_mat = NULL; 2021 if (ctx->free) { 2022 PetscInt i; 2023 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2024 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2025 } 2026 PetscCall(PetscFree(ctx->work)); 2027 PetscCall(PetscFree(ctx)); 2028 } 2029 PetscFunctionReturn(PETSC_SUCCESS); 2030 } 2031 2032 /* used just in bddc debug mode */ 2033 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2034 { 2035 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2036 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2037 Mat An; 2038 2039 PetscFunctionBegin; 2040 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2041 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2042 if (is1) { 2043 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2044 PetscCall(MatDestroy(&An)); 2045 } else { 2046 *B = An; 2047 } 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 /* TODO: add reuse flag */ 2052 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2053 { 2054 Mat Bt; 2055 PetscScalar *a, *bdata; 2056 const PetscInt *ii, *ij; 2057 PetscInt m, n, i, nnz, *bii, *bij; 2058 PetscBool flg_row; 2059 2060 PetscFunctionBegin; 2061 PetscCall(MatGetSize(A, &n, &m)); 2062 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2063 PetscCall(MatSeqAIJGetArray(A, &a)); 2064 nnz = n; 2065 for (i = 0; i < ii[n]; i++) { 2066 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2067 } 2068 PetscCall(PetscMalloc1(n + 1, &bii)); 2069 PetscCall(PetscMalloc1(nnz, &bij)); 2070 PetscCall(PetscMalloc1(nnz, &bdata)); 2071 nnz = 0; 2072 bii[0] = 0; 2073 for (i = 0; i < n; i++) { 2074 PetscInt j; 2075 for (j = ii[i]; j < ii[i + 1]; j++) { 2076 PetscScalar entry = a[j]; 2077 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2078 bij[nnz] = ij[j]; 2079 bdata[nnz] = entry; 2080 nnz++; 2081 } 2082 } 2083 bii[i + 1] = nnz; 2084 } 2085 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2086 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2087 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2088 { 2089 Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data); 2090 b->free_a = PETSC_TRUE; 2091 b->free_ij = PETSC_TRUE; 2092 } 2093 if (*B == A) PetscCall(MatDestroy(&A)); 2094 *B = Bt; 2095 PetscFunctionReturn(PETSC_SUCCESS); 2096 } 2097 2098 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2099 { 2100 Mat B = NULL; 2101 DM dm; 2102 IS is_dummy, *cc_n; 2103 ISLocalToGlobalMapping l2gmap_dummy; 2104 PCBDDCGraph graph; 2105 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2106 PetscInt i, n; 2107 PetscInt *xadj, *adjncy; 2108 PetscBool isplex = PETSC_FALSE; 2109 2110 PetscFunctionBegin; 2111 if (ncc) *ncc = 0; 2112 if (cc) *cc = NULL; 2113 if (primalv) *primalv = NULL; 2114 PetscCall(PCBDDCGraphCreate(&graph)); 2115 PetscCall(MatGetDM(pc->pmat, &dm)); 2116 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2117 if (dm) PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex)); 2118 if (filter) isplex = PETSC_FALSE; 2119 2120 if (isplex) { /* this code has been modified from plexpartition.c */ 2121 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2122 PetscInt *adj = NULL; 2123 IS cellNumbering; 2124 const PetscInt *cellNum; 2125 PetscBool useCone, useClosure; 2126 PetscSection section; 2127 PetscSegBuffer adjBuffer; 2128 PetscSF sfPoint; 2129 2130 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2131 PetscCall(DMGetPointSF(dm, &sfPoint)); 2132 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2133 /* Build adjacency graph via a section/segbuffer */ 2134 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2135 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2136 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2137 /* Always use FVM adjacency to create partitioner graph */ 2138 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2139 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2140 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2141 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2142 for (n = 0, p = pStart; p < pEnd; p++) { 2143 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2144 if (nroots > 0) { 2145 if (cellNum[p] < 0) continue; 2146 } 2147 adjSize = PETSC_DETERMINE; 2148 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2149 for (a = 0; a < adjSize; ++a) { 2150 const PetscInt point = adj[a]; 2151 if (pStart <= point && point < pEnd) { 2152 PetscInt *PETSC_RESTRICT pBuf; 2153 PetscCall(PetscSectionAddDof(section, p, 1)); 2154 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2155 *pBuf = point; 2156 } 2157 } 2158 n++; 2159 } 2160 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2161 /* Derive CSR graph from section/segbuffer */ 2162 PetscCall(PetscSectionSetUp(section)); 2163 PetscCall(PetscSectionGetStorageSize(section, &size)); 2164 PetscCall(PetscMalloc1(n + 1, &xadj)); 2165 for (idx = 0, p = pStart; p < pEnd; p++) { 2166 if (nroots > 0) { 2167 if (cellNum[p] < 0) continue; 2168 } 2169 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2170 } 2171 xadj[n] = size; 2172 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2173 /* Clean up */ 2174 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2175 PetscCall(PetscSectionDestroy(§ion)); 2176 PetscCall(PetscFree(adj)); 2177 graph->xadj = xadj; 2178 graph->adjncy = adjncy; 2179 } else { 2180 Mat A; 2181 PetscBool isseqaij, flg_row; 2182 2183 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2184 if (!A->rmap->N || !A->cmap->N) { 2185 PetscCall(PCBDDCGraphDestroy(&graph)); 2186 PetscFunctionReturn(PETSC_SUCCESS); 2187 } 2188 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2189 if (!isseqaij && filter) { 2190 PetscBool isseqdense; 2191 2192 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2193 if (!isseqdense) { 2194 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2195 } else { /* TODO: rectangular case and LDA */ 2196 PetscScalar *array; 2197 PetscReal chop = 1.e-6; 2198 2199 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2200 PetscCall(MatDenseGetArray(B, &array)); 2201 PetscCall(MatGetSize(B, &n, NULL)); 2202 for (i = 0; i < n; i++) { 2203 PetscInt j; 2204 for (j = i + 1; j < n; j++) { 2205 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2206 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2207 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2208 } 2209 } 2210 PetscCall(MatDenseRestoreArray(B, &array)); 2211 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2212 } 2213 } else { 2214 PetscCall(PetscObjectReference((PetscObject)A)); 2215 B = A; 2216 } 2217 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2218 2219 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2220 if (filter) { 2221 PetscScalar *data; 2222 PetscInt j, cum; 2223 2224 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2225 PetscCall(MatSeqAIJGetArray(B, &data)); 2226 cum = 0; 2227 for (i = 0; i < n; i++) { 2228 PetscInt t; 2229 2230 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2231 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2232 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2233 } 2234 t = xadj_filtered[i]; 2235 xadj_filtered[i] = cum; 2236 cum += t; 2237 } 2238 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2239 graph->xadj = xadj_filtered; 2240 graph->adjncy = adjncy_filtered; 2241 } else { 2242 graph->xadj = xadj; 2243 graph->adjncy = adjncy; 2244 } 2245 } 2246 /* compute local connected components using PCBDDCGraph */ 2247 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2248 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2249 PetscCall(ISDestroy(&is_dummy)); 2250 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2251 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2252 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2253 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2254 2255 /* partial clean up */ 2256 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2257 if (B) { 2258 PetscBool flg_row; 2259 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2260 PetscCall(MatDestroy(&B)); 2261 } 2262 if (isplex) { 2263 PetscCall(PetscFree(xadj)); 2264 PetscCall(PetscFree(adjncy)); 2265 } 2266 2267 /* get back data */ 2268 if (isplex) { 2269 if (ncc) *ncc = graph->ncc; 2270 if (cc || primalv) { 2271 Mat A; 2272 PetscBT btv, btvt; 2273 PetscSection subSection; 2274 PetscInt *ids, cum, cump, *cids, *pids; 2275 2276 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2277 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2278 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2279 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2280 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2281 2282 cids[0] = 0; 2283 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2284 PetscInt j; 2285 2286 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2287 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2288 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2289 2290 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2291 for (k = 0; k < 2 * size; k += 2) { 2292 PetscInt s, pp, p = closure[k], off, dof, cdof; 2293 2294 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2295 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2296 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2297 for (s = 0; s < dof - cdof; s++) { 2298 if (PetscBTLookupSet(btvt, off + s)) continue; 2299 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2300 else pids[cump++] = off + s; /* cross-vertex */ 2301 } 2302 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2303 if (pp != p) { 2304 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2305 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2306 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2307 for (s = 0; s < dof - cdof; s++) { 2308 if (PetscBTLookupSet(btvt, off + s)) continue; 2309 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2310 else pids[cump++] = off + s; /* cross-vertex */ 2311 } 2312 } 2313 } 2314 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2315 } 2316 cids[i + 1] = cum; 2317 /* mark dofs as already assigned */ 2318 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2319 } 2320 if (cc) { 2321 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2322 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2323 *cc = cc_n; 2324 } 2325 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2326 PetscCall(PetscFree3(ids, cids, pids)); 2327 PetscCall(PetscBTDestroy(&btv)); 2328 PetscCall(PetscBTDestroy(&btvt)); 2329 } 2330 } else { 2331 if (ncc) *ncc = graph->ncc; 2332 if (cc) { 2333 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2334 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2335 *cc = cc_n; 2336 } 2337 } 2338 /* clean up graph */ 2339 graph->xadj = NULL; 2340 graph->adjncy = NULL; 2341 PetscCall(PCBDDCGraphDestroy(&graph)); 2342 PetscFunctionReturn(PETSC_SUCCESS); 2343 } 2344 2345 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2346 { 2347 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2348 PC_IS *pcis = (PC_IS *)(pc->data); 2349 IS dirIS = NULL; 2350 PetscInt i; 2351 2352 PetscFunctionBegin; 2353 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2354 if (zerodiag) { 2355 Mat A; 2356 Vec vec3_N; 2357 PetscScalar *vals; 2358 const PetscInt *idxs; 2359 PetscInt nz, *count; 2360 2361 /* p0 */ 2362 PetscCall(VecSet(pcis->vec1_N, 0.)); 2363 PetscCall(PetscMalloc1(pcis->n, &vals)); 2364 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2365 PetscCall(ISGetIndices(zerodiag, &idxs)); 2366 for (i = 0; i < nz; i++) vals[i] = 1.; 2367 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2368 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2369 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2370 /* v_I */ 2371 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2372 for (i = 0; i < nz; i++) vals[i] = 0.; 2373 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2374 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2375 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2376 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2377 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2378 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2379 if (dirIS) { 2380 PetscInt n; 2381 2382 PetscCall(ISGetLocalSize(dirIS, &n)); 2383 PetscCall(ISGetIndices(dirIS, &idxs)); 2384 for (i = 0; i < n; i++) vals[i] = 0.; 2385 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2386 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2387 } 2388 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2389 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2390 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2391 PetscCall(VecSet(vec3_N, 0.)); 2392 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2393 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2394 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2395 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2396 PetscCall(PetscFree(vals)); 2397 PetscCall(VecDestroy(&vec3_N)); 2398 2399 /* there should not be any pressure dofs lying on the interface */ 2400 PetscCall(PetscCalloc1(pcis->n, &count)); 2401 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2402 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2403 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2404 PetscCall(ISGetIndices(zerodiag, &idxs)); 2405 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2406 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2407 PetscCall(PetscFree(count)); 2408 } 2409 PetscCall(ISDestroy(&dirIS)); 2410 2411 /* check PCBDDCBenignGetOrSetP0 */ 2412 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2413 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2414 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2415 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2416 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2417 for (i = 0; i < pcbddc->benign_n; i++) { 2418 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2419 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2420 } 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2425 { 2426 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2427 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 2428 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2429 PetscInt nz, n, benign_n, bsp = 1; 2430 PetscInt *interior_dofs, n_interior_dofs, nneu; 2431 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2432 2433 PetscFunctionBegin; 2434 if (reuse) goto project_b0; 2435 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2436 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2437 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2438 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2439 has_null_pressures = PETSC_TRUE; 2440 have_null = PETSC_TRUE; 2441 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2442 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2443 Checks if all the pressure dofs in each subdomain have a zero diagonal 2444 If not, a change of basis on pressures is not needed 2445 since the local Schur complements are already SPD 2446 */ 2447 if (pcbddc->n_ISForDofsLocal) { 2448 IS iP = NULL; 2449 PetscInt p, *pp; 2450 PetscBool flg; 2451 2452 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2453 n = pcbddc->n_ISForDofsLocal; 2454 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2455 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2456 PetscOptionsEnd(); 2457 if (!flg) { 2458 n = 1; 2459 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2460 } 2461 2462 bsp = 0; 2463 for (p = 0; p < n; p++) { 2464 PetscInt bs; 2465 2466 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2467 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2468 bsp += bs; 2469 } 2470 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2471 bsp = 0; 2472 for (p = 0; p < n; p++) { 2473 const PetscInt *idxs; 2474 PetscInt b, bs, npl, *bidxs; 2475 2476 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2477 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2478 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2479 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2480 for (b = 0; b < bs; b++) { 2481 PetscInt i; 2482 2483 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2484 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2485 bsp++; 2486 } 2487 PetscCall(PetscFree(bidxs)); 2488 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2489 } 2490 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2491 2492 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2493 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2494 if (iP) { 2495 IS newpressures; 2496 2497 PetscCall(ISDifference(pressures, iP, &newpressures)); 2498 PetscCall(ISDestroy(&pressures)); 2499 pressures = newpressures; 2500 } 2501 PetscCall(ISSorted(pressures, &sorted)); 2502 if (!sorted) PetscCall(ISSort(pressures)); 2503 PetscCall(PetscFree(pp)); 2504 } 2505 2506 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2507 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2508 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2509 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2510 PetscCall(ISSorted(zerodiag, &sorted)); 2511 if (!sorted) PetscCall(ISSort(zerodiag)); 2512 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2513 zerodiag_save = zerodiag; 2514 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2515 if (!nz) { 2516 if (n) have_null = PETSC_FALSE; 2517 has_null_pressures = PETSC_FALSE; 2518 PetscCall(ISDestroy(&zerodiag)); 2519 } 2520 recompute_zerodiag = PETSC_FALSE; 2521 2522 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2523 zerodiag_subs = NULL; 2524 benign_n = 0; 2525 n_interior_dofs = 0; 2526 interior_dofs = NULL; 2527 nneu = 0; 2528 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2529 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2530 if (checkb) { /* need to compute interior nodes */ 2531 PetscInt n, i, j; 2532 PetscInt n_neigh, *neigh, *n_shared, **shared; 2533 PetscInt *iwork; 2534 2535 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n)); 2536 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2537 PetscCall(PetscCalloc1(n, &iwork)); 2538 PetscCall(PetscMalloc1(n, &interior_dofs)); 2539 for (i = 1; i < n_neigh; i++) 2540 for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1; 2541 for (i = 0; i < n; i++) 2542 if (!iwork[i]) interior_dofs[n_interior_dofs++] = i; 2543 PetscCall(PetscFree(iwork)); 2544 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2545 } 2546 if (has_null_pressures) { 2547 IS *subs; 2548 PetscInt nsubs, i, j, nl; 2549 const PetscInt *idxs; 2550 PetscScalar *array; 2551 Vec *work; 2552 2553 subs = pcbddc->local_subs; 2554 nsubs = pcbddc->n_local_subs; 2555 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2556 if (checkb) { 2557 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2558 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2559 PetscCall(ISGetIndices(zerodiag, &idxs)); 2560 /* work[0] = 1_p */ 2561 PetscCall(VecSet(work[0], 0.)); 2562 PetscCall(VecGetArray(work[0], &array)); 2563 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2564 PetscCall(VecRestoreArray(work[0], &array)); 2565 /* work[0] = 1_v */ 2566 PetscCall(VecSet(work[1], 1.)); 2567 PetscCall(VecGetArray(work[1], &array)); 2568 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2569 PetscCall(VecRestoreArray(work[1], &array)); 2570 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2571 } 2572 2573 if (nsubs > 1 || bsp > 1) { 2574 IS *is; 2575 PetscInt b, totb; 2576 2577 totb = bsp; 2578 is = bsp > 1 ? bzerodiag : &zerodiag; 2579 nsubs = PetscMax(nsubs, 1); 2580 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2581 for (b = 0; b < totb; b++) { 2582 for (i = 0; i < nsubs; i++) { 2583 ISLocalToGlobalMapping l2g; 2584 IS t_zerodiag_subs; 2585 PetscInt nl; 2586 2587 if (subs) { 2588 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2589 } else { 2590 IS tis; 2591 2592 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2593 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2594 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2595 PetscCall(ISDestroy(&tis)); 2596 } 2597 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2598 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2599 if (nl) { 2600 PetscBool valid = PETSC_TRUE; 2601 2602 if (checkb) { 2603 PetscCall(VecSet(matis->x, 0)); 2604 PetscCall(ISGetLocalSize(subs[i], &nl)); 2605 PetscCall(ISGetIndices(subs[i], &idxs)); 2606 PetscCall(VecGetArray(matis->x, &array)); 2607 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2608 PetscCall(VecRestoreArray(matis->x, &array)); 2609 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2610 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2611 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2612 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2613 PetscCall(VecGetArray(matis->y, &array)); 2614 for (j = 0; j < n_interior_dofs; j++) { 2615 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2616 valid = PETSC_FALSE; 2617 break; 2618 } 2619 } 2620 PetscCall(VecRestoreArray(matis->y, &array)); 2621 } 2622 if (valid && nneu) { 2623 const PetscInt *idxs; 2624 PetscInt nzb; 2625 2626 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2627 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2628 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2629 if (nzb) valid = PETSC_FALSE; 2630 } 2631 if (valid && pressures) { 2632 IS t_pressure_subs, tmp; 2633 PetscInt i1, i2; 2634 2635 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2636 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2637 PetscCall(ISGetLocalSize(tmp, &i1)); 2638 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2639 if (i2 != i1) valid = PETSC_FALSE; 2640 PetscCall(ISDestroy(&t_pressure_subs)); 2641 PetscCall(ISDestroy(&tmp)); 2642 } 2643 if (valid) { 2644 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2645 benign_n++; 2646 } else recompute_zerodiag = PETSC_TRUE; 2647 } 2648 PetscCall(ISDestroy(&t_zerodiag_subs)); 2649 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2650 } 2651 } 2652 } else { /* there's just one subdomain (or zero if they have not been detected */ 2653 PetscBool valid = PETSC_TRUE; 2654 2655 if (nneu) valid = PETSC_FALSE; 2656 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2657 if (valid && checkb) { 2658 PetscCall(MatMult(matis->A, work[0], matis->x)); 2659 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2660 PetscCall(VecGetArray(matis->x, &array)); 2661 for (j = 0; j < n_interior_dofs; j++) { 2662 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2663 valid = PETSC_FALSE; 2664 break; 2665 } 2666 } 2667 PetscCall(VecRestoreArray(matis->x, &array)); 2668 } 2669 if (valid) { 2670 benign_n = 1; 2671 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2672 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2673 zerodiag_subs[0] = zerodiag; 2674 } 2675 } 2676 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2677 } 2678 PetscCall(PetscFree(interior_dofs)); 2679 2680 if (!benign_n) { 2681 PetscInt n; 2682 2683 PetscCall(ISDestroy(&zerodiag)); 2684 recompute_zerodiag = PETSC_FALSE; 2685 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2686 if (n) have_null = PETSC_FALSE; 2687 } 2688 2689 /* final check for null pressures */ 2690 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2691 2692 if (recompute_zerodiag) { 2693 PetscCall(ISDestroy(&zerodiag)); 2694 if (benign_n == 1) { 2695 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2696 zerodiag = zerodiag_subs[0]; 2697 } else { 2698 PetscInt i, nzn, *new_idxs; 2699 2700 nzn = 0; 2701 for (i = 0; i < benign_n; i++) { 2702 PetscInt ns; 2703 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2704 nzn += ns; 2705 } 2706 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2707 nzn = 0; 2708 for (i = 0; i < benign_n; i++) { 2709 PetscInt ns, *idxs; 2710 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2711 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2712 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2713 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2714 nzn += ns; 2715 } 2716 PetscCall(PetscSortInt(nzn, new_idxs)); 2717 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2718 } 2719 have_null = PETSC_FALSE; 2720 } 2721 2722 /* determines if the coarse solver will be singular or not */ 2723 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2724 2725 /* Prepare matrix to compute no-net-flux */ 2726 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2727 Mat A, loc_divudotp; 2728 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2729 IS row, col, isused = NULL; 2730 PetscInt M, N, n, st, n_isused; 2731 2732 if (pressures) { 2733 isused = pressures; 2734 } else { 2735 isused = zerodiag_save; 2736 } 2737 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2738 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2739 PetscCall(MatGetLocalSize(A, &n, NULL)); 2740 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2741 n_isused = 0; 2742 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2743 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2744 st = st - n_isused; 2745 if (n) { 2746 const PetscInt *gidxs; 2747 2748 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2749 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2750 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2751 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2752 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2753 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2754 } else { 2755 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2756 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2757 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2758 } 2759 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2760 PetscCall(ISGetSize(row, &M)); 2761 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2762 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2763 PetscCall(ISDestroy(&row)); 2764 PetscCall(ISDestroy(&col)); 2765 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2766 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2767 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2768 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2769 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2770 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2771 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2772 PetscCall(MatDestroy(&loc_divudotp)); 2773 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2774 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2775 } 2776 PetscCall(ISDestroy(&zerodiag_save)); 2777 PetscCall(ISDestroy(&pressures)); 2778 if (bzerodiag) { 2779 PetscInt i; 2780 2781 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 2782 PetscCall(PetscFree(bzerodiag)); 2783 } 2784 pcbddc->benign_n = benign_n; 2785 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2786 2787 /* determines if the problem has subdomains with 0 pressure block */ 2788 have_null = (PetscBool)(!!pcbddc->benign_n); 2789 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2790 2791 project_b0: 2792 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2793 /* change of basis and p0 dofs */ 2794 if (pcbddc->benign_n) { 2795 PetscInt i, s, *nnz; 2796 2797 /* local change of basis for pressures */ 2798 PetscCall(MatDestroy(&pcbddc->benign_change)); 2799 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2800 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2801 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2802 PetscCall(PetscMalloc1(n, &nnz)); 2803 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2804 for (i = 0; i < pcbddc->benign_n; i++) { 2805 const PetscInt *idxs; 2806 PetscInt nzs, j; 2807 2808 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2809 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2810 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2811 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2812 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2813 } 2814 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2815 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2816 PetscCall(PetscFree(nnz)); 2817 /* set identity by default */ 2818 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 2819 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 2820 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 2821 /* set change on pressures */ 2822 for (s = 0; s < pcbddc->benign_n; s++) { 2823 PetscScalar *array; 2824 const PetscInt *idxs; 2825 PetscInt nzs; 2826 2827 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 2828 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2829 for (i = 0; i < nzs - 1; i++) { 2830 PetscScalar vals[2]; 2831 PetscInt cols[2]; 2832 2833 cols[0] = idxs[i]; 2834 cols[1] = idxs[nzs - 1]; 2835 vals[0] = 1.; 2836 vals[1] = 1.; 2837 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 2838 } 2839 PetscCall(PetscMalloc1(nzs, &array)); 2840 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 2841 array[nzs - 1] = 1.; 2842 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 2843 /* store local idxs for p0 */ 2844 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 2845 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2846 PetscCall(PetscFree(array)); 2847 } 2848 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2849 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2850 2851 /* project if needed */ 2852 if (pcbddc->benign_change_explicit) { 2853 Mat M; 2854 2855 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 2856 PetscCall(MatDestroy(&pcbddc->local_mat)); 2857 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 2858 PetscCall(MatDestroy(&M)); 2859 } 2860 /* store global idxs for p0 */ 2861 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 2862 } 2863 *zerodiaglocal = zerodiag; 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2868 { 2869 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2870 PetscScalar *array; 2871 2872 PetscFunctionBegin; 2873 if (!pcbddc->benign_sf) { 2874 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 2875 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 2876 } 2877 if (get) { 2878 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 2879 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2880 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2881 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 2882 } else { 2883 PetscCall(VecGetArray(v, &array)); 2884 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2885 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2886 PetscCall(VecRestoreArray(v, &array)); 2887 } 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2892 { 2893 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2894 2895 PetscFunctionBegin; 2896 /* TODO: add error checking 2897 - avoid nested pop (or push) calls. 2898 - cannot push before pop. 2899 - cannot call this if pcbddc->local_mat is NULL 2900 */ 2901 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 2902 if (pop) { 2903 if (pcbddc->benign_change_explicit) { 2904 IS is_p0; 2905 MatReuse reuse; 2906 2907 /* extract B_0 */ 2908 reuse = MAT_INITIAL_MATRIX; 2909 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 2910 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 2911 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 2912 /* remove rows and cols from local problem */ 2913 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 2914 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 2915 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 2916 PetscCall(ISDestroy(&is_p0)); 2917 } else { 2918 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2919 PetscScalar *vals; 2920 PetscInt i, n, *idxs_ins; 2921 2922 PetscCall(VecGetLocalSize(matis->y, &n)); 2923 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 2924 if (!pcbddc->benign_B0) { 2925 PetscInt *nnz; 2926 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 2927 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 2928 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 2929 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 2930 for (i = 0; i < pcbddc->benign_n; i++) { 2931 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 2932 nnz[i] = n - nnz[i]; 2933 } 2934 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 2935 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2936 PetscCall(PetscFree(nnz)); 2937 } 2938 2939 for (i = 0; i < pcbddc->benign_n; i++) { 2940 PetscScalar *array; 2941 PetscInt *idxs, j, nz, cum; 2942 2943 PetscCall(VecSet(matis->x, 0.)); 2944 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 2945 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2946 for (j = 0; j < nz; j++) vals[j] = 1.; 2947 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 2948 PetscCall(VecAssemblyBegin(matis->x)); 2949 PetscCall(VecAssemblyEnd(matis->x)); 2950 PetscCall(VecSet(matis->y, 0.)); 2951 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2952 PetscCall(VecGetArray(matis->y, &array)); 2953 cum = 0; 2954 for (j = 0; j < n; j++) { 2955 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2956 vals[cum] = array[j]; 2957 idxs_ins[cum] = j; 2958 cum++; 2959 } 2960 } 2961 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 2962 PetscCall(VecRestoreArray(matis->y, &array)); 2963 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 2964 } 2965 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2966 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 2967 PetscCall(PetscFree2(idxs_ins, vals)); 2968 } 2969 } else { /* push */ 2970 2971 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 2972 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 2973 PetscScalar *B0_vals; 2974 PetscInt *B0_cols, B0_ncol; 2975 2976 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2977 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 2978 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 2979 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 2980 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 2981 } 2982 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2983 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 2984 } 2985 PetscFunctionReturn(PETSC_SUCCESS); 2986 } 2987 2988 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2989 { 2990 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2991 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2992 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 2993 PetscBLASInt *B_iwork, *B_ifail; 2994 PetscScalar *work, lwork; 2995 PetscScalar *St, *S, *eigv; 2996 PetscScalar *Sarray, *Starray; 2997 PetscReal *eigs, thresh, lthresh, uthresh; 2998 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 2999 PetscBool allocated_S_St, upart; 3000 #if defined(PETSC_USE_COMPLEX) 3001 PetscReal *rwork; 3002 #endif 3003 3004 PetscFunctionBegin; 3005 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3006 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3007 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3008 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3009 sub_schurs->is_posdef); 3010 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3011 3012 if (pcbddc->dbg_flag) { 3013 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3014 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3015 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3016 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3017 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3018 } 3019 3020 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3021 3022 /* max size of subsets */ 3023 mss = 0; 3024 for (i = 0; i < sub_schurs->n_subs; i++) { 3025 PetscInt subset_size; 3026 3027 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3028 mss = PetscMax(mss, subset_size); 3029 } 3030 3031 /* min/max and threshold */ 3032 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3033 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3034 nmax = PetscMax(nmin, nmax); 3035 allocated_S_St = PETSC_FALSE; 3036 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3037 allocated_S_St = PETSC_TRUE; 3038 } 3039 3040 /* allocate lapack workspace */ 3041 cum = cum2 = 0; 3042 maxneigs = 0; 3043 for (i = 0; i < sub_schurs->n_subs; i++) { 3044 PetscInt n, subset_size; 3045 3046 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3047 n = PetscMin(subset_size, nmax); 3048 cum += subset_size; 3049 cum2 += subset_size * n; 3050 maxneigs = PetscMax(maxneigs, n); 3051 } 3052 lwork = 0; 3053 if (mss) { 3054 PetscScalar sdummy = 0.; 3055 PetscBLASInt B_itype = 1; 3056 PetscBLASInt B_N = mss, idummy = 0; 3057 PetscReal rdummy = 0., zero = 0.0; 3058 PetscReal eps = 0.0; /* dlamch? */ 3059 3060 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3061 B_lwork = -1; 3062 /* some implementations may complain about NULL pointers, even if we are querying */ 3063 S = &sdummy; 3064 St = &sdummy; 3065 eigs = &rdummy; 3066 eigv = &sdummy; 3067 B_iwork = &idummy; 3068 B_ifail = &idummy; 3069 #if defined(PETSC_USE_COMPLEX) 3070 rwork = &rdummy; 3071 #endif 3072 thresh = 1.0; 3073 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3074 #if defined(PETSC_USE_COMPLEX) 3075 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3076 #else 3077 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3078 #endif 3079 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3080 PetscCall(PetscFPTrapPop()); 3081 } 3082 3083 nv = 0; 3084 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3085 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3086 } 3087 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3088 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3089 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3090 #if defined(PETSC_USE_COMPLEX) 3091 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3092 #endif 3093 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3094 &pcbddc->adaptive_constraints_data)); 3095 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3096 3097 maxneigs = 0; 3098 cum = cumarray = 0; 3099 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3100 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3101 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3102 const PetscInt *idxs; 3103 3104 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3105 for (cum = 0; cum < nv; cum++) { 3106 pcbddc->adaptive_constraints_n[cum] = 1; 3107 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3108 pcbddc->adaptive_constraints_data[cum] = 1.0; 3109 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3110 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3111 } 3112 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3113 } 3114 3115 if (mss) { /* multilevel */ 3116 if (sub_schurs->gdsw) { 3117 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3118 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3119 } else { 3120 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3121 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3122 } 3123 } 3124 3125 lthresh = pcbddc->adaptive_threshold[0]; 3126 uthresh = pcbddc->adaptive_threshold[1]; 3127 upart = pcbddc->use_deluxe_scaling; 3128 for (i = 0; i < sub_schurs->n_subs; i++) { 3129 const PetscInt *idxs; 3130 PetscReal upper, lower; 3131 PetscInt j, subset_size, eigs_start = 0; 3132 PetscBLASInt B_N; 3133 PetscBool same_data = PETSC_FALSE; 3134 PetscBool scal = PETSC_FALSE; 3135 3136 if (upart) { 3137 upper = PETSC_MAX_REAL; 3138 lower = uthresh; 3139 } else { 3140 if (sub_schurs->gdsw) { 3141 upper = uthresh; 3142 lower = PETSC_MIN_REAL; 3143 } else { 3144 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3145 upper = 1. / uthresh; 3146 lower = 0.; 3147 } 3148 } 3149 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3150 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3151 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3152 /* this is experimental: we assume the dofs have been properly grouped to have 3153 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3154 if (!sub_schurs->is_posdef) { 3155 Mat T; 3156 3157 for (j = 0; j < subset_size; j++) { 3158 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3159 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3160 PetscCall(MatScale(T, -1.0)); 3161 PetscCall(MatDestroy(&T)); 3162 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3163 PetscCall(MatScale(T, -1.0)); 3164 PetscCall(MatDestroy(&T)); 3165 if (sub_schurs->change_primal_sub) { 3166 PetscInt nz, k; 3167 const PetscInt *idxs; 3168 3169 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3170 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3171 for (k = 0; k < nz; k++) { 3172 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3173 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3174 } 3175 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3176 } 3177 scal = PETSC_TRUE; 3178 break; 3179 } 3180 } 3181 } 3182 3183 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3184 if (sub_schurs->is_symmetric) { 3185 PetscInt j, k; 3186 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3187 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3188 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3189 } 3190 for (j = 0; j < subset_size; j++) { 3191 for (k = j; k < subset_size; k++) { 3192 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3193 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3194 } 3195 } 3196 } else { 3197 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3198 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3199 } 3200 } else { 3201 S = Sarray + cumarray; 3202 St = Starray + cumarray; 3203 } 3204 /* see if we can save some work */ 3205 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3206 3207 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3208 B_neigs = 0; 3209 } else { 3210 PetscBLASInt B_itype = 1; 3211 PetscBLASInt B_IL, B_IU; 3212 PetscReal eps = -1.0; /* dlamch? */ 3213 PetscInt nmin_s; 3214 PetscBool compute_range; 3215 3216 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3217 B_neigs = 0; 3218 compute_range = (PetscBool)!same_data; 3219 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3220 3221 if (pcbddc->dbg_flag) { 3222 PetscInt nc = 0; 3223 3224 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3225 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, 3226 sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc)); 3227 } 3228 3229 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3230 if (compute_range) { 3231 /* ask for eigenvalues larger than thresh */ 3232 if (sub_schurs->is_posdef) { 3233 #if defined(PETSC_USE_COMPLEX) 3234 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3235 #else 3236 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3237 #endif 3238 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3239 } else { /* no theory so far, but it works nicely */ 3240 PetscInt recipe = 0, recipe_m = 1; 3241 PetscReal bb[2]; 3242 3243 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3244 switch (recipe) { 3245 case 0: 3246 if (scal) { 3247 bb[0] = PETSC_MIN_REAL; 3248 bb[1] = lthresh; 3249 } else { 3250 bb[0] = uthresh; 3251 bb[1] = PETSC_MAX_REAL; 3252 } 3253 #if defined(PETSC_USE_COMPLEX) 3254 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3255 #else 3256 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3257 #endif 3258 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3259 break; 3260 case 1: 3261 bb[0] = PETSC_MIN_REAL; 3262 bb[1] = lthresh * lthresh; 3263 #if defined(PETSC_USE_COMPLEX) 3264 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3265 #else 3266 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3267 #endif 3268 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3269 if (!scal) { 3270 PetscBLASInt B_neigs2 = 0; 3271 3272 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3273 bb[1] = PETSC_MAX_REAL; 3274 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3275 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3276 #if defined(PETSC_USE_COMPLEX) 3277 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3278 #else 3279 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3280 #endif 3281 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3282 B_neigs += B_neigs2; 3283 } 3284 break; 3285 case 2: 3286 if (scal) { 3287 bb[0] = PETSC_MIN_REAL; 3288 bb[1] = 0; 3289 #if defined(PETSC_USE_COMPLEX) 3290 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3291 #else 3292 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3293 #endif 3294 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3295 } else { 3296 PetscBLASInt B_neigs2 = 0; 3297 PetscBool do_copy = PETSC_FALSE; 3298 3299 lthresh = PetscMax(lthresh, 0.0); 3300 if (lthresh > 0.0) { 3301 bb[0] = PETSC_MIN_REAL; 3302 bb[1] = lthresh * lthresh; 3303 3304 do_copy = PETSC_TRUE; 3305 #if defined(PETSC_USE_COMPLEX) 3306 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3307 #else 3308 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3309 #endif 3310 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3311 } 3312 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3313 bb[1] = PETSC_MAX_REAL; 3314 if (do_copy) { 3315 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3316 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3317 } 3318 #if defined(PETSC_USE_COMPLEX) 3319 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3320 #else 3321 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3322 #endif 3323 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3324 B_neigs += B_neigs2; 3325 } 3326 break; 3327 case 3: 3328 if (scal) { 3329 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3330 } else { 3331 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3332 } 3333 if (!scal) { 3334 bb[0] = uthresh; 3335 bb[1] = PETSC_MAX_REAL; 3336 #if defined(PETSC_USE_COMPLEX) 3337 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3338 #else 3339 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3340 #endif 3341 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3342 } 3343 if (recipe_m > 0 && B_N - B_neigs > 0) { 3344 PetscBLASInt B_neigs2 = 0; 3345 3346 B_IL = 1; 3347 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3348 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3349 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3350 #if defined(PETSC_USE_COMPLEX) 3351 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3352 #else 3353 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3354 #endif 3355 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3356 B_neigs += B_neigs2; 3357 } 3358 break; 3359 case 4: 3360 bb[0] = PETSC_MIN_REAL; 3361 bb[1] = lthresh; 3362 #if defined(PETSC_USE_COMPLEX) 3363 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3364 #else 3365 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3366 #endif 3367 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3368 { 3369 PetscBLASInt B_neigs2 = 0; 3370 3371 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3372 bb[1] = PETSC_MAX_REAL; 3373 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3374 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3375 #if defined(PETSC_USE_COMPLEX) 3376 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3377 #else 3378 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3379 #endif 3380 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3381 B_neigs += B_neigs2; 3382 } 3383 break; 3384 case 5: /* same as before: first compute all eigenvalues, then filter */ 3385 #if defined(PETSC_USE_COMPLEX) 3386 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3387 #else 3388 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3389 #endif 3390 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3391 { 3392 PetscInt e, k, ne; 3393 for (e = 0, ne = 0; e < B_neigs; e++) { 3394 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3395 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3396 eigs[ne] = eigs[e]; 3397 ne++; 3398 } 3399 } 3400 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3401 B_neigs = ne; 3402 } 3403 break; 3404 default: 3405 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3406 } 3407 } 3408 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3409 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3410 B_IL = 1; 3411 #if defined(PETSC_USE_COMPLEX) 3412 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3413 #else 3414 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3415 #endif 3416 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3417 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3418 PetscInt k; 3419 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3420 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3421 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3422 nmin = nmax; 3423 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3424 for (k = 0; k < nmax; k++) { 3425 eigs[k] = 1. / PETSC_SMALL; 3426 eigv[k * (subset_size + 1)] = 1.0; 3427 } 3428 } 3429 PetscCall(PetscFPTrapPop()); 3430 if (B_ierr) { 3431 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3432 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3433 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3434 } 3435 3436 if (B_neigs > nmax) { 3437 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3438 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3439 B_neigs = nmax; 3440 } 3441 3442 nmin_s = PetscMin(nmin, B_N); 3443 if (B_neigs < nmin_s) { 3444 PetscBLASInt B_neigs2 = 0; 3445 3446 if (upart) { 3447 if (scal) { 3448 B_IU = nmin_s; 3449 B_IL = B_neigs + 1; 3450 } else { 3451 B_IL = B_N - nmin_s + 1; 3452 B_IU = B_N - B_neigs; 3453 } 3454 } else { 3455 B_IL = B_neigs + 1; 3456 B_IU = nmin_s; 3457 } 3458 if (pcbddc->dbg_flag) { 3459 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3460 } 3461 if (sub_schurs->is_symmetric) { 3462 PetscInt j, k; 3463 for (j = 0; j < subset_size; j++) { 3464 for (k = j; k < subset_size; k++) { 3465 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3466 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3467 } 3468 } 3469 } else { 3470 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3471 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3472 } 3473 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3474 #if defined(PETSC_USE_COMPLEX) 3475 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3476 #else 3477 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3478 #endif 3479 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3480 PetscCall(PetscFPTrapPop()); 3481 B_neigs += B_neigs2; 3482 } 3483 if (B_ierr) { 3484 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3485 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3486 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3487 } 3488 if (pcbddc->dbg_flag) { 3489 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3490 for (j = 0; j < B_neigs; j++) { 3491 if (!sub_schurs->gdsw) { 3492 if (eigs[j] == 0.0) { 3493 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3494 } else { 3495 if (upart) { 3496 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3497 } else { 3498 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3499 } 3500 } 3501 } else { 3502 double pg = (double)eigs[j + eigs_start]; 3503 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3504 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3505 } 3506 } 3507 } 3508 } 3509 /* change the basis back to the original one */ 3510 if (sub_schurs->change) { 3511 Mat change, phi, phit; 3512 3513 if (pcbddc->dbg_flag > 2) { 3514 PetscInt ii; 3515 for (ii = 0; ii < B_neigs; ii++) { 3516 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3517 for (j = 0; j < B_N; j++) { 3518 #if defined(PETSC_USE_COMPLEX) 3519 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3520 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3521 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3522 #else 3523 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3524 #endif 3525 } 3526 } 3527 } 3528 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3530 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3531 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3532 PetscCall(MatDestroy(&phit)); 3533 PetscCall(MatDestroy(&phi)); 3534 } 3535 maxneigs = PetscMax(B_neigs, maxneigs); 3536 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3537 if (B_neigs) { 3538 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3539 3540 if (pcbddc->dbg_flag > 1) { 3541 PetscInt ii; 3542 for (ii = 0; ii < B_neigs; ii++) { 3543 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3544 for (j = 0; j < B_N; j++) { 3545 #if defined(PETSC_USE_COMPLEX) 3546 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3547 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3548 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3549 #else 3550 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3551 #endif 3552 } 3553 } 3554 } 3555 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3556 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3557 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3558 cum++; 3559 } 3560 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3561 /* shift for next computation */ 3562 cumarray += subset_size * subset_size; 3563 } 3564 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3565 3566 if (mss) { 3567 if (sub_schurs->gdsw) { 3568 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3569 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3570 } else { 3571 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3572 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3573 /* destroy matrices (junk) */ 3574 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3575 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3576 } 3577 } 3578 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3579 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3580 #if defined(PETSC_USE_COMPLEX) 3581 PetscCall(PetscFree(rwork)); 3582 #endif 3583 if (pcbddc->dbg_flag) { 3584 PetscInt maxneigs_r; 3585 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3586 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3587 } 3588 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3589 PetscFunctionReturn(PETSC_SUCCESS); 3590 } 3591 3592 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3593 { 3594 PetscScalar *coarse_submat_vals; 3595 3596 PetscFunctionBegin; 3597 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3598 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3599 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3600 3601 /* Setup local neumann solver ksp_R */ 3602 /* PCBDDCSetUpLocalScatters should be called first! */ 3603 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3604 3605 /* 3606 Setup local correction and local part of coarse basis. 3607 Gives back the dense local part of the coarse matrix in column major ordering 3608 */ 3609 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals)); 3610 3611 /* Compute total number of coarse nodes and setup coarse solver */ 3612 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals)); 3613 3614 /* free */ 3615 PetscCall(PetscFree(coarse_submat_vals)); 3616 PetscFunctionReturn(PETSC_SUCCESS); 3617 } 3618 3619 PetscErrorCode PCBDDCResetCustomization(PC pc) 3620 { 3621 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3622 3623 PetscFunctionBegin; 3624 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3625 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3626 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3627 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3628 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3629 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3630 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3631 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3632 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3633 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3634 PetscFunctionReturn(PETSC_SUCCESS); 3635 } 3636 3637 PetscErrorCode PCBDDCResetTopography(PC pc) 3638 { 3639 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3640 PetscInt i; 3641 3642 PetscFunctionBegin; 3643 PetscCall(MatDestroy(&pcbddc->nedcG)); 3644 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3645 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3646 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3647 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3648 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3649 PetscCall(VecDestroy(&pcbddc->work_change)); 3650 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3651 PetscCall(MatDestroy(&pcbddc->divudotp)); 3652 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3653 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3654 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3655 pcbddc->n_local_subs = 0; 3656 PetscCall(PetscFree(pcbddc->local_subs)); 3657 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3658 pcbddc->graphanalyzed = PETSC_FALSE; 3659 pcbddc->recompute_topography = PETSC_TRUE; 3660 pcbddc->corner_selected = PETSC_FALSE; 3661 PetscFunctionReturn(PETSC_SUCCESS); 3662 } 3663 3664 PetscErrorCode PCBDDCResetSolvers(PC pc) 3665 { 3666 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3667 3668 PetscFunctionBegin; 3669 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3670 if (pcbddc->coarse_phi_B) { 3671 PetscScalar *array; 3672 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array)); 3673 PetscCall(PetscFree(array)); 3674 } 3675 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3676 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3677 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3678 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3679 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3680 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3681 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3682 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3683 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3684 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3685 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3686 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3687 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3688 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3689 PetscCall(KSPReset(pcbddc->ksp_D)); 3690 PetscCall(KSPReset(pcbddc->ksp_R)); 3691 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3692 PetscCall(MatDestroy(&pcbddc->local_mat)); 3693 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3694 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3695 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3696 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3697 PetscCall(MatDestroy(&pcbddc->benign_change)); 3698 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3699 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3700 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3701 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3702 if (pcbddc->benign_zerodiag_subs) { 3703 PetscInt i; 3704 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3705 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3706 } 3707 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3708 PetscFunctionReturn(PETSC_SUCCESS); 3709 } 3710 3711 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3712 { 3713 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3714 PC_IS *pcis = (PC_IS *)pc->data; 3715 VecType impVecType; 3716 PetscInt n_constraints, n_R, old_size; 3717 3718 PetscFunctionBegin; 3719 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3720 n_R = pcis->n - pcbddc->n_vertices; 3721 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3722 /* local work vectors (try to avoid unneeded work)*/ 3723 /* R nodes */ 3724 old_size = -1; 3725 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3726 if (n_R != old_size) { 3727 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3728 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3729 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3730 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3731 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3732 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3733 } 3734 /* local primal dofs */ 3735 old_size = -1; 3736 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3737 if (pcbddc->local_primal_size != old_size) { 3738 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3739 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3740 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3741 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3742 } 3743 /* local explicit constraints */ 3744 old_size = -1; 3745 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3746 if (n_constraints && n_constraints != old_size) { 3747 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3748 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3749 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3750 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3751 } 3752 PetscFunctionReturn(PETSC_SUCCESS); 3753 } 3754 3755 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3756 { 3757 /* pointers to pcis and pcbddc */ 3758 PC_IS *pcis = (PC_IS *)pc->data; 3759 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3760 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3761 /* submatrices of local problem */ 3762 Mat A_RV, A_VR, A_VV, local_auxmat2_R; 3763 /* submatrices of local coarse problem */ 3764 Mat S_VV, S_CV, S_VC, S_CC; 3765 /* working matrices */ 3766 Mat C_CR; 3767 /* additional working stuff */ 3768 PC pc_R; 3769 Mat F, Brhs = NULL; 3770 Vec dummy_vec; 3771 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 3772 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3773 PetscScalar *work; 3774 PetscInt *idx_V_B; 3775 PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I; 3776 PetscInt i, n_R, n_D, n_B; 3777 PetscScalar one = 1.0, m_one = -1.0; 3778 3779 PetscFunctionBegin; 3780 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 3781 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3782 3783 /* Set Non-overlapping dimensions */ 3784 n_vertices = pcbddc->n_vertices; 3785 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3786 n_B = pcis->n_B; 3787 n_D = pcis->n - n_B; 3788 n_R = pcis->n - n_vertices; 3789 3790 /* vertices in boundary numbering */ 3791 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 3792 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 3793 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 3794 3795 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3796 PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals)); 3797 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV)); 3798 PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size)); 3799 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV)); 3800 PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size)); 3801 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC)); 3802 PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size)); 3803 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC)); 3804 PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size)); 3805 3806 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3807 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 3808 PetscCall(PCSetUp(pc_R)); 3809 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 3810 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 3811 lda_rhs = n_R; 3812 need_benign_correction = PETSC_FALSE; 3813 if (isLU || isCHOL) { 3814 PetscCall(PCFactorGetMatrix(pc_R, &F)); 3815 } else if (sub_schurs && sub_schurs->reuse_solver) { 3816 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3817 MatFactorType type; 3818 3819 F = reuse_solver->F; 3820 PetscCall(MatGetFactorType(F, &type)); 3821 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3822 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3823 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 3824 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3825 } else F = NULL; 3826 3827 /* determine if we can use a sparse right-hand side */ 3828 sparserhs = PETSC_FALSE; 3829 if (F) { 3830 MatSolverType solver; 3831 3832 PetscCall(MatFactorGetSolverType(F, &solver)); 3833 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 3834 } 3835 3836 /* allocate workspace */ 3837 n = 0; 3838 if (n_constraints) n += lda_rhs * n_constraints; 3839 if (n_vertices) { 3840 n = PetscMax(2 * lda_rhs * n_vertices, n); 3841 n = PetscMax((lda_rhs + n_B) * n_vertices, n); 3842 } 3843 if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n); 3844 PetscCall(PetscMalloc1(n, &work)); 3845 3846 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3847 dummy_vec = NULL; 3848 if (need_benign_correction && lda_rhs != n_R && F) { 3849 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 3850 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 3851 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 3852 } 3853 3854 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3855 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3856 3857 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3858 if (n_constraints) { 3859 Mat M3, C_B; 3860 IS is_aux; 3861 3862 /* Extract constraints on R nodes: C_{CR} */ 3863 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux)); 3864 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 3865 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 3866 3867 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3868 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3869 if (!sparserhs) { 3870 PetscCall(PetscArrayzero(work, lda_rhs * n_constraints)); 3871 for (i = 0; i < n_constraints; i++) { 3872 const PetscScalar *row_cmat_values; 3873 const PetscInt *row_cmat_indices; 3874 PetscInt size_of_constraint, j; 3875 3876 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3877 for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j]; 3878 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3879 } 3880 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs)); 3881 } else { 3882 Mat tC_CR; 3883 3884 PetscCall(MatScale(C_CR, -1.0)); 3885 if (lda_rhs != n_R) { 3886 PetscScalar *aa; 3887 PetscInt r, *ii, *jj; 3888 PetscBool done; 3889 3890 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3891 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 3892 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 3893 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 3894 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3895 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 3896 } else { 3897 PetscCall(PetscObjectReference((PetscObject)C_CR)); 3898 tC_CR = C_CR; 3899 } 3900 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 3901 PetscCall(MatDestroy(&tC_CR)); 3902 } 3903 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R)); 3904 if (F) { 3905 if (need_benign_correction) { 3906 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3907 3908 /* rhs is already zero on interior dofs, no need to change the rhs */ 3909 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 3910 } 3911 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 3912 if (need_benign_correction) { 3913 PetscScalar *marr; 3914 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3915 3916 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3917 if (lda_rhs != n_R) { 3918 for (i = 0; i < n_constraints; i++) { 3919 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 3920 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 3921 PetscCall(VecResetArray(dummy_vec)); 3922 } 3923 } else { 3924 for (i = 0; i < n_constraints; i++) { 3925 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 3926 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 3927 PetscCall(VecResetArray(pcbddc->vec1_R)); 3928 } 3929 } 3930 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3931 } 3932 } else { 3933 PetscScalar *marr; 3934 3935 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3936 for (i = 0; i < n_constraints; i++) { 3937 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 3938 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 3939 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 3940 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 3941 PetscCall(VecResetArray(pcbddc->vec1_R)); 3942 PetscCall(VecResetArray(pcbddc->vec2_R)); 3943 } 3944 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3945 } 3946 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 3947 PetscCall(MatDestroy(&Brhs)); 3948 if (!pcbddc->switch_static) { 3949 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2)); 3950 for (i = 0; i < n_constraints; i++) { 3951 Vec r, b; 3952 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 3953 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 3954 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3955 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 3956 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 3957 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 3958 } 3959 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3960 } else { 3961 if (lda_rhs != n_R) { 3962 IS dummy; 3963 3964 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy)); 3965 PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 3966 PetscCall(ISDestroy(&dummy)); 3967 } else { 3968 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 3969 pcbddc->local_auxmat2 = local_auxmat2_R; 3970 } 3971 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 3972 } 3973 PetscCall(ISDestroy(&is_aux)); 3974 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 3975 PetscCall(MatScale(M3, m_one)); 3976 if (isCHOL) { 3977 PetscCall(MatCholeskyFactor(M3, NULL, NULL)); 3978 } else { 3979 PetscCall(MatLUFactor(M3, NULL, NULL, NULL)); 3980 } 3981 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 3982 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3983 PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 3984 PetscCall(MatDestroy(&C_B)); 3985 PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3986 PetscCall(MatDestroy(&M3)); 3987 } 3988 3989 /* Get submatrices from subdomain matrix */ 3990 if (n_vertices) { 3991 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 3992 PetscBool oldpin; 3993 #endif 3994 PetscBool isaij; 3995 IS is_aux; 3996 3997 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3998 IS tis; 3999 4000 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4001 PetscCall(ISSort(tis)); 4002 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4003 PetscCall(ISDestroy(&tis)); 4004 } else { 4005 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4006 } 4007 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4008 oldpin = pcbddc->local_mat->boundtocpu; 4009 #endif 4010 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4011 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4012 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4013 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij)); 4014 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4015 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4016 } 4017 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4018 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4019 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4020 #endif 4021 PetscCall(ISDestroy(&is_aux)); 4022 } 4023 4024 /* Matrix of coarse basis functions (local) */ 4025 if (pcbddc->coarse_phi_B) { 4026 PetscInt on_B, on_primal, on_D = n_D; 4027 if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL)); 4028 PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal)); 4029 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4030 PetscScalar *marray; 4031 4032 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray)); 4033 PetscCall(PetscFree(marray)); 4034 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4035 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4036 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4037 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4038 } 4039 } 4040 4041 if (!pcbddc->coarse_phi_B) { 4042 PetscScalar *marr; 4043 4044 /* memory size */ 4045 n = n_B * pcbddc->local_primal_size; 4046 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size; 4047 if (!pcbddc->symmetric_primal) n *= 2; 4048 PetscCall(PetscCalloc1(n, &marr)); 4049 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B)); 4050 marr += n_B * pcbddc->local_primal_size; 4051 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4052 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D)); 4053 marr += n_D * pcbddc->local_primal_size; 4054 } 4055 if (!pcbddc->symmetric_primal) { 4056 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B)); 4057 marr += n_B * pcbddc->local_primal_size; 4058 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D)); 4059 } else { 4060 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4061 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4062 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4063 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4064 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4065 } 4066 } 4067 } 4068 4069 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4070 p0_lidx_I = NULL; 4071 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4072 const PetscInt *idxs; 4073 4074 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4075 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4076 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4077 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4078 } 4079 4080 /* vertices */ 4081 if (n_vertices) { 4082 PetscBool restoreavr = PETSC_FALSE; 4083 4084 PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV)); 4085 4086 if (n_R) { 4087 Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */ 4088 PetscBLASInt B_N, B_one = 1; 4089 const PetscScalar *x; 4090 PetscScalar *y; 4091 4092 PetscCall(MatScale(A_RV, m_one)); 4093 if (need_benign_correction) { 4094 ISLocalToGlobalMapping RtoN; 4095 IS is_p0; 4096 PetscInt *idxs_p0, n; 4097 4098 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4099 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4100 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4101 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4102 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4103 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4104 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4105 PetscCall(ISDestroy(&is_p0)); 4106 } 4107 4108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV)); 4109 if (!sparserhs || need_benign_correction) { 4110 if (lda_rhs == n_R) { 4111 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4112 } else { 4113 PetscScalar *av, *array; 4114 const PetscInt *xadj, *adjncy; 4115 PetscInt n; 4116 PetscBool flg_row; 4117 4118 array = work + lda_rhs * n_vertices; 4119 PetscCall(PetscArrayzero(array, lda_rhs * n_vertices)); 4120 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4121 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4122 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4123 for (i = 0; i < n; i++) { 4124 PetscInt j; 4125 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j]; 4126 } 4127 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4128 PetscCall(MatDestroy(&A_RV)); 4129 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV)); 4130 } 4131 if (need_benign_correction) { 4132 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4133 PetscScalar *marr; 4134 4135 PetscCall(MatDenseGetArray(A_RV, &marr)); 4136 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4137 4138 | 0 0 0 | (V) 4139 L = | 0 0 -1 | (P-p0) 4140 | 0 0 -1 | (p0) 4141 4142 */ 4143 for (i = 0; i < reuse_solver->benign_n; i++) { 4144 const PetscScalar *vals; 4145 const PetscInt *idxs, *idxs_zero; 4146 PetscInt n, j, nz; 4147 4148 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4149 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4150 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4151 for (j = 0; j < n; j++) { 4152 PetscScalar val = vals[j]; 4153 PetscInt k, col = idxs[j]; 4154 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4155 } 4156 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4157 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4158 } 4159 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4160 } 4161 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4162 Brhs = A_RV; 4163 } else { 4164 Mat tA_RVT, A_RVT; 4165 4166 if (!pcbddc->symmetric_primal) { 4167 /* A_RV already scaled by -1 */ 4168 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4169 } else { 4170 restoreavr = PETSC_TRUE; 4171 PetscCall(MatScale(A_VR, -1.0)); 4172 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4173 A_RVT = A_VR; 4174 } 4175 if (lda_rhs != n_R) { 4176 PetscScalar *aa; 4177 PetscInt r, *ii, *jj; 4178 PetscBool done; 4179 4180 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4181 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4182 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4183 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4184 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4185 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4186 } else { 4187 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4188 tA_RVT = A_RVT; 4189 } 4190 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4191 PetscCall(MatDestroy(&tA_RVT)); 4192 PetscCall(MatDestroy(&A_RVT)); 4193 } 4194 if (F) { 4195 /* need to correct the rhs */ 4196 if (need_benign_correction) { 4197 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4198 PetscScalar *marr; 4199 4200 PetscCall(MatDenseGetArray(Brhs, &marr)); 4201 if (lda_rhs != n_R) { 4202 for (i = 0; i < n_vertices; i++) { 4203 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4204 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4205 PetscCall(VecResetArray(dummy_vec)); 4206 } 4207 } else { 4208 for (i = 0; i < n_vertices; i++) { 4209 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4210 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4211 PetscCall(VecResetArray(pcbddc->vec1_R)); 4212 } 4213 } 4214 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4215 } 4216 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4217 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4218 /* need to correct the solution */ 4219 if (need_benign_correction) { 4220 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4221 PetscScalar *marr; 4222 4223 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4224 if (lda_rhs != n_R) { 4225 for (i = 0; i < n_vertices; i++) { 4226 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4227 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4228 PetscCall(VecResetArray(dummy_vec)); 4229 } 4230 } else { 4231 for (i = 0; i < n_vertices; i++) { 4232 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4233 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4234 PetscCall(VecResetArray(pcbddc->vec1_R)); 4235 } 4236 } 4237 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4238 } 4239 } else { 4240 PetscCall(MatDenseGetArray(Brhs, &y)); 4241 for (i = 0; i < n_vertices; i++) { 4242 PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs)); 4243 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs)); 4244 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4245 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4246 PetscCall(VecResetArray(pcbddc->vec1_R)); 4247 PetscCall(VecResetArray(pcbddc->vec2_R)); 4248 } 4249 PetscCall(MatDenseRestoreArray(Brhs, &y)); 4250 } 4251 PetscCall(MatDestroy(&A_RV)); 4252 PetscCall(MatDestroy(&Brhs)); 4253 /* S_VV and S_CV */ 4254 if (n_constraints) { 4255 Mat B; 4256 4257 PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices)); 4258 for (i = 0; i < n_vertices; i++) { 4259 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4260 PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B)); 4261 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4262 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4263 PetscCall(VecResetArray(pcis->vec1_B)); 4264 PetscCall(VecResetArray(pcbddc->vec1_R)); 4265 } 4266 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B)); 4267 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4268 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV)); 4269 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4270 PetscCall(MatProductSetFromOptions(S_CV)); 4271 PetscCall(MatProductSymbolic(S_CV)); 4272 PetscCall(MatProductNumeric(S_CV)); 4273 PetscCall(MatProductClear(S_CV)); 4274 4275 PetscCall(MatDestroy(&B)); 4276 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B)); 4277 /* Reuse B = local_auxmat2_R * S_CV */ 4278 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B)); 4279 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4280 PetscCall(MatProductSetFromOptions(B)); 4281 PetscCall(MatProductSymbolic(B)); 4282 PetscCall(MatProductNumeric(B)); 4283 4284 PetscCall(MatScale(S_CV, m_one)); 4285 PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N)); 4286 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one)); 4287 PetscCall(MatDestroy(&B)); 4288 } 4289 if (lda_rhs != n_R) { 4290 PetscCall(MatDestroy(&A_RRmA_RV)); 4291 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV)); 4292 PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs)); 4293 } 4294 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt)); 4295 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4296 if (need_benign_correction) { 4297 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4298 PetscScalar *marr, *sums; 4299 4300 PetscCall(PetscMalloc1(n_vertices, &sums)); 4301 PetscCall(MatDenseGetArray(S_VVt, &marr)); 4302 for (i = 0; i < reuse_solver->benign_n; i++) { 4303 const PetscScalar *vals; 4304 const PetscInt *idxs, *idxs_zero; 4305 PetscInt n, j, nz; 4306 4307 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4308 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4309 for (j = 0; j < n_vertices; j++) { 4310 PetscInt k; 4311 sums[j] = 0.; 4312 for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs]; 4313 } 4314 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4315 for (j = 0; j < n; j++) { 4316 PetscScalar val = vals[j]; 4317 PetscInt k; 4318 for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k]; 4319 } 4320 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4321 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4322 } 4323 PetscCall(PetscFree(sums)); 4324 PetscCall(MatDenseRestoreArray(S_VVt, &marr)); 4325 PetscCall(MatDestroy(&A_RV_bcorr)); 4326 } 4327 PetscCall(MatDestroy(&A_RRmA_RV)); 4328 PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N)); 4329 PetscCall(MatDenseGetArrayRead(A_VV, &x)); 4330 PetscCall(MatDenseGetArray(S_VVt, &y)); 4331 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one)); 4332 PetscCall(MatDenseRestoreArrayRead(A_VV, &x)); 4333 PetscCall(MatDenseRestoreArray(S_VVt, &y)); 4334 PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN)); 4335 PetscCall(MatDestroy(&S_VVt)); 4336 } else { 4337 PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN)); 4338 } 4339 PetscCall(MatDestroy(&A_VV)); 4340 4341 /* coarse basis functions */ 4342 for (i = 0; i < n_vertices; i++) { 4343 Vec v; 4344 PetscScalar one = 1.0, zero = 0.0; 4345 4346 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4347 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v)); 4348 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4349 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4350 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4351 PetscMPIInt rank; 4352 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank)); 4353 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4354 } 4355 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4356 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4357 PetscCall(VecAssemblyEnd(v)); 4358 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v)); 4359 4360 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4361 PetscInt j; 4362 4363 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v)); 4364 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4365 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4366 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4367 PetscMPIInt rank; 4368 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank)); 4369 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4370 } 4371 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4372 PetscCall(VecAssemblyBegin(v)); 4373 PetscCall(VecAssemblyEnd(v)); 4374 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v)); 4375 } 4376 PetscCall(VecResetArray(pcbddc->vec1_R)); 4377 } 4378 /* if n_R == 0 the object is not destroyed */ 4379 PetscCall(MatDestroy(&A_RV)); 4380 } 4381 PetscCall(VecDestroy(&dummy_vec)); 4382 4383 if (n_constraints) { 4384 Mat B; 4385 4386 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B)); 4387 PetscCall(MatScale(S_CC, m_one)); 4388 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B)); 4389 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4390 PetscCall(MatProductSetFromOptions(B)); 4391 PetscCall(MatProductSymbolic(B)); 4392 PetscCall(MatProductNumeric(B)); 4393 4394 PetscCall(MatScale(S_CC, m_one)); 4395 if (n_vertices) { 4396 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4397 PetscCall(MatTransposeSetPrecursor(S_CV, S_VC)); 4398 PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC)); 4399 } else { 4400 Mat S_VCt; 4401 4402 if (lda_rhs != n_R) { 4403 PetscCall(MatDestroy(&B)); 4404 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B)); 4405 PetscCall(MatDenseSetLDA(B, lda_rhs)); 4406 } 4407 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt)); 4408 PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN)); 4409 PetscCall(MatDestroy(&S_VCt)); 4410 } 4411 } 4412 PetscCall(MatDestroy(&B)); 4413 /* coarse basis functions */ 4414 for (i = 0; i < n_constraints; i++) { 4415 Vec v; 4416 4417 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4418 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4419 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4420 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4421 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4422 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4423 PetscInt j; 4424 PetscScalar zero = 0.0; 4425 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4426 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4427 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4428 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4429 PetscCall(VecAssemblyBegin(v)); 4430 PetscCall(VecAssemblyEnd(v)); 4431 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4432 } 4433 PetscCall(VecResetArray(pcbddc->vec1_R)); 4434 } 4435 } 4436 if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R)); 4437 PetscCall(PetscFree(p0_lidx_I)); 4438 4439 /* coarse matrix entries relative to B_0 */ 4440 if (pcbddc->benign_n) { 4441 Mat B0_B, B0_BPHI; 4442 IS is_dummy; 4443 const PetscScalar *data; 4444 PetscInt j; 4445 4446 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4447 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4448 PetscCall(ISDestroy(&is_dummy)); 4449 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4450 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4451 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4452 for (j = 0; j < pcbddc->benign_n; j++) { 4453 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4454 for (i = 0; i < pcbddc->local_primal_size; i++) { 4455 coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j]; 4456 coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j]; 4457 } 4458 } 4459 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4460 PetscCall(MatDestroy(&B0_B)); 4461 PetscCall(MatDestroy(&B0_BPHI)); 4462 } 4463 4464 /* compute other basis functions for non-symmetric problems */ 4465 if (!pcbddc->symmetric_primal) { 4466 Mat B_V = NULL, B_C = NULL; 4467 PetscScalar *marray; 4468 4469 if (n_constraints) { 4470 Mat S_CCT, C_CRT; 4471 4472 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4473 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4474 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4475 PetscCall(MatDestroy(&S_CCT)); 4476 if (n_vertices) { 4477 Mat S_VCT; 4478 4479 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4480 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4481 PetscCall(MatDestroy(&S_VCT)); 4482 } 4483 PetscCall(MatDestroy(&C_CRT)); 4484 } else { 4485 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4486 } 4487 if (n_vertices && n_R) { 4488 PetscScalar *av, *marray; 4489 const PetscInt *xadj, *adjncy; 4490 PetscInt n; 4491 PetscBool flg_row; 4492 4493 /* B_V = B_V - A_VR^T */ 4494 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4495 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4496 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4497 PetscCall(MatDenseGetArray(B_V, &marray)); 4498 for (i = 0; i < n; i++) { 4499 PetscInt j; 4500 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 4501 } 4502 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4503 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4504 PetscCall(MatDestroy(&A_VR)); 4505 } 4506 4507 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4508 if (n_vertices) { 4509 PetscCall(MatDenseGetArray(B_V, &marray)); 4510 for (i = 0; i < n_vertices; i++) { 4511 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 4512 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4513 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4514 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4515 PetscCall(VecResetArray(pcbddc->vec1_R)); 4516 PetscCall(VecResetArray(pcbddc->vec2_R)); 4517 } 4518 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4519 } 4520 if (B_C) { 4521 PetscCall(MatDenseGetArray(B_C, &marray)); 4522 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 4523 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 4524 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4525 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4526 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4527 PetscCall(VecResetArray(pcbddc->vec1_R)); 4528 PetscCall(VecResetArray(pcbddc->vec2_R)); 4529 } 4530 PetscCall(MatDenseRestoreArray(B_C, &marray)); 4531 } 4532 /* coarse basis functions */ 4533 for (i = 0; i < pcbddc->local_primal_size; i++) { 4534 Vec v; 4535 4536 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 4537 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 4538 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4539 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4540 if (i < n_vertices) { 4541 PetscScalar one = 1.0; 4542 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4543 PetscCall(VecAssemblyBegin(v)); 4544 PetscCall(VecAssemblyEnd(v)); 4545 } 4546 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 4547 4548 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4549 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 4550 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4551 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4552 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 4553 } 4554 PetscCall(VecResetArray(pcbddc->vec1_R)); 4555 } 4556 PetscCall(MatDestroy(&B_V)); 4557 PetscCall(MatDestroy(&B_C)); 4558 } 4559 4560 /* free memory */ 4561 PetscCall(PetscFree(idx_V_B)); 4562 PetscCall(MatDestroy(&S_VV)); 4563 PetscCall(MatDestroy(&S_CV)); 4564 PetscCall(MatDestroy(&S_VC)); 4565 PetscCall(MatDestroy(&S_CC)); 4566 PetscCall(PetscFree(work)); 4567 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 4568 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 4569 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4570 4571 /* Checking coarse_sub_mat and coarse basis functions */ 4572 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4573 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4574 if (pcbddc->dbg_flag) { 4575 Mat coarse_sub_mat; 4576 Mat AUXMAT, TM1, TM2, TM3, TM4; 4577 Mat coarse_phi_D, coarse_phi_B; 4578 Mat coarse_psi_D, coarse_psi_B; 4579 Mat A_II, A_BB, A_IB, A_BI; 4580 Mat C_B, CPHI; 4581 IS is_dummy; 4582 Vec mones; 4583 MatType checkmattype = MATSEQAIJ; 4584 PetscReal real_value; 4585 4586 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4587 Mat A; 4588 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 4589 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 4590 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 4591 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 4592 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 4593 PetscCall(MatDestroy(&A)); 4594 } else { 4595 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 4596 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 4597 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 4598 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 4599 } 4600 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 4601 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 4602 if (!pcbddc->symmetric_primal) { 4603 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 4604 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 4605 } 4606 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat)); 4607 4608 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 4609 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 4610 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4611 if (!pcbddc->symmetric_primal) { 4612 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4613 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4614 PetscCall(MatDestroy(&AUXMAT)); 4615 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4616 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4617 PetscCall(MatDestroy(&AUXMAT)); 4618 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4619 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4620 PetscCall(MatDestroy(&AUXMAT)); 4621 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4622 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4623 PetscCall(MatDestroy(&AUXMAT)); 4624 } else { 4625 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4626 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4627 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4628 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4629 PetscCall(MatDestroy(&AUXMAT)); 4630 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4631 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4632 PetscCall(MatDestroy(&AUXMAT)); 4633 } 4634 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 4635 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 4636 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 4637 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 4638 if (pcbddc->benign_n) { 4639 Mat B0_B, B0_BPHI; 4640 const PetscScalar *data2; 4641 PetscScalar *data; 4642 PetscInt j; 4643 4644 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4645 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4646 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4647 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4648 PetscCall(MatDenseGetArray(TM1, &data)); 4649 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 4650 for (j = 0; j < pcbddc->benign_n; j++) { 4651 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4652 for (i = 0; i < pcbddc->local_primal_size; i++) { 4653 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 4654 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 4655 } 4656 } 4657 PetscCall(MatDenseRestoreArray(TM1, &data)); 4658 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 4659 PetscCall(MatDestroy(&B0_B)); 4660 PetscCall(ISDestroy(&is_dummy)); 4661 PetscCall(MatDestroy(&B0_BPHI)); 4662 } 4663 #if 0 4664 { 4665 PetscViewer viewer; 4666 char filename[256]; 4667 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 4668 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4669 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4670 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4671 PetscCall(MatView(coarse_sub_mat,viewer)); 4672 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4673 PetscCall(MatView(TM1,viewer)); 4674 if (pcbddc->coarse_phi_B) { 4675 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4676 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4677 } 4678 if (pcbddc->coarse_phi_D) { 4679 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4680 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4681 } 4682 if (pcbddc->coarse_psi_B) { 4683 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4684 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4685 } 4686 if (pcbddc->coarse_psi_D) { 4687 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4688 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4689 } 4690 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4691 PetscCall(MatView(pcbddc->local_mat,viewer)); 4692 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4693 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4694 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4695 PetscCall(ISView(pcis->is_I_local,viewer)); 4696 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4697 PetscCall(ISView(pcis->is_B_local,viewer)); 4698 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4699 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4700 PetscCall(PetscViewerDestroy(&viewer)); 4701 } 4702 #endif 4703 PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN)); 4704 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 4705 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4706 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4707 4708 /* check constraints */ 4709 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 4710 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4711 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4712 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4713 } else { 4714 PetscScalar *data; 4715 Mat tmat; 4716 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 4717 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 4718 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 4719 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4720 PetscCall(MatDestroy(&tmat)); 4721 } 4722 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 4723 PetscCall(VecSet(mones, -1.0)); 4724 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4725 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4726 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4727 if (!pcbddc->symmetric_primal) { 4728 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 4729 PetscCall(VecSet(mones, -1.0)); 4730 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4731 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4732 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4733 } 4734 PetscCall(MatDestroy(&C_B)); 4735 PetscCall(MatDestroy(&CPHI)); 4736 PetscCall(ISDestroy(&is_dummy)); 4737 PetscCall(VecDestroy(&mones)); 4738 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4739 PetscCall(MatDestroy(&A_II)); 4740 PetscCall(MatDestroy(&A_BB)); 4741 PetscCall(MatDestroy(&A_IB)); 4742 PetscCall(MatDestroy(&A_BI)); 4743 PetscCall(MatDestroy(&TM1)); 4744 PetscCall(MatDestroy(&TM2)); 4745 PetscCall(MatDestroy(&TM3)); 4746 PetscCall(MatDestroy(&TM4)); 4747 PetscCall(MatDestroy(&coarse_phi_D)); 4748 PetscCall(MatDestroy(&coarse_phi_B)); 4749 if (!pcbddc->symmetric_primal) { 4750 PetscCall(MatDestroy(&coarse_psi_D)); 4751 PetscCall(MatDestroy(&coarse_psi_B)); 4752 } 4753 PetscCall(MatDestroy(&coarse_sub_mat)); 4754 } 4755 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4756 { 4757 PetscBool gpu; 4758 4759 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu)); 4760 if (gpu) { 4761 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 4762 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 4763 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 4764 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 4765 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 4766 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 4767 } 4768 } 4769 /* get back data */ 4770 *coarse_submat_vals_n = coarse_submat_vals; 4771 PetscFunctionReturn(PETSC_SUCCESS); 4772 } 4773 4774 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 4775 { 4776 Mat *work_mat; 4777 IS isrow_s, iscol_s; 4778 PetscBool rsorted, csorted; 4779 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 4780 4781 PetscFunctionBegin; 4782 PetscCall(ISSorted(isrow, &rsorted)); 4783 PetscCall(ISSorted(iscol, &csorted)); 4784 PetscCall(ISGetLocalSize(isrow, &rsize)); 4785 PetscCall(ISGetLocalSize(iscol, &csize)); 4786 4787 if (!rsorted) { 4788 const PetscInt *idxs; 4789 PetscInt *idxs_sorted, i; 4790 4791 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 4792 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 4793 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 4794 PetscCall(ISGetIndices(isrow, &idxs)); 4795 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 4796 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4797 PetscCall(ISRestoreIndices(isrow, &idxs)); 4798 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 4799 } else { 4800 PetscCall(PetscObjectReference((PetscObject)isrow)); 4801 isrow_s = isrow; 4802 } 4803 4804 if (!csorted) { 4805 if (isrow == iscol) { 4806 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4807 iscol_s = isrow_s; 4808 } else { 4809 const PetscInt *idxs; 4810 PetscInt *idxs_sorted, i; 4811 4812 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 4813 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 4814 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 4815 PetscCall(ISGetIndices(iscol, &idxs)); 4816 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 4817 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4818 PetscCall(ISRestoreIndices(iscol, &idxs)); 4819 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 4820 } 4821 } else { 4822 PetscCall(PetscObjectReference((PetscObject)iscol)); 4823 iscol_s = iscol; 4824 } 4825 4826 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 4827 4828 if (!rsorted || !csorted) { 4829 Mat new_mat; 4830 IS is_perm_r, is_perm_c; 4831 4832 if (!rsorted) { 4833 PetscInt *idxs_r, i; 4834 PetscCall(PetscMalloc1(rsize, &idxs_r)); 4835 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 4836 PetscCall(PetscFree(idxs_perm_r)); 4837 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 4838 } else { 4839 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 4840 } 4841 PetscCall(ISSetPermutation(is_perm_r)); 4842 4843 if (!csorted) { 4844 if (isrow_s == iscol_s) { 4845 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 4846 is_perm_c = is_perm_r; 4847 } else { 4848 PetscInt *idxs_c, i; 4849 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 4850 PetscCall(PetscMalloc1(csize, &idxs_c)); 4851 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 4852 PetscCall(PetscFree(idxs_perm_c)); 4853 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 4854 } 4855 } else { 4856 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 4857 } 4858 PetscCall(ISSetPermutation(is_perm_c)); 4859 4860 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 4861 PetscCall(MatDestroy(&work_mat[0])); 4862 work_mat[0] = new_mat; 4863 PetscCall(ISDestroy(&is_perm_r)); 4864 PetscCall(ISDestroy(&is_perm_c)); 4865 } 4866 4867 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 4868 *B = work_mat[0]; 4869 PetscCall(MatDestroyMatrices(1, &work_mat)); 4870 PetscCall(ISDestroy(&isrow_s)); 4871 PetscCall(ISDestroy(&iscol_s)); 4872 PetscFunctionReturn(PETSC_SUCCESS); 4873 } 4874 4875 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4876 { 4877 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 4878 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4879 Mat new_mat, lA; 4880 IS is_local, is_global; 4881 PetscInt local_size; 4882 PetscBool isseqaij, issym, isset; 4883 4884 PetscFunctionBegin; 4885 PetscCall(MatDestroy(&pcbddc->local_mat)); 4886 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 4887 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 4888 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 4889 PetscCall(ISDestroy(&is_local)); 4890 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 4891 PetscCall(ISDestroy(&is_global)); 4892 4893 if (pcbddc->dbg_flag) { 4894 Vec x, x_change; 4895 PetscReal error; 4896 4897 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 4898 PetscCall(VecSetRandom(x, NULL)); 4899 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 4900 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4901 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4902 PetscCall(MatMult(new_mat, matis->x, matis->y)); 4903 if (!pcbddc->change_interior) { 4904 const PetscScalar *x, *y, *v; 4905 PetscReal lerror = 0.; 4906 PetscInt i; 4907 4908 PetscCall(VecGetArrayRead(matis->x, &x)); 4909 PetscCall(VecGetArrayRead(matis->y, &y)); 4910 PetscCall(VecGetArrayRead(matis->counter, &v)); 4911 for (i = 0; i < local_size; i++) 4912 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 4913 PetscCall(VecRestoreArrayRead(matis->x, &x)); 4914 PetscCall(VecRestoreArrayRead(matis->y, &y)); 4915 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 4916 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 4917 if (error > PETSC_SMALL) { 4918 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4919 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 4920 } else { 4921 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 4922 } 4923 } 4924 } 4925 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4926 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4927 PetscCall(VecAXPY(x, -1.0, x_change)); 4928 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 4929 if (error > PETSC_SMALL) { 4930 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4931 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 4932 } else { 4933 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 4934 } 4935 } 4936 PetscCall(VecDestroy(&x)); 4937 PetscCall(VecDestroy(&x_change)); 4938 } 4939 4940 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4941 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 4942 4943 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4944 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 4945 if (isseqaij) { 4946 PetscCall(MatDestroy(&pcbddc->local_mat)); 4947 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4948 if (lA) { 4949 Mat work; 4950 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4951 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4952 PetscCall(MatDestroy(&work)); 4953 } 4954 } else { 4955 Mat work_mat; 4956 4957 PetscCall(MatDestroy(&pcbddc->local_mat)); 4958 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4959 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 4960 PetscCall(MatDestroy(&work_mat)); 4961 if (lA) { 4962 Mat work; 4963 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 4964 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 4965 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 4966 PetscCall(MatDestroy(&work)); 4967 } 4968 } 4969 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 4970 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 4971 PetscCall(MatDestroy(&new_mat)); 4972 PetscFunctionReturn(PETSC_SUCCESS); 4973 } 4974 4975 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4976 { 4977 PC_IS *pcis = (PC_IS *)(pc->data); 4978 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4979 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4980 PetscInt *idx_R_local = NULL; 4981 PetscInt n_vertices, i, j, n_R, n_D, n_B; 4982 PetscInt vbs, bs; 4983 PetscBT bitmask = NULL; 4984 4985 PetscFunctionBegin; 4986 /* 4987 No need to setup local scatters if 4988 - primal space is unchanged 4989 AND 4990 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4991 AND 4992 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4993 */ 4994 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 4995 /* destroy old objects */ 4996 PetscCall(ISDestroy(&pcbddc->is_R_local)); 4997 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 4998 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 4999 /* Set Non-overlapping dimensions */ 5000 n_B = pcis->n_B; 5001 n_D = pcis->n - n_B; 5002 n_vertices = pcbddc->n_vertices; 5003 5004 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5005 5006 /* create auxiliary bitmask and allocate workspace */ 5007 if (!sub_schurs || !sub_schurs->reuse_solver) { 5008 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5009 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5010 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5011 5012 for (i = 0, n_R = 0; i < pcis->n; i++) { 5013 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5014 } 5015 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5016 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5017 5018 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5019 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5020 } 5021 5022 /* Block code */ 5023 vbs = 1; 5024 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5025 if (bs > 1 && !(n_vertices % bs)) { 5026 PetscBool is_blocked = PETSC_TRUE; 5027 PetscInt *vary; 5028 if (!sub_schurs || !sub_schurs->reuse_solver) { 5029 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5030 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5031 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5032 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5033 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5034 for (i = 0; i < pcis->n / bs; i++) { 5035 if (vary[i] != 0 && vary[i] != bs) { 5036 is_blocked = PETSC_FALSE; 5037 break; 5038 } 5039 } 5040 PetscCall(PetscFree(vary)); 5041 } else { 5042 /* Verify directly the R set */ 5043 for (i = 0; i < n_R / bs; i++) { 5044 PetscInt j, node = idx_R_local[bs * i]; 5045 for (j = 1; j < bs; j++) { 5046 if (node != idx_R_local[bs * i + j] - j) { 5047 is_blocked = PETSC_FALSE; 5048 break; 5049 } 5050 } 5051 } 5052 } 5053 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5054 vbs = bs; 5055 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5056 } 5057 } 5058 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5059 if (sub_schurs && sub_schurs->reuse_solver) { 5060 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5061 5062 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5063 PetscCall(ISDestroy(&reuse_solver->is_R)); 5064 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5065 reuse_solver->is_R = pcbddc->is_R_local; 5066 } else { 5067 PetscCall(PetscFree(idx_R_local)); 5068 } 5069 5070 /* print some info if requested */ 5071 if (pcbddc->dbg_flag) { 5072 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5073 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5074 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5075 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5076 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5077 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5078 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5079 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5080 } 5081 5082 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5083 if (!sub_schurs || !sub_schurs->reuse_solver) { 5084 IS is_aux1, is_aux2; 5085 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5086 5087 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5088 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5089 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5090 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5091 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5092 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5093 for (i = 0, j = 0; i < n_R; i++) { 5094 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5095 } 5096 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5097 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5098 for (i = 0, j = 0; i < n_B; i++) { 5099 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5100 } 5101 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5102 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5103 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5104 PetscCall(ISDestroy(&is_aux1)); 5105 PetscCall(ISDestroy(&is_aux2)); 5106 5107 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5108 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5109 for (i = 0, j = 0; i < n_R; i++) { 5110 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5111 } 5112 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5113 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5114 PetscCall(ISDestroy(&is_aux1)); 5115 } 5116 PetscCall(PetscBTDestroy(&bitmask)); 5117 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5118 } else { 5119 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5120 IS tis; 5121 PetscInt schur_size; 5122 5123 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5124 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5125 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5126 PetscCall(ISDestroy(&tis)); 5127 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5128 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5129 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5130 PetscCall(ISDestroy(&tis)); 5131 } 5132 } 5133 PetscFunctionReturn(PETSC_SUCCESS); 5134 } 5135 5136 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5137 { 5138 MatNullSpace NullSpace; 5139 Mat dmat; 5140 const Vec *nullvecs; 5141 Vec v, v2, *nullvecs2; 5142 VecScatter sct = NULL; 5143 PetscContainer c; 5144 PetscScalar *ddata; 5145 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5146 PetscBool nnsp_has_cnst; 5147 5148 PetscFunctionBegin; 5149 if (!is && !B) { /* MATIS */ 5150 Mat_IS *matis = (Mat_IS *)A->data; 5151 5152 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5153 sct = matis->cctx; 5154 PetscCall(PetscObjectReference((PetscObject)sct)); 5155 } else { 5156 PetscCall(MatGetNullSpace(B, &NullSpace)); 5157 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5158 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5159 } 5160 PetscCall(MatGetNullSpace(A, &NullSpace)); 5161 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5162 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5163 5164 PetscCall(MatCreateVecs(A, &v, NULL)); 5165 PetscCall(MatCreateVecs(B, &v2, NULL)); 5166 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5167 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5168 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5169 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5170 PetscCall(VecGetBlockSize(v2, &bs)); 5171 PetscCall(VecGetSize(v2, &N)); 5172 PetscCall(VecGetLocalSize(v2, &n)); 5173 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5174 for (k = 0; k < nnsp_size; k++) { 5175 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5176 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5177 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5178 } 5179 if (nnsp_has_cnst) { 5180 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5181 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5182 } 5183 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5184 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5185 5186 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5187 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5188 PetscCall(PetscContainerSetPointer(c, ddata)); 5189 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5190 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5191 PetscCall(PetscContainerDestroy(&c)); 5192 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5193 PetscCall(MatDestroy(&dmat)); 5194 5195 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5196 PetscCall(PetscFree(nullvecs2)); 5197 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5198 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5199 PetscCall(VecDestroy(&v)); 5200 PetscCall(VecDestroy(&v2)); 5201 PetscCall(VecScatterDestroy(&sct)); 5202 PetscFunctionReturn(PETSC_SUCCESS); 5203 } 5204 5205 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5206 { 5207 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5208 PC_IS *pcis = (PC_IS *)pc->data; 5209 PC pc_temp; 5210 Mat A_RR; 5211 MatNullSpace nnsp; 5212 MatReuse reuse; 5213 PetscScalar m_one = -1.0; 5214 PetscReal value; 5215 PetscInt n_D, n_R; 5216 PetscBool issbaij, opts, isset, issym; 5217 void (*f)(void) = NULL; 5218 char dir_prefix[256], neu_prefix[256], str_level[16]; 5219 size_t len; 5220 5221 PetscFunctionBegin; 5222 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5223 /* approximate solver, propagate NearNullSpace if needed */ 5224 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5225 MatNullSpace gnnsp1, gnnsp2; 5226 PetscBool lhas, ghas; 5227 5228 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5229 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5230 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5231 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5232 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5233 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5234 } 5235 5236 /* compute prefixes */ 5237 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5238 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5239 if (!pcbddc->current_level) { 5240 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5241 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5242 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5243 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5244 } else { 5245 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 5246 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5247 len -= 15; /* remove "pc_bddc_coarse_" */ 5248 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5249 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5250 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5251 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5252 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5253 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5254 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5255 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5256 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5257 } 5258 5259 /* DIRICHLET PROBLEM */ 5260 if (dirichlet) { 5261 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5262 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5263 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5264 if (pcbddc->dbg_flag) { 5265 Mat A_IIn; 5266 5267 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5268 PetscCall(MatDestroy(&pcis->A_II)); 5269 pcis->A_II = A_IIn; 5270 } 5271 } 5272 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5273 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5274 5275 /* Matrix for Dirichlet problem is pcis->A_II */ 5276 n_D = pcis->n - pcis->n_B; 5277 opts = PETSC_FALSE; 5278 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5279 opts = PETSC_TRUE; 5280 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5281 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5282 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5283 /* default */ 5284 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5285 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5286 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5287 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5288 if (issbaij) { 5289 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5290 } else { 5291 PetscCall(PCSetType(pc_temp, PCLU)); 5292 } 5293 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5294 } 5295 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5296 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5297 /* Allow user's customization */ 5298 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5299 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5300 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5301 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5302 } 5303 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5304 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5305 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5306 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5307 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5308 const PetscInt *idxs; 5309 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5310 5311 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5312 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5313 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5314 for (i = 0; i < nl; i++) { 5315 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5316 } 5317 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5318 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5319 PetscCall(PetscFree(scoords)); 5320 } 5321 if (sub_schurs && sub_schurs->reuse_solver) { 5322 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5323 5324 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5325 } 5326 5327 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5328 if (!n_D) { 5329 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5330 PetscCall(PCSetType(pc_temp, PCNONE)); 5331 } 5332 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5333 /* set ksp_D into pcis data */ 5334 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5335 PetscCall(KSPDestroy(&pcis->ksp_D)); 5336 pcis->ksp_D = pcbddc->ksp_D; 5337 } 5338 5339 /* NEUMANN PROBLEM */ 5340 A_RR = NULL; 5341 if (neumann) { 5342 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5343 PetscInt ibs, mbs; 5344 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5345 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5346 5347 reuse_neumann_solver = PETSC_FALSE; 5348 if (sub_schurs && sub_schurs->reuse_solver) { 5349 IS iP; 5350 5351 reuse_neumann_solver = PETSC_TRUE; 5352 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5353 if (iP) reuse_neumann_solver = PETSC_FALSE; 5354 } 5355 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5356 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5357 if (pcbddc->ksp_R) { /* already created ksp */ 5358 PetscInt nn_R; 5359 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5360 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5361 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5362 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5363 PetscCall(KSPReset(pcbddc->ksp_R)); 5364 PetscCall(MatDestroy(&A_RR)); 5365 reuse = MAT_INITIAL_MATRIX; 5366 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5367 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5368 PetscCall(MatDestroy(&A_RR)); 5369 reuse = MAT_INITIAL_MATRIX; 5370 } else { /* safe to reuse the matrix */ 5371 reuse = MAT_REUSE_MATRIX; 5372 } 5373 } 5374 /* last check */ 5375 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5376 PetscCall(MatDestroy(&A_RR)); 5377 reuse = MAT_INITIAL_MATRIX; 5378 } 5379 } else { /* first time, so we need to create the matrix */ 5380 reuse = MAT_INITIAL_MATRIX; 5381 } 5382 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5383 TODO: Get Rid of these conversions */ 5384 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5385 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5386 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5387 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5388 if (matis->A == pcbddc->local_mat) { 5389 PetscCall(MatDestroy(&pcbddc->local_mat)); 5390 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5391 } else { 5392 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5393 } 5394 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5395 if (matis->A == pcbddc->local_mat) { 5396 PetscCall(MatDestroy(&pcbddc->local_mat)); 5397 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5398 } else { 5399 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5400 } 5401 } 5402 /* extract A_RR */ 5403 if (reuse_neumann_solver) { 5404 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5405 5406 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5407 PetscCall(MatDestroy(&A_RR)); 5408 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5409 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5410 } else { 5411 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5412 } 5413 } else { 5414 PetscCall(MatDestroy(&A_RR)); 5415 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5416 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5417 } 5418 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5419 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5420 } 5421 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5422 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5423 opts = PETSC_FALSE; 5424 if (!pcbddc->ksp_R) { /* create object if not present */ 5425 opts = PETSC_TRUE; 5426 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5427 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 5428 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5429 /* default */ 5430 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5431 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5432 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5433 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5434 if (issbaij) { 5435 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5436 } else { 5437 PetscCall(PCSetType(pc_temp, PCLU)); 5438 } 5439 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5440 } 5441 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5442 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5443 if (opts) { /* Allow user's customization once */ 5444 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5445 } 5446 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5447 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5448 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5449 } 5450 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5451 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5452 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5453 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5454 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5455 const PetscInt *idxs; 5456 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5457 5458 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 5459 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 5460 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5461 for (i = 0; i < nl; i++) { 5462 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5463 } 5464 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 5465 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5466 PetscCall(PetscFree(scoords)); 5467 } 5468 5469 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5470 if (!n_R) { 5471 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5472 PetscCall(PCSetType(pc_temp, PCNONE)); 5473 } 5474 /* Reuse solver if it is present */ 5475 if (reuse_neumann_solver) { 5476 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5477 5478 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 5479 } 5480 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5481 } 5482 5483 if (pcbddc->dbg_flag) { 5484 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5485 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5486 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5487 } 5488 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5489 5490 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5491 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 5492 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 5493 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 5494 /* check Dirichlet and Neumann solvers */ 5495 if (pcbddc->dbg_flag) { 5496 if (dirichlet) { /* Dirichlet */ 5497 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 5498 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 5499 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 5500 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 5501 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 5502 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 5503 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value)); 5504 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5505 } 5506 if (neumann) { /* Neumann */ 5507 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 5508 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 5509 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 5510 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5511 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 5512 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 5513 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value)); 5514 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5515 } 5516 } 5517 /* free Neumann problem's matrix */ 5518 PetscCall(MatDestroy(&A_RR)); 5519 PetscFunctionReturn(PETSC_SUCCESS); 5520 } 5521 5522 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5523 { 5524 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5526 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5527 5528 PetscFunctionBegin; 5529 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 5530 if (!pcbddc->switch_static) { 5531 if (applytranspose && pcbddc->local_auxmat1) { 5532 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 5533 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5534 } 5535 if (!reuse_solver) { 5536 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5537 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5538 } else { 5539 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5540 5541 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5542 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5543 } 5544 } else { 5545 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5546 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5547 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5548 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5549 if (applytranspose && pcbddc->local_auxmat1) { 5550 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 5551 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5552 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5553 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5554 } 5555 } 5556 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5557 if (!reuse_solver || pcbddc->switch_static) { 5558 if (applytranspose) { 5559 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5560 } else { 5561 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5562 } 5563 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 5564 } else { 5565 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5566 5567 if (applytranspose) { 5568 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5569 } else { 5570 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5571 } 5572 } 5573 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5574 PetscCall(VecSet(inout_B, 0.)); 5575 if (!pcbddc->switch_static) { 5576 if (!reuse_solver) { 5577 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5578 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5579 } else { 5580 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5581 5582 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5583 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5584 } 5585 if (!applytranspose && pcbddc->local_auxmat1) { 5586 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5587 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 5588 } 5589 } else { 5590 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5591 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5592 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5593 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5594 if (!applytranspose && pcbddc->local_auxmat1) { 5595 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5596 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 5597 } 5598 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5599 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5600 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5601 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5602 } 5603 PetscFunctionReturn(PETSC_SUCCESS); 5604 } 5605 5606 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5607 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5608 { 5609 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5610 PC_IS *pcis = (PC_IS *)(pc->data); 5611 const PetscScalar zero = 0.0; 5612 5613 PetscFunctionBegin; 5614 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5615 if (!pcbddc->benign_apply_coarse_only) { 5616 if (applytranspose) { 5617 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 5618 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5619 } else { 5620 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 5621 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5622 } 5623 } else { 5624 PetscCall(VecSet(pcbddc->vec1_P, zero)); 5625 } 5626 5627 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5628 if (pcbddc->benign_n) { 5629 PetscScalar *array; 5630 PetscInt j; 5631 5632 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5633 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 5634 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5635 } 5636 5637 /* start communications from local primal nodes to rhs of coarse solver */ 5638 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 5639 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 5640 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 5641 5642 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5643 if (pcbddc->coarse_ksp) { 5644 Mat coarse_mat; 5645 Vec rhs, sol; 5646 MatNullSpace nullsp; 5647 PetscBool isbddc = PETSC_FALSE; 5648 5649 if (pcbddc->benign_have_null) { 5650 PC coarse_pc; 5651 5652 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5653 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 5654 /* we need to propagate to coarser levels the need for a possible benign correction */ 5655 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5656 PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5657 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5658 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5659 } 5660 } 5661 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 5662 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 5663 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 5664 if (applytranspose) { 5665 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 5666 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5667 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 5668 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5669 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5670 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 5671 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5672 } else { 5673 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 5674 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5675 PC coarse_pc; 5676 5677 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 5678 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5679 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 5680 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 5681 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 5682 } else { 5683 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5684 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 5685 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5686 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5687 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5688 } 5689 } 5690 /* we don't need the benign correction at coarser levels anymore */ 5691 if (pcbddc->benign_have_null && isbddc) { 5692 PC coarse_pc; 5693 PC_BDDC *coarsepcbddc; 5694 5695 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5696 coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5697 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5698 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5699 } 5700 } 5701 5702 /* Local solution on R nodes */ 5703 if (pcis->n && !pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 5704 /* communications from coarse sol to local primal nodes */ 5705 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 5706 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 5707 5708 /* Sum contributions from the two levels */ 5709 if (!pcbddc->benign_apply_coarse_only) { 5710 if (applytranspose) { 5711 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5712 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5713 } else { 5714 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5715 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5716 } 5717 /* store p0 */ 5718 if (pcbddc->benign_n) { 5719 PetscScalar *array; 5720 PetscInt j; 5721 5722 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5723 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 5724 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5725 } 5726 } else { /* expand the coarse solution */ 5727 if (applytranspose) { 5728 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 5729 } else { 5730 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 5731 } 5732 } 5733 PetscFunctionReturn(PETSC_SUCCESS); 5734 } 5735 5736 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 5737 { 5738 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5739 Vec from, to; 5740 const PetscScalar *array; 5741 5742 PetscFunctionBegin; 5743 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5744 from = pcbddc->coarse_vec; 5745 to = pcbddc->vec1_P; 5746 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5747 Vec tvec; 5748 5749 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5750 PetscCall(VecResetArray(tvec)); 5751 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 5752 PetscCall(VecGetArrayRead(tvec, &array)); 5753 PetscCall(VecPlaceArray(from, array)); 5754 PetscCall(VecRestoreArrayRead(tvec, &array)); 5755 } 5756 } else { /* from local to global -> put data in coarse right hand side */ 5757 from = pcbddc->vec1_P; 5758 to = pcbddc->coarse_vec; 5759 } 5760 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5761 PetscFunctionReturn(PETSC_SUCCESS); 5762 } 5763 5764 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5765 { 5766 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5767 Vec from, to; 5768 const PetscScalar *array; 5769 5770 PetscFunctionBegin; 5771 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5772 from = pcbddc->coarse_vec; 5773 to = pcbddc->vec1_P; 5774 } else { /* from local to global -> put data in coarse right hand side */ 5775 from = pcbddc->vec1_P; 5776 to = pcbddc->coarse_vec; 5777 } 5778 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5779 if (smode == SCATTER_FORWARD) { 5780 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5781 Vec tvec; 5782 5783 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5784 PetscCall(VecGetArrayRead(to, &array)); 5785 PetscCall(VecPlaceArray(tvec, array)); 5786 PetscCall(VecRestoreArrayRead(to, &array)); 5787 } 5788 } else { 5789 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5790 PetscCall(VecResetArray(from)); 5791 } 5792 } 5793 PetscFunctionReturn(PETSC_SUCCESS); 5794 } 5795 5796 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5797 { 5798 PC_IS *pcis = (PC_IS *)(pc->data); 5799 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5800 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5801 /* one and zero */ 5802 PetscScalar one = 1.0, zero = 0.0; 5803 /* space to store constraints and their local indices */ 5804 PetscScalar *constraints_data; 5805 PetscInt *constraints_idxs, *constraints_idxs_B; 5806 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 5807 PetscInt *constraints_n; 5808 /* iterators */ 5809 PetscInt i, j, k, total_counts, total_counts_cc, cum; 5810 /* BLAS integers */ 5811 PetscBLASInt lwork, lierr; 5812 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 5813 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 5814 /* reuse */ 5815 PetscInt olocal_primal_size, olocal_primal_size_cc; 5816 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 5817 /* change of basis */ 5818 PetscBool qr_needed; 5819 PetscBT change_basis, qr_needed_idx; 5820 /* auxiliary stuff */ 5821 PetscInt *nnz, *is_indices; 5822 PetscInt ncc; 5823 /* some quantities */ 5824 PetscInt n_vertices, total_primal_vertices, valid_constraints; 5825 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 5826 PetscReal tol; /* tolerance for retaining eigenmodes */ 5827 5828 PetscFunctionBegin; 5829 tol = PetscSqrtReal(PETSC_SMALL); 5830 /* Destroy Mat objects computed previously */ 5831 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 5832 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 5833 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 5834 /* save info on constraints from previous setup (if any) */ 5835 olocal_primal_size = pcbddc->local_primal_size; 5836 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5837 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 5838 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 5839 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 5840 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 5841 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 5842 5843 if (!pcbddc->adaptive_selection) { 5844 IS ISForVertices, *ISForFaces, *ISForEdges; 5845 MatNullSpace nearnullsp; 5846 const Vec *nearnullvecs; 5847 Vec *localnearnullsp; 5848 PetscScalar *array; 5849 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 5850 PetscBool nnsp_has_cnst; 5851 /* LAPACK working arrays for SVD or POD */ 5852 PetscBool skip_lapack, boolforchange; 5853 PetscScalar *work; 5854 PetscReal *singular_vals; 5855 #if defined(PETSC_USE_COMPLEX) 5856 PetscReal *rwork; 5857 #endif 5858 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 5859 PetscBLASInt dummy_int = 1; 5860 PetscScalar dummy_scalar = 1.; 5861 PetscBool use_pod = PETSC_FALSE; 5862 5863 /* MKL SVD with same input gives different results on different processes! */ 5864 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 5865 use_pod = PETSC_TRUE; 5866 #endif 5867 /* Get index sets for faces, edges and vertices from graph */ 5868 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 5869 o_nf = n_ISForFaces; 5870 o_ne = n_ISForEdges; 5871 n_vertices = 0; 5872 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 5873 /* print some info */ 5874 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5875 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 5876 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 5877 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5878 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 5879 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 5880 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 5881 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 5882 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5883 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 5884 } 5885 5886 if (!pcbddc->use_vertices) n_vertices = 0; 5887 if (!pcbddc->use_edges) n_ISForEdges = 0; 5888 if (!pcbddc->use_faces) n_ISForFaces = 0; 5889 5890 /* check if near null space is attached to global mat */ 5891 if (pcbddc->use_nnsp) { 5892 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 5893 } else nearnullsp = NULL; 5894 5895 if (nearnullsp) { 5896 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 5897 /* remove any stored info */ 5898 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 5899 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 5900 /* store information for BDDC solver reuse */ 5901 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 5902 pcbddc->onearnullspace = nearnullsp; 5903 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 5904 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 5905 } else { /* if near null space is not provided BDDC uses constants by default */ 5906 nnsp_size = 0; 5907 nnsp_has_cnst = PETSC_TRUE; 5908 } 5909 /* get max number of constraints on a single cc */ 5910 max_constraints = nnsp_size; 5911 if (nnsp_has_cnst) max_constraints++; 5912 5913 /* 5914 Evaluate maximum storage size needed by the procedure 5915 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5916 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5917 There can be multiple constraints per connected component 5918 */ 5919 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 5920 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 5921 5922 total_counts = n_ISForFaces + n_ISForEdges; 5923 total_counts *= max_constraints; 5924 total_counts += n_vertices; 5925 PetscCall(PetscBTCreate(total_counts, &change_basis)); 5926 5927 total_counts = 0; 5928 max_size_of_constraint = 0; 5929 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 5930 IS used_is; 5931 if (i < n_ISForEdges) { 5932 used_is = ISForEdges[i]; 5933 } else { 5934 used_is = ISForFaces[i - n_ISForEdges]; 5935 } 5936 PetscCall(ISGetSize(used_is, &j)); 5937 total_counts += j; 5938 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 5939 } 5940 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 5941 5942 /* get local part of global near null space vectors */ 5943 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 5944 for (k = 0; k < nnsp_size; k++) { 5945 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 5946 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5947 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 5948 } 5949 5950 /* whether or not to skip lapack calls */ 5951 skip_lapack = PETSC_TRUE; 5952 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5953 5954 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5955 if (!skip_lapack) { 5956 PetscScalar temp_work; 5957 5958 if (use_pod) { 5959 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5960 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 5961 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 5962 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 5963 #if defined(PETSC_USE_COMPLEX) 5964 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 5965 #endif 5966 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5967 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 5968 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 5969 lwork = -1; 5970 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5971 #if !defined(PETSC_USE_COMPLEX) 5972 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 5973 #else 5974 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 5975 #endif 5976 PetscCall(PetscFPTrapPop()); 5977 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 5978 } else { 5979 #if !defined(PETSC_MISSING_LAPACK_GESVD) 5980 /* SVD */ 5981 PetscInt max_n, min_n; 5982 max_n = max_size_of_constraint; 5983 min_n = max_constraints; 5984 if (max_size_of_constraint < max_constraints) { 5985 min_n = max_size_of_constraint; 5986 max_n = max_constraints; 5987 } 5988 PetscCall(PetscMalloc1(min_n, &singular_vals)); 5989 #if defined(PETSC_USE_COMPLEX) 5990 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 5991 #endif 5992 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5993 lwork = -1; 5994 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 5995 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 5996 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 5997 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 5998 #if !defined(PETSC_USE_COMPLEX) 5999 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6000 #else 6001 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6002 #endif 6003 PetscCall(PetscFPTrapPop()); 6004 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6005 #else 6006 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6007 #endif /* on missing GESVD */ 6008 } 6009 /* Allocate optimal workspace */ 6010 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6011 PetscCall(PetscMalloc1(lwork, &work)); 6012 } 6013 /* Now we can loop on constraining sets */ 6014 total_counts = 0; 6015 constraints_idxs_ptr[0] = 0; 6016 constraints_data_ptr[0] = 0; 6017 /* vertices */ 6018 if (n_vertices) { 6019 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6020 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6021 for (i = 0; i < n_vertices; i++) { 6022 constraints_n[total_counts] = 1; 6023 constraints_data[total_counts] = 1.0; 6024 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6025 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6026 total_counts++; 6027 } 6028 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6029 } 6030 6031 /* edges and faces */ 6032 total_counts_cc = total_counts; 6033 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6034 IS used_is; 6035 PetscBool idxs_copied = PETSC_FALSE; 6036 6037 if (ncc < n_ISForEdges) { 6038 used_is = ISForEdges[ncc]; 6039 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6040 } else { 6041 used_is = ISForFaces[ncc - n_ISForEdges]; 6042 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6043 } 6044 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6045 6046 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6047 if (!size_of_constraint) continue; 6048 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6049 /* change of basis should not be performed on local periodic nodes */ 6050 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6051 if (nnsp_has_cnst) { 6052 PetscScalar quad_value; 6053 6054 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6055 idxs_copied = PETSC_TRUE; 6056 6057 if (!pcbddc->use_nnsp_true) { 6058 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6059 } else { 6060 quad_value = 1.0; 6061 } 6062 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6063 temp_constraints++; 6064 total_counts++; 6065 } 6066 for (k = 0; k < nnsp_size; k++) { 6067 PetscReal real_value; 6068 PetscScalar *ptr_to_data; 6069 6070 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6071 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6072 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6073 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6074 /* check if array is null on the connected component */ 6075 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6076 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6077 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6078 temp_constraints++; 6079 total_counts++; 6080 if (!idxs_copied) { 6081 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6082 idxs_copied = PETSC_TRUE; 6083 } 6084 } 6085 } 6086 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6087 valid_constraints = temp_constraints; 6088 if (!pcbddc->use_nnsp_true && temp_constraints) { 6089 if (temp_constraints == 1) { /* just normalize the constraint */ 6090 PetscScalar norm, *ptr_to_data; 6091 6092 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6093 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6094 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6095 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6096 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6097 } else { /* perform SVD */ 6098 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6099 6100 if (use_pod) { 6101 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6102 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6103 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6104 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6105 from that computed using LAPACKgesvd 6106 -> This is due to a different computation of eigenvectors in LAPACKheev 6107 -> The quality of the POD-computed basis will be the same */ 6108 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6109 /* Store upper triangular part of correlation matrix */ 6110 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6111 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6112 for (j = 0; j < temp_constraints; j++) { 6113 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6114 } 6115 /* compute eigenvalues and eigenvectors of correlation matrix */ 6116 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6117 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6118 #if !defined(PETSC_USE_COMPLEX) 6119 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6120 #else 6121 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6122 #endif 6123 PetscCall(PetscFPTrapPop()); 6124 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6125 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6126 j = 0; 6127 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6128 total_counts = total_counts - j; 6129 valid_constraints = temp_constraints - j; 6130 /* scale and copy POD basis into used quadrature memory */ 6131 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6132 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6133 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6134 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6135 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6136 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6137 if (j < temp_constraints) { 6138 PetscInt ii; 6139 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6140 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6141 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6142 PetscCall(PetscFPTrapPop()); 6143 for (k = 0; k < temp_constraints - j; k++) { 6144 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6145 } 6146 } 6147 } else { 6148 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6149 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6150 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6151 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6152 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6153 #if !defined(PETSC_USE_COMPLEX) 6154 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6155 #else 6156 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6157 #endif 6158 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6159 PetscCall(PetscFPTrapPop()); 6160 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6161 k = temp_constraints; 6162 if (k > size_of_constraint) k = size_of_constraint; 6163 j = 0; 6164 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6165 valid_constraints = k - j; 6166 total_counts = total_counts - temp_constraints + valid_constraints; 6167 #else 6168 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6169 #endif /* on missing GESVD */ 6170 } 6171 } 6172 } 6173 /* update pointers information */ 6174 if (valid_constraints) { 6175 constraints_n[total_counts_cc] = valid_constraints; 6176 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6177 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6178 /* set change_of_basis flag */ 6179 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6180 total_counts_cc++; 6181 } 6182 } 6183 /* free workspace */ 6184 if (!skip_lapack) { 6185 PetscCall(PetscFree(work)); 6186 #if defined(PETSC_USE_COMPLEX) 6187 PetscCall(PetscFree(rwork)); 6188 #endif 6189 PetscCall(PetscFree(singular_vals)); 6190 PetscCall(PetscFree(correlation_mat)); 6191 PetscCall(PetscFree(temp_basis)); 6192 } 6193 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6194 PetscCall(PetscFree(localnearnullsp)); 6195 /* free index sets of faces, edges and vertices */ 6196 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6197 } else { 6198 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6199 6200 total_counts = 0; 6201 n_vertices = 0; 6202 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6203 max_constraints = 0; 6204 total_counts_cc = 0; 6205 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6206 total_counts += pcbddc->adaptive_constraints_n[i]; 6207 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6208 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6209 } 6210 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6211 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6212 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6213 constraints_data = pcbddc->adaptive_constraints_data; 6214 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6215 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6216 total_counts_cc = 0; 6217 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6218 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6219 } 6220 6221 max_size_of_constraint = 0; 6222 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6223 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6224 /* Change of basis */ 6225 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6226 if (pcbddc->use_change_of_basis) { 6227 for (i = 0; i < sub_schurs->n_subs; i++) { 6228 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6229 } 6230 } 6231 } 6232 pcbddc->local_primal_size = total_counts; 6233 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6234 6235 /* map constraints_idxs in boundary numbering */ 6236 if (pcbddc->use_change_of_basis) { 6237 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6238 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6239 } 6240 6241 /* Create constraint matrix */ 6242 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6243 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6244 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6245 6246 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6247 /* determine if a QR strategy is needed for change of basis */ 6248 qr_needed = pcbddc->use_qr_single; 6249 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6250 total_primal_vertices = 0; 6251 pcbddc->local_primal_size_cc = 0; 6252 for (i = 0; i < total_counts_cc; i++) { 6253 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6254 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6255 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6256 pcbddc->local_primal_size_cc += 1; 6257 } else if (PetscBTLookup(change_basis, i)) { 6258 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6259 pcbddc->local_primal_size_cc += constraints_n[i]; 6260 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6261 PetscCall(PetscBTSet(qr_needed_idx, i)); 6262 qr_needed = PETSC_TRUE; 6263 } 6264 } else { 6265 pcbddc->local_primal_size_cc += 1; 6266 } 6267 } 6268 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6269 pcbddc->n_vertices = total_primal_vertices; 6270 /* permute indices in order to have a sorted set of vertices */ 6271 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6272 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6273 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6274 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6275 6276 /* nonzero structure of constraint matrix */ 6277 /* and get reference dof for local constraints */ 6278 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6279 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6280 6281 j = total_primal_vertices; 6282 total_counts = total_primal_vertices; 6283 cum = total_primal_vertices; 6284 for (i = n_vertices; i < total_counts_cc; i++) { 6285 if (!PetscBTLookup(change_basis, i)) { 6286 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6287 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6288 cum++; 6289 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6290 for (k = 0; k < constraints_n[i]; k++) { 6291 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6292 nnz[j + k] = size_of_constraint; 6293 } 6294 j += constraints_n[i]; 6295 } 6296 } 6297 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6298 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6299 PetscCall(PetscFree(nnz)); 6300 6301 /* set values in constraint matrix */ 6302 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6303 total_counts = total_primal_vertices; 6304 for (i = n_vertices; i < total_counts_cc; i++) { 6305 if (!PetscBTLookup(change_basis, i)) { 6306 PetscInt *cols; 6307 6308 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6309 cols = constraints_idxs + constraints_idxs_ptr[i]; 6310 for (k = 0; k < constraints_n[i]; k++) { 6311 PetscInt row = total_counts + k; 6312 PetscScalar *vals; 6313 6314 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6315 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6316 } 6317 total_counts += constraints_n[i]; 6318 } 6319 } 6320 /* assembling */ 6321 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6322 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6323 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6324 6325 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6326 if (pcbddc->use_change_of_basis) { 6327 /* dual and primal dofs on a single cc */ 6328 PetscInt dual_dofs, primal_dofs; 6329 /* working stuff for GEQRF */ 6330 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6331 PetscBLASInt lqr_work; 6332 /* working stuff for UNGQR */ 6333 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6334 PetscBLASInt lgqr_work; 6335 /* working stuff for TRTRS */ 6336 PetscScalar *trs_rhs = NULL; 6337 PetscBLASInt Blas_NRHS; 6338 /* pointers for values insertion into change of basis matrix */ 6339 PetscInt *start_rows, *start_cols; 6340 PetscScalar *start_vals; 6341 /* working stuff for values insertion */ 6342 PetscBT is_primal; 6343 PetscInt *aux_primal_numbering_B; 6344 /* matrix sizes */ 6345 PetscInt global_size, local_size; 6346 /* temporary change of basis */ 6347 Mat localChangeOfBasisMatrix; 6348 /* extra space for debugging */ 6349 PetscScalar *dbg_work = NULL; 6350 6351 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6352 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6353 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6354 /* nonzeros for local mat */ 6355 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6356 if (!pcbddc->benign_change || pcbddc->fake_change) { 6357 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6358 } else { 6359 const PetscInt *ii; 6360 PetscInt n; 6361 PetscBool flg_row; 6362 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6363 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6364 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6365 } 6366 for (i = n_vertices; i < total_counts_cc; i++) { 6367 if (PetscBTLookup(change_basis, i)) { 6368 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6369 if (PetscBTLookup(qr_needed_idx, i)) { 6370 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6371 } else { 6372 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6373 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6374 } 6375 } 6376 } 6377 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6378 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6379 PetscCall(PetscFree(nnz)); 6380 /* Set interior change in the matrix */ 6381 if (!pcbddc->benign_change || pcbddc->fake_change) { 6382 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6383 } else { 6384 const PetscInt *ii, *jj; 6385 PetscScalar *aa; 6386 PetscInt n; 6387 PetscBool flg_row; 6388 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6389 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6390 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6391 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6392 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6393 } 6394 6395 if (pcbddc->dbg_flag) { 6396 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6397 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6398 } 6399 6400 /* Now we loop on the constraints which need a change of basis */ 6401 /* 6402 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6403 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6404 6405 Basic blocks of change of basis matrix T computed: 6406 6407 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6408 6409 | 1 0 ... 0 s_1/S | 6410 | 0 1 ... 0 s_2/S | 6411 | ... | 6412 | 0 ... 1 s_{n-1}/S | 6413 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6414 6415 with S = \sum_{i=1}^n s_i^2 6416 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6417 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6418 6419 - QR decomposition of constraints otherwise 6420 */ 6421 if (qr_needed && max_size_of_constraint) { 6422 /* space to store Q */ 6423 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6424 /* array to store scaling factors for reflectors */ 6425 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6426 /* first we issue queries for optimal work */ 6427 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6428 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6429 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6430 lqr_work = -1; 6431 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6432 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6433 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6434 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 6435 lgqr_work = -1; 6436 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6437 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6438 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6439 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6440 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6441 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6442 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6443 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6444 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 6445 /* array to store rhs and solution of triangular solver */ 6446 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6447 /* allocating workspace for check */ 6448 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 6449 } 6450 /* array to store whether a node is primal or not */ 6451 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6452 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6453 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6454 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6455 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 6456 PetscCall(PetscFree(aux_primal_numbering_B)); 6457 6458 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6459 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 6460 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 6461 if (PetscBTLookup(change_basis, total_counts)) { 6462 /* get constraint info */ 6463 primal_dofs = constraints_n[total_counts]; 6464 dual_dofs = size_of_constraint - primal_dofs; 6465 6466 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 6467 6468 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 6469 6470 /* copy quadrature constraints for change of basis check */ 6471 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6472 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6473 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6474 6475 /* compute QR decomposition of constraints */ 6476 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6477 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6478 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6479 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6480 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 6481 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 6482 PetscCall(PetscFPTrapPop()); 6483 6484 /* explicitly compute R^-T */ 6485 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 6486 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 6487 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6488 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 6489 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6490 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6491 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6492 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 6493 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 6494 PetscCall(PetscFPTrapPop()); 6495 6496 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6497 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6498 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6499 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6500 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6501 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6502 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 6503 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 6504 PetscCall(PetscFPTrapPop()); 6505 6506 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6507 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6508 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6509 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6510 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6511 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6512 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6513 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6514 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6515 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6516 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 6517 PetscCall(PetscFPTrapPop()); 6518 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6519 6520 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6521 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6522 /* insert cols for primal dofs */ 6523 for (j = 0; j < primal_dofs; j++) { 6524 start_vals = &qr_basis[j * size_of_constraint]; 6525 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6526 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6527 } 6528 /* insert cols for dual dofs */ 6529 for (j = 0, k = 0; j < dual_dofs; k++) { 6530 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 6531 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 6532 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6533 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6534 j++; 6535 } 6536 } 6537 6538 /* check change of basis */ 6539 if (pcbddc->dbg_flag) { 6540 PetscInt ii, jj; 6541 PetscBool valid_qr = PETSC_TRUE; 6542 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 6543 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6544 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 6545 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6546 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 6547 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 6548 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6549 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 6550 PetscCall(PetscFPTrapPop()); 6551 for (jj = 0; jj < size_of_constraint; jj++) { 6552 for (ii = 0; ii < primal_dofs; ii++) { 6553 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6554 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6555 } 6556 } 6557 if (!valid_qr) { 6558 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 6559 for (jj = 0; jj < size_of_constraint; jj++) { 6560 for (ii = 0; ii < primal_dofs; ii++) { 6561 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 6562 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6563 } 6564 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 6565 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6566 } 6567 } 6568 } 6569 } else { 6570 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 6571 } 6572 } 6573 } else { /* simple transformation block */ 6574 PetscInt row, col; 6575 PetscScalar val, norm; 6576 6577 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6578 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 6579 for (j = 0; j < size_of_constraint; j++) { 6580 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 6581 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6582 if (!PetscBTLookup(is_primal, row_B)) { 6583 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6584 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 6585 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 6586 } else { 6587 for (k = 0; k < size_of_constraint; k++) { 6588 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6589 if (row != col) { 6590 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 6591 } else { 6592 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 6593 } 6594 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 6595 } 6596 } 6597 } 6598 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 6599 } 6600 } else { 6601 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 6602 } 6603 } 6604 6605 /* free workspace */ 6606 if (qr_needed) { 6607 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6608 PetscCall(PetscFree(trs_rhs)); 6609 PetscCall(PetscFree(qr_tau)); 6610 PetscCall(PetscFree(qr_work)); 6611 PetscCall(PetscFree(gqr_work)); 6612 PetscCall(PetscFree(qr_basis)); 6613 } 6614 PetscCall(PetscBTDestroy(&is_primal)); 6615 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6616 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6617 6618 /* assembling of global change of variable */ 6619 if (!pcbddc->fake_change) { 6620 Mat tmat; 6621 PetscInt bs; 6622 6623 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 6624 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 6625 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 6626 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 6627 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 6628 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 6629 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 6630 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 6631 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 6632 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 6633 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 6634 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 6635 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 6636 PetscCall(MatDestroy(&tmat)); 6637 PetscCall(VecSet(pcis->vec1_global, 0.0)); 6638 PetscCall(VecSet(pcis->vec1_N, 1.0)); 6639 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6640 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6641 PetscCall(VecReciprocal(pcis->vec1_global)); 6642 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 6643 6644 /* check */ 6645 if (pcbddc->dbg_flag) { 6646 PetscReal error; 6647 Vec x, x_change; 6648 6649 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 6650 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 6651 PetscCall(VecSetRandom(x, NULL)); 6652 PetscCall(VecCopy(x, pcis->vec1_global)); 6653 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6654 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6655 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 6656 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6657 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6658 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 6659 PetscCall(VecAXPY(x, -1.0, x_change)); 6660 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 6661 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 6662 PetscCall(VecDestroy(&x)); 6663 PetscCall(VecDestroy(&x_change)); 6664 } 6665 /* adapt sub_schurs computed (if any) */ 6666 if (pcbddc->use_deluxe_scaling) { 6667 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6668 6669 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6670 if (sub_schurs && sub_schurs->S_Ej_all) { 6671 Mat S_new, tmat; 6672 IS is_all_N, is_V_Sall = NULL; 6673 6674 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 6675 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 6676 if (pcbddc->deluxe_zerorows) { 6677 ISLocalToGlobalMapping NtoSall; 6678 IS is_V; 6679 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 6680 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 6681 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 6682 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6683 PetscCall(ISDestroy(&is_V)); 6684 } 6685 PetscCall(ISDestroy(&is_all_N)); 6686 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6687 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6688 PetscCall(PetscObjectReference((PetscObject)S_new)); 6689 if (pcbddc->deluxe_zerorows) { 6690 const PetscScalar *array; 6691 const PetscInt *idxs_V, *idxs_all; 6692 PetscInt i, n_V; 6693 6694 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6695 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 6696 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 6697 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 6698 PetscCall(VecGetArrayRead(pcis->D, &array)); 6699 for (i = 0; i < n_V; i++) { 6700 PetscScalar val; 6701 PetscInt idx; 6702 6703 idx = idxs_V[i]; 6704 val = array[idxs_all[idxs_V[i]]]; 6705 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 6706 } 6707 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 6708 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 6709 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 6710 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 6711 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 6712 } 6713 sub_schurs->S_Ej_all = S_new; 6714 PetscCall(MatDestroy(&S_new)); 6715 if (sub_schurs->sum_S_Ej_all) { 6716 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6717 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6718 PetscCall(PetscObjectReference((PetscObject)S_new)); 6719 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6720 sub_schurs->sum_S_Ej_all = S_new; 6721 PetscCall(MatDestroy(&S_new)); 6722 } 6723 PetscCall(ISDestroy(&is_V_Sall)); 6724 PetscCall(MatDestroy(&tmat)); 6725 } 6726 /* destroy any change of basis context in sub_schurs */ 6727 if (sub_schurs && sub_schurs->change) { 6728 PetscInt i; 6729 6730 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 6731 PetscCall(PetscFree(sub_schurs->change)); 6732 } 6733 } 6734 if (pcbddc->switch_static) { /* need to save the local change */ 6735 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6736 } else { 6737 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6738 } 6739 /* determine if any process has changed the pressures locally */ 6740 pcbddc->change_interior = pcbddc->benign_have_null; 6741 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6742 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6743 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6744 pcbddc->use_qr_single = qr_needed; 6745 } 6746 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6747 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6748 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 6749 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6750 } else { 6751 Mat benign_global = NULL; 6752 if (pcbddc->benign_have_null) { 6753 Mat M; 6754 6755 pcbddc->change_interior = PETSC_TRUE; 6756 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 6757 PetscCall(VecReciprocal(pcis->vec1_N)); 6758 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 6759 if (pcbddc->benign_change) { 6760 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 6761 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 6762 } else { 6763 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 6764 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 6765 } 6766 PetscCall(MatISSetLocalMat(benign_global, M)); 6767 PetscCall(MatDestroy(&M)); 6768 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 6769 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 6770 } 6771 if (pcbddc->user_ChangeOfBasisMatrix) { 6772 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 6773 PetscCall(MatDestroy(&benign_global)); 6774 } else if (pcbddc->benign_have_null) { 6775 pcbddc->ChangeOfBasisMatrix = benign_global; 6776 } 6777 } 6778 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6779 IS is_global; 6780 const PetscInt *gidxs; 6781 6782 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 6783 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 6784 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 6785 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 6786 PetscCall(ISDestroy(&is_global)); 6787 } 6788 } 6789 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 6790 6791 if (!pcbddc->fake_change) { 6792 /* add pressure dofs to set of primal nodes for numbering purposes */ 6793 for (i = 0; i < pcbddc->benign_n; i++) { 6794 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6795 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6796 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6797 pcbddc->local_primal_size_cc++; 6798 pcbddc->local_primal_size++; 6799 } 6800 6801 /* check if a new primal space has been introduced (also take into account benign trick) */ 6802 pcbddc->new_primal_space_local = PETSC_TRUE; 6803 if (olocal_primal_size == pcbddc->local_primal_size) { 6804 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6805 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6806 if (!pcbddc->new_primal_space_local) { 6807 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6808 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6809 } 6810 } 6811 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6812 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 6813 } 6814 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 6815 6816 /* flush dbg viewer */ 6817 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6818 6819 /* free workspace */ 6820 PetscCall(PetscBTDestroy(&qr_needed_idx)); 6821 PetscCall(PetscBTDestroy(&change_basis)); 6822 if (!pcbddc->adaptive_selection) { 6823 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 6824 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 6825 } else { 6826 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 6827 PetscCall(PetscFree(constraints_n)); 6828 PetscCall(PetscFree(constraints_idxs_B)); 6829 } 6830 PetscFunctionReturn(PETSC_SUCCESS); 6831 } 6832 6833 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6834 { 6835 ISLocalToGlobalMapping map; 6836 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6837 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6838 PetscInt i, N; 6839 PetscBool rcsr = PETSC_FALSE; 6840 6841 PetscFunctionBegin; 6842 if (pcbddc->recompute_topography) { 6843 pcbddc->graphanalyzed = PETSC_FALSE; 6844 /* Reset previously computed graph */ 6845 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 6846 /* Init local Graph struct */ 6847 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 6848 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 6849 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 6850 6851 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 6852 /* Check validity of the csr graph passed in by the user */ 6853 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 6854 pcbddc->mat_graph->nvtxs); 6855 6856 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6857 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6858 PetscInt *xadj, *adjncy; 6859 PetscInt nvtxs; 6860 PetscBool flg_row = PETSC_FALSE; 6861 6862 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6863 if (flg_row) { 6864 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 6865 pcbddc->computed_rowadj = PETSC_TRUE; 6866 } 6867 PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6868 rcsr = PETSC_TRUE; 6869 } 6870 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6871 6872 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6873 PetscReal *lcoords; 6874 PetscInt n; 6875 MPI_Datatype dimrealtype; 6876 6877 /* TODO: support for blocked */ 6878 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 6879 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6880 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 6881 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 6882 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 6883 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6884 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6885 PetscCallMPI(MPI_Type_free(&dimrealtype)); 6886 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 6887 6888 pcbddc->mat_graph->coords = lcoords; 6889 pcbddc->mat_graph->cloc = PETSC_TRUE; 6890 pcbddc->mat_graph->cnloc = n; 6891 } 6892 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 6893 pcbddc->mat_graph->nvtxs); 6894 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 6895 6896 /* Setup of Graph */ 6897 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6898 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 6899 6900 /* attach info on disconnected subdomains if present */ 6901 if (pcbddc->n_local_subs) { 6902 PetscInt *local_subs, n, totn; 6903 6904 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6905 PetscCall(PetscMalloc1(n, &local_subs)); 6906 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 6907 for (i = 0; i < pcbddc->n_local_subs; i++) { 6908 const PetscInt *idxs; 6909 PetscInt nl, j; 6910 6911 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 6912 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 6913 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 6914 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 6915 } 6916 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 6917 pcbddc->mat_graph->n_local_subs = totn + 1; 6918 pcbddc->mat_graph->local_subs = local_subs; 6919 } 6920 } 6921 6922 if (!pcbddc->graphanalyzed) { 6923 /* Graph's connected components analysis */ 6924 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 6925 pcbddc->graphanalyzed = PETSC_TRUE; 6926 pcbddc->corner_selected = pcbddc->corner_selection; 6927 } 6928 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6929 PetscFunctionReturn(PETSC_SUCCESS); 6930 } 6931 6932 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 6933 { 6934 PetscInt i, j, n; 6935 PetscScalar *alphas; 6936 PetscReal norm, *onorms; 6937 6938 PetscFunctionBegin; 6939 n = *nio; 6940 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 6941 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 6942 PetscCall(VecNormalize(vecs[0], &norm)); 6943 if (norm < PETSC_SMALL) { 6944 onorms[0] = 0.0; 6945 PetscCall(VecSet(vecs[0], 0.0)); 6946 } else { 6947 onorms[0] = norm; 6948 } 6949 6950 for (i = 1; i < n; i++) { 6951 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 6952 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 6953 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 6954 PetscCall(VecNormalize(vecs[i], &norm)); 6955 if (norm < PETSC_SMALL) { 6956 onorms[i] = 0.0; 6957 PetscCall(VecSet(vecs[i], 0.0)); 6958 } else { 6959 onorms[i] = norm; 6960 } 6961 } 6962 /* push nonzero vectors at the beginning */ 6963 for (i = 0; i < n; i++) { 6964 if (onorms[i] == 0.0) { 6965 for (j = i + 1; j < n; j++) { 6966 if (onorms[j] != 0.0) { 6967 PetscCall(VecCopy(vecs[j], vecs[i])); 6968 onorms[j] = 0.0; 6969 } 6970 } 6971 } 6972 } 6973 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 6974 PetscCall(PetscFree2(alphas, onorms)); 6975 PetscFunctionReturn(PETSC_SUCCESS); 6976 } 6977 6978 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 6979 { 6980 ISLocalToGlobalMapping mapping; 6981 Mat A; 6982 PetscInt n_neighs, *neighs, *n_shared, **shared; 6983 PetscMPIInt size, rank, color; 6984 PetscInt *xadj, *adjncy; 6985 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 6986 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 6987 PetscInt void_procs, *procs_candidates = NULL; 6988 PetscInt xadj_count, *count; 6989 PetscBool ismatis, use_vwgt = PETSC_FALSE; 6990 PetscSubcomm psubcomm; 6991 MPI_Comm subcomm; 6992 6993 PetscFunctionBegin; 6994 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 6995 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 6996 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 6997 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 6998 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 6999 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7000 7001 if (have_void) *have_void = PETSC_FALSE; 7002 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7003 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7004 PetscCall(MatISGetLocalMat(mat, &A)); 7005 PetscCall(MatGetLocalSize(A, &n, NULL)); 7006 im_active = !!n; 7007 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7008 void_procs = size - active_procs; 7009 /* get ranks of of non-active processes in mat communicator */ 7010 if (void_procs) { 7011 PetscInt ncand; 7012 7013 if (have_void) *have_void = PETSC_TRUE; 7014 PetscCall(PetscMalloc1(size, &procs_candidates)); 7015 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7016 for (i = 0, ncand = 0; i < size; i++) { 7017 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7018 } 7019 /* force n_subdomains to be not greater that the number of non-active processes */ 7020 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7021 } 7022 7023 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7024 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7025 PetscCall(MatGetSize(mat, &N, NULL)); 7026 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7027 PetscInt issize, isidx, dest; 7028 if (*n_subdomains == 1) dest = 0; 7029 else dest = rank; 7030 if (im_active) { 7031 issize = 1; 7032 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7033 isidx = procs_candidates[dest]; 7034 } else { 7035 isidx = dest; 7036 } 7037 } else { 7038 issize = 0; 7039 isidx = -1; 7040 } 7041 if (*n_subdomains != 1) *n_subdomains = active_procs; 7042 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7043 PetscCall(PetscFree(procs_candidates)); 7044 PetscFunctionReturn(PETSC_SUCCESS); 7045 } 7046 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL)); 7047 PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL)); 7048 threshold = PetscMax(threshold, 2); 7049 7050 /* Get info on mapping */ 7051 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7052 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7053 7054 /* build local CSR graph of subdomains' connectivity */ 7055 PetscCall(PetscMalloc1(2, &xadj)); 7056 xadj[0] = 0; 7057 xadj[1] = PetscMax(n_neighs - 1, 0); 7058 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7059 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7060 PetscCall(PetscCalloc1(n, &count)); 7061 for (i = 1; i < n_neighs; i++) 7062 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7063 7064 xadj_count = 0; 7065 for (i = 1; i < n_neighs; i++) { 7066 for (j = 0; j < n_shared[i]; j++) { 7067 if (count[shared[i][j]] < threshold) { 7068 adjncy[xadj_count] = neighs[i]; 7069 adjncy_wgt[xadj_count] = n_shared[i]; 7070 xadj_count++; 7071 break; 7072 } 7073 } 7074 } 7075 xadj[1] = xadj_count; 7076 PetscCall(PetscFree(count)); 7077 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7078 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7079 7080 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7081 7082 /* Restrict work on active processes only */ 7083 PetscCall(PetscMPIIntCast(im_active, &color)); 7084 if (void_procs) { 7085 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7086 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7087 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7088 subcomm = PetscSubcommChild(psubcomm); 7089 } else { 7090 psubcomm = NULL; 7091 subcomm = PetscObjectComm((PetscObject)mat); 7092 } 7093 7094 v_wgt = NULL; 7095 if (!color) { 7096 PetscCall(PetscFree(xadj)); 7097 PetscCall(PetscFree(adjncy)); 7098 PetscCall(PetscFree(adjncy_wgt)); 7099 } else { 7100 Mat subdomain_adj; 7101 IS new_ranks, new_ranks_contig; 7102 MatPartitioning partitioner; 7103 PetscInt rstart = 0, rend = 0; 7104 PetscInt *is_indices, *oldranks; 7105 PetscMPIInt size; 7106 PetscBool aggregate; 7107 7108 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7109 if (void_procs) { 7110 PetscInt prank = rank; 7111 PetscCall(PetscMalloc1(size, &oldranks)); 7112 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7113 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7114 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7115 } else { 7116 oldranks = NULL; 7117 } 7118 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7119 if (aggregate) { /* TODO: all this part could be made more efficient */ 7120 PetscInt lrows, row, ncols, *cols; 7121 PetscMPIInt nrank; 7122 PetscScalar *vals; 7123 7124 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7125 lrows = 0; 7126 if (nrank < redprocs) { 7127 lrows = size / redprocs; 7128 if (nrank < size % redprocs) lrows++; 7129 } 7130 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7131 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7132 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7133 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7134 row = nrank; 7135 ncols = xadj[1] - xadj[0]; 7136 cols = adjncy; 7137 PetscCall(PetscMalloc1(ncols, &vals)); 7138 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7139 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7140 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7141 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7142 PetscCall(PetscFree(xadj)); 7143 PetscCall(PetscFree(adjncy)); 7144 PetscCall(PetscFree(adjncy_wgt)); 7145 PetscCall(PetscFree(vals)); 7146 if (use_vwgt) { 7147 Vec v; 7148 const PetscScalar *array; 7149 PetscInt nl; 7150 7151 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7152 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7153 PetscCall(VecAssemblyBegin(v)); 7154 PetscCall(VecAssemblyEnd(v)); 7155 PetscCall(VecGetLocalSize(v, &nl)); 7156 PetscCall(VecGetArrayRead(v, &array)); 7157 PetscCall(PetscMalloc1(nl, &v_wgt)); 7158 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7159 PetscCall(VecRestoreArrayRead(v, &array)); 7160 PetscCall(VecDestroy(&v)); 7161 } 7162 } else { 7163 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7164 if (use_vwgt) { 7165 PetscCall(PetscMalloc1(1, &v_wgt)); 7166 v_wgt[0] = n; 7167 } 7168 } 7169 /* PetscCall(MatView(subdomain_adj,0)); */ 7170 7171 /* Partition */ 7172 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7173 #if defined(PETSC_HAVE_PTSCOTCH) 7174 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7175 #elif defined(PETSC_HAVE_PARMETIS) 7176 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7177 #else 7178 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7179 #endif 7180 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7181 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7182 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7183 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7184 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7185 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7186 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7187 7188 /* renumber new_ranks to avoid "holes" in new set of processors */ 7189 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7190 PetscCall(ISDestroy(&new_ranks)); 7191 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7192 if (!aggregate) { 7193 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7194 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7195 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7196 } else if (oldranks) { 7197 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7198 } else { 7199 ranks_send_to_idx[0] = is_indices[0]; 7200 } 7201 } else { 7202 PetscInt idx = 0; 7203 PetscMPIInt tag; 7204 MPI_Request *reqs; 7205 7206 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7207 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7208 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7209 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7210 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7211 PetscCall(PetscFree(reqs)); 7212 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7213 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7214 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7215 } else if (oldranks) { 7216 ranks_send_to_idx[0] = oldranks[idx]; 7217 } else { 7218 ranks_send_to_idx[0] = idx; 7219 } 7220 } 7221 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7222 /* clean up */ 7223 PetscCall(PetscFree(oldranks)); 7224 PetscCall(ISDestroy(&new_ranks_contig)); 7225 PetscCall(MatDestroy(&subdomain_adj)); 7226 PetscCall(MatPartitioningDestroy(&partitioner)); 7227 } 7228 PetscCall(PetscSubcommDestroy(&psubcomm)); 7229 PetscCall(PetscFree(procs_candidates)); 7230 7231 /* assemble parallel IS for sends */ 7232 i = 1; 7233 if (!color) i = 0; 7234 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7235 PetscFunctionReturn(PETSC_SUCCESS); 7236 } 7237 7238 typedef enum { 7239 MATDENSE_PRIVATE = 0, 7240 MATAIJ_PRIVATE, 7241 MATBAIJ_PRIVATE, 7242 MATSBAIJ_PRIVATE 7243 } MatTypePrivate; 7244 7245 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7246 { 7247 Mat local_mat; 7248 IS is_sends_internal; 7249 PetscInt rows, cols, new_local_rows; 7250 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7251 PetscBool ismatis, isdense, newisdense, destroy_mat; 7252 ISLocalToGlobalMapping l2gmap; 7253 PetscInt *l2gmap_indices; 7254 const PetscInt *is_indices; 7255 MatType new_local_type; 7256 /* buffers */ 7257 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7258 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7259 PetscInt *recv_buffer_idxs_local; 7260 PetscScalar *ptr_vals, *recv_buffer_vals; 7261 const PetscScalar *send_buffer_vals; 7262 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7263 /* MPI */ 7264 MPI_Comm comm, comm_n; 7265 PetscSubcomm subcomm; 7266 PetscMPIInt n_sends, n_recvs, size; 7267 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7268 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7269 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7270 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7271 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7272 7273 PetscFunctionBegin; 7274 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7275 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7276 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7277 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7278 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7279 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7280 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7281 PetscValidLogicalCollectiveInt(mat, nis, 8); 7282 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7283 if (nvecs) { 7284 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7285 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7286 } 7287 /* further checks */ 7288 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7289 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7290 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7291 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7292 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7293 if (reuse && *mat_n) { 7294 PetscInt mrows, mcols, mnrows, mncols; 7295 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7296 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7297 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7298 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7299 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7300 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7301 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7302 } 7303 PetscCall(MatGetBlockSize(local_mat, &bs)); 7304 PetscValidLogicalCollectiveInt(mat, bs, 1); 7305 7306 /* prepare IS for sending if not provided */ 7307 if (!is_sends) { 7308 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7309 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7310 } else { 7311 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7312 is_sends_internal = is_sends; 7313 } 7314 7315 /* get comm */ 7316 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7317 7318 /* compute number of sends */ 7319 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7320 PetscCall(PetscMPIIntCast(i, &n_sends)); 7321 7322 /* compute number of receives */ 7323 PetscCallMPI(MPI_Comm_size(comm, &size)); 7324 PetscCall(PetscMalloc1(size, &iflags)); 7325 PetscCall(PetscArrayzero(iflags, size)); 7326 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7327 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7328 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7329 PetscCall(PetscFree(iflags)); 7330 7331 /* restrict comm if requested */ 7332 subcomm = NULL; 7333 destroy_mat = PETSC_FALSE; 7334 if (restrict_comm) { 7335 PetscMPIInt color, subcommsize; 7336 7337 color = 0; 7338 if (restrict_full) { 7339 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7340 } else { 7341 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7342 } 7343 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7344 subcommsize = size - subcommsize; 7345 /* check if reuse has been requested */ 7346 if (reuse) { 7347 if (*mat_n) { 7348 PetscMPIInt subcommsize2; 7349 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7350 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7351 comm_n = PetscObjectComm((PetscObject)*mat_n); 7352 } else { 7353 comm_n = PETSC_COMM_SELF; 7354 } 7355 } else { /* MAT_INITIAL_MATRIX */ 7356 PetscMPIInt rank; 7357 7358 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7359 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7360 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7361 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7362 comm_n = PetscSubcommChild(subcomm); 7363 } 7364 /* flag to destroy *mat_n if not significative */ 7365 if (color) destroy_mat = PETSC_TRUE; 7366 } else { 7367 comm_n = comm; 7368 } 7369 7370 /* prepare send/receive buffers */ 7371 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7372 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7373 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7374 PetscCall(PetscArrayzero(ilengths_vals, size)); 7375 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 7376 7377 /* Get data from local matrices */ 7378 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7379 /* TODO: See below some guidelines on how to prepare the local buffers */ 7380 /* 7381 send_buffer_vals should contain the raw values of the local matrix 7382 send_buffer_idxs should contain: 7383 - MatType_PRIVATE type 7384 - PetscInt size_of_l2gmap 7385 - PetscInt global_row_indices[size_of_l2gmap] 7386 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7387 */ 7388 { 7389 ISLocalToGlobalMapping mapping; 7390 7391 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7392 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7393 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7394 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7395 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7396 send_buffer_idxs[1] = i; 7397 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7398 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7399 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7400 PetscCall(PetscMPIIntCast(i, &len)); 7401 for (i = 0; i < n_sends; i++) { 7402 ilengths_vals[is_indices[i]] = len * len; 7403 ilengths_idxs[is_indices[i]] = len + 2; 7404 } 7405 } 7406 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7407 /* additional is (if any) */ 7408 if (nis) { 7409 PetscMPIInt psum; 7410 PetscInt j; 7411 for (j = 0, psum = 0; j < nis; j++) { 7412 PetscInt plen; 7413 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7414 PetscCall(PetscMPIIntCast(plen, &len)); 7415 psum += len + 1; /* indices + length */ 7416 } 7417 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7418 for (j = 0, psum = 0; j < nis; j++) { 7419 PetscInt plen; 7420 const PetscInt *is_array_idxs; 7421 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7422 send_buffer_idxs_is[psum] = plen; 7423 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7424 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7425 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7426 psum += plen + 1; /* indices + length */ 7427 } 7428 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 7429 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7430 } 7431 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7432 7433 buf_size_idxs = 0; 7434 buf_size_vals = 0; 7435 buf_size_idxs_is = 0; 7436 buf_size_vecs = 0; 7437 for (i = 0; i < n_recvs; i++) { 7438 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7439 buf_size_vals += (PetscInt)olengths_vals[i]; 7440 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7441 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7442 } 7443 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 7444 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 7445 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 7446 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 7447 7448 /* get new tags for clean communications */ 7449 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 7450 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 7451 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 7452 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 7453 7454 /* allocate for requests */ 7455 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 7456 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 7457 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 7458 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 7459 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 7460 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 7461 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 7462 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 7463 7464 /* communications */ 7465 ptr_idxs = recv_buffer_idxs; 7466 ptr_vals = recv_buffer_vals; 7467 ptr_idxs_is = recv_buffer_idxs_is; 7468 ptr_vecs = recv_buffer_vecs; 7469 for (i = 0; i < n_recvs; i++) { 7470 source_dest = onodes[i]; 7471 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 7472 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 7473 ptr_idxs += olengths_idxs[i]; 7474 ptr_vals += olengths_vals[i]; 7475 if (nis) { 7476 source_dest = onodes_is[i]; 7477 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 7478 ptr_idxs_is += olengths_idxs_is[i]; 7479 } 7480 if (nvecs) { 7481 source_dest = onodes[i]; 7482 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 7483 ptr_vecs += olengths_idxs[i] - 2; 7484 } 7485 } 7486 for (i = 0; i < n_sends; i++) { 7487 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 7488 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 7489 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 7490 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 7491 if (nvecs) { 7492 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7493 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 7494 } 7495 } 7496 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 7497 PetscCall(ISDestroy(&is_sends_internal)); 7498 7499 /* assemble new l2g map */ 7500 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 7501 ptr_idxs = recv_buffer_idxs; 7502 new_local_rows = 0; 7503 for (i = 0; i < n_recvs; i++) { 7504 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7505 ptr_idxs += olengths_idxs[i]; 7506 } 7507 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 7508 ptr_idxs = recv_buffer_idxs; 7509 new_local_rows = 0; 7510 for (i = 0; i < n_recvs; i++) { 7511 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 7512 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7513 ptr_idxs += olengths_idxs[i]; 7514 } 7515 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 7516 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 7517 PetscCall(PetscFree(l2gmap_indices)); 7518 7519 /* infer new local matrix type from received local matrices type */ 7520 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7521 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7522 if (n_recvs) { 7523 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7524 ptr_idxs = recv_buffer_idxs; 7525 for (i = 0; i < n_recvs; i++) { 7526 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7527 new_local_type_private = MATAIJ_PRIVATE; 7528 break; 7529 } 7530 ptr_idxs += olengths_idxs[i]; 7531 } 7532 switch (new_local_type_private) { 7533 case MATDENSE_PRIVATE: 7534 new_local_type = MATSEQAIJ; 7535 bs = 1; 7536 break; 7537 case MATAIJ_PRIVATE: 7538 new_local_type = MATSEQAIJ; 7539 bs = 1; 7540 break; 7541 case MATBAIJ_PRIVATE: 7542 new_local_type = MATSEQBAIJ; 7543 break; 7544 case MATSBAIJ_PRIVATE: 7545 new_local_type = MATSEQSBAIJ; 7546 break; 7547 default: 7548 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 7549 } 7550 } else { /* by default, new_local_type is seqaij */ 7551 new_local_type = MATSEQAIJ; 7552 bs = 1; 7553 } 7554 7555 /* create MATIS object if needed */ 7556 if (!reuse) { 7557 PetscCall(MatGetSize(mat, &rows, &cols)); 7558 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7559 } else { 7560 /* it also destroys the local matrices */ 7561 if (*mat_n) { 7562 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 7563 } else { /* this is a fake object */ 7564 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7565 } 7566 } 7567 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 7568 PetscCall(MatSetType(local_mat, new_local_type)); 7569 7570 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 7571 7572 /* Global to local map of received indices */ 7573 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 7574 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 7575 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7576 7577 /* restore attributes -> type of incoming data and its size */ 7578 buf_size_idxs = 0; 7579 for (i = 0; i < n_recvs; i++) { 7580 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7581 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 7582 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7583 } 7584 PetscCall(PetscFree(recv_buffer_idxs)); 7585 7586 /* set preallocation */ 7587 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 7588 if (!newisdense) { 7589 PetscInt *new_local_nnz = NULL; 7590 7591 ptr_idxs = recv_buffer_idxs_local; 7592 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 7593 for (i = 0; i < n_recvs; i++) { 7594 PetscInt j; 7595 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7596 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 7597 } else { 7598 /* TODO */ 7599 } 7600 ptr_idxs += olengths_idxs[i]; 7601 } 7602 if (new_local_nnz) { 7603 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 7604 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 7605 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 7606 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7607 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 7608 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7609 } else { 7610 PetscCall(MatSetUp(local_mat)); 7611 } 7612 PetscCall(PetscFree(new_local_nnz)); 7613 } else { 7614 PetscCall(MatSetUp(local_mat)); 7615 } 7616 7617 /* set values */ 7618 ptr_vals = recv_buffer_vals; 7619 ptr_idxs = recv_buffer_idxs_local; 7620 for (i = 0; i < n_recvs; i++) { 7621 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7622 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 7623 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 7624 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 7625 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 7626 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 7627 } else { 7628 /* TODO */ 7629 } 7630 ptr_idxs += olengths_idxs[i]; 7631 ptr_vals += olengths_vals[i]; 7632 } 7633 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 7634 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 7635 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 7636 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 7637 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 7638 PetscCall(PetscFree(recv_buffer_vals)); 7639 7640 #if 0 7641 if (!restrict_comm) { /* check */ 7642 Vec lvec,rvec; 7643 PetscReal infty_error; 7644 7645 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7646 PetscCall(VecSetRandom(rvec,NULL)); 7647 PetscCall(MatMult(mat,rvec,lvec)); 7648 PetscCall(VecScale(lvec,-1.0)); 7649 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7650 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7651 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7652 PetscCall(VecDestroy(&rvec)); 7653 PetscCall(VecDestroy(&lvec)); 7654 } 7655 #endif 7656 7657 /* assemble new additional is (if any) */ 7658 if (nis) { 7659 PetscInt **temp_idxs, *count_is, j, psum; 7660 7661 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 7662 PetscCall(PetscCalloc1(nis, &count_is)); 7663 ptr_idxs = recv_buffer_idxs_is; 7664 psum = 0; 7665 for (i = 0; i < n_recvs; i++) { 7666 for (j = 0; j < nis; j++) { 7667 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7668 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7669 psum += plen; 7670 ptr_idxs += plen + 1; /* shift pointer to received data */ 7671 } 7672 } 7673 PetscCall(PetscMalloc1(nis, &temp_idxs)); 7674 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 7675 for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1]; 7676 PetscCall(PetscArrayzero(count_is, nis)); 7677 ptr_idxs = recv_buffer_idxs_is; 7678 for (i = 0; i < n_recvs; i++) { 7679 for (j = 0; j < nis; j++) { 7680 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7681 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 7682 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7683 ptr_idxs += plen + 1; /* shift pointer to received data */ 7684 } 7685 } 7686 for (i = 0; i < nis; i++) { 7687 PetscCall(ISDestroy(&isarray[i])); 7688 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 7689 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 7690 } 7691 PetscCall(PetscFree(count_is)); 7692 PetscCall(PetscFree(temp_idxs[0])); 7693 PetscCall(PetscFree(temp_idxs)); 7694 } 7695 /* free workspace */ 7696 PetscCall(PetscFree(recv_buffer_idxs_is)); 7697 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 7698 PetscCall(PetscFree(send_buffer_idxs)); 7699 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 7700 if (isdense) { 7701 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7702 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 7703 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7704 } else { 7705 /* PetscCall(PetscFree(send_buffer_vals)); */ 7706 } 7707 if (nis) { 7708 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 7709 PetscCall(PetscFree(send_buffer_idxs_is)); 7710 } 7711 7712 if (nvecs) { 7713 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 7714 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 7715 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7716 PetscCall(VecDestroy(&nnsp_vec[0])); 7717 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 7718 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 7719 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 7720 /* set values */ 7721 ptr_vals = recv_buffer_vecs; 7722 ptr_idxs = recv_buffer_idxs_local; 7723 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7724 for (i = 0; i < n_recvs; i++) { 7725 PetscInt j; 7726 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 7727 ptr_idxs += olengths_idxs[i]; 7728 ptr_vals += olengths_idxs[i] - 2; 7729 } 7730 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7731 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 7732 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 7733 } 7734 7735 PetscCall(PetscFree(recv_buffer_vecs)); 7736 PetscCall(PetscFree(recv_buffer_idxs_local)); 7737 PetscCall(PetscFree(recv_req_idxs)); 7738 PetscCall(PetscFree(recv_req_vals)); 7739 PetscCall(PetscFree(recv_req_vecs)); 7740 PetscCall(PetscFree(recv_req_idxs_is)); 7741 PetscCall(PetscFree(send_req_idxs)); 7742 PetscCall(PetscFree(send_req_vals)); 7743 PetscCall(PetscFree(send_req_vecs)); 7744 PetscCall(PetscFree(send_req_idxs_is)); 7745 PetscCall(PetscFree(ilengths_vals)); 7746 PetscCall(PetscFree(ilengths_idxs)); 7747 PetscCall(PetscFree(olengths_vals)); 7748 PetscCall(PetscFree(olengths_idxs)); 7749 PetscCall(PetscFree(onodes)); 7750 if (nis) { 7751 PetscCall(PetscFree(ilengths_idxs_is)); 7752 PetscCall(PetscFree(olengths_idxs_is)); 7753 PetscCall(PetscFree(onodes_is)); 7754 } 7755 PetscCall(PetscSubcommDestroy(&subcomm)); 7756 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 7757 PetscCall(MatDestroy(mat_n)); 7758 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 7759 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7760 PetscCall(VecDestroy(&nnsp_vec[0])); 7761 } 7762 *mat_n = NULL; 7763 } 7764 PetscFunctionReturn(PETSC_SUCCESS); 7765 } 7766 7767 /* temporary hack into ksp private data structure */ 7768 #include <petsc/private/kspimpl.h> 7769 7770 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals) 7771 { 7772 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7773 PC_IS *pcis = (PC_IS *)pc->data; 7774 Mat coarse_mat, coarse_mat_is, coarse_submat_dense; 7775 Mat coarsedivudotp = NULL; 7776 Mat coarseG, t_coarse_mat_is; 7777 MatNullSpace CoarseNullSpace = NULL; 7778 ISLocalToGlobalMapping coarse_islg; 7779 IS coarse_is, *isarray, corners; 7780 PetscInt i, im_active = -1, active_procs = -1; 7781 PetscInt nis, nisdofs, nisneu, nisvert; 7782 PetscInt coarse_eqs_per_proc; 7783 PC pc_temp; 7784 PCType coarse_pc_type; 7785 KSPType coarse_ksp_type; 7786 PetscBool multilevel_requested, multilevel_allowed; 7787 PetscBool coarse_reuse; 7788 PetscInt ncoarse, nedcfield; 7789 PetscBool compute_vecs = PETSC_FALSE; 7790 PetscScalar *array; 7791 MatReuse coarse_mat_reuse; 7792 PetscBool restr, full_restr, have_void; 7793 PetscMPIInt size; 7794 7795 PetscFunctionBegin; 7796 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 7797 /* Assign global numbering to coarse dofs */ 7798 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7799 PetscInt ocoarse_size; 7800 compute_vecs = PETSC_TRUE; 7801 7802 pcbddc->new_primal_space = PETSC_TRUE; 7803 ocoarse_size = pcbddc->coarse_size; 7804 PetscCall(PetscFree(pcbddc->global_primal_indices)); 7805 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 7806 /* see if we can avoid some work */ 7807 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7808 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7809 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7810 PetscCall(KSPReset(pcbddc->coarse_ksp)); 7811 coarse_reuse = PETSC_FALSE; 7812 } else { /* we can safely reuse already computed coarse matrix */ 7813 coarse_reuse = PETSC_TRUE; 7814 } 7815 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7816 coarse_reuse = PETSC_FALSE; 7817 } 7818 /* reset any subassembling information */ 7819 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 7820 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7821 coarse_reuse = PETSC_TRUE; 7822 } 7823 if (coarse_reuse && pcbddc->coarse_ksp) { 7824 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 7825 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 7826 coarse_mat_reuse = MAT_REUSE_MATRIX; 7827 } else { 7828 coarse_mat = NULL; 7829 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7830 } 7831 7832 /* creates temporary l2gmap and IS for coarse indexes */ 7833 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 7834 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 7835 7836 /* creates temporary MATIS object for coarse matrix */ 7837 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense)); 7838 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is)); 7839 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense)); 7840 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7841 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7842 PetscCall(MatDestroy(&coarse_submat_dense)); 7843 7844 /* count "active" (i.e. with positive local size) and "void" processes */ 7845 im_active = !!(pcis->n); 7846 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7847 7848 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7849 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 7850 /* full_restr : just use the receivers from the subassembling pattern */ 7851 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 7852 coarse_mat_is = NULL; 7853 multilevel_allowed = PETSC_FALSE; 7854 multilevel_requested = PETSC_FALSE; 7855 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 7856 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 7857 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7858 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7859 if (multilevel_requested) { 7860 ncoarse = active_procs / pcbddc->coarsening_ratio; 7861 restr = PETSC_FALSE; 7862 full_restr = PETSC_FALSE; 7863 } else { 7864 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 7865 restr = PETSC_TRUE; 7866 full_restr = PETSC_TRUE; 7867 } 7868 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7869 ncoarse = PetscMax(1, ncoarse); 7870 if (!pcbddc->coarse_subassembling) { 7871 if (pcbddc->coarsening_ratio > 1) { 7872 if (multilevel_requested) { 7873 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7874 } else { 7875 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7876 } 7877 } else { 7878 PetscMPIInt rank; 7879 7880 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 7881 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7882 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 7883 } 7884 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7885 PetscInt psum; 7886 if (pcbddc->coarse_ksp) psum = 1; 7887 else psum = 0; 7888 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7889 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7890 } 7891 /* determine if we can go multilevel */ 7892 if (multilevel_requested) { 7893 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7894 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7895 } 7896 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7897 7898 /* dump subassembling pattern */ 7899 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 7900 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7901 nedcfield = -1; 7902 corners = NULL; 7903 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 7904 PetscInt *tidxs, *tidxs2, nout, tsize, i; 7905 const PetscInt *idxs; 7906 ISLocalToGlobalMapping tmap; 7907 7908 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7909 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 7910 /* allocate space for temporary storage */ 7911 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 7912 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 7913 /* allocate for IS array */ 7914 nisdofs = pcbddc->n_ISForDofsLocal; 7915 if (pcbddc->nedclocal) { 7916 if (pcbddc->nedfield > -1) { 7917 nedcfield = pcbddc->nedfield; 7918 } else { 7919 nedcfield = 0; 7920 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 7921 nisdofs = 1; 7922 } 7923 } 7924 nisneu = !!pcbddc->NeumannBoundariesLocal; 7925 nisvert = 0; /* nisvert is not used */ 7926 nis = nisdofs + nisneu + nisvert; 7927 PetscCall(PetscMalloc1(nis, &isarray)); 7928 /* dofs splitting */ 7929 for (i = 0; i < nisdofs; i++) { 7930 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 7931 if (nedcfield != i) { 7932 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 7933 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7934 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7935 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7936 } else { 7937 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 7938 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 7939 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7940 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7941 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 7942 } 7943 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7944 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 7945 /* PetscCall(ISView(isarray[i],0)); */ 7946 } 7947 /* neumann boundaries */ 7948 if (pcbddc->NeumannBoundariesLocal) { 7949 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 7950 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 7951 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7952 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7953 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 7954 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7955 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 7956 /* PetscCall(ISView(isarray[nisdofs],0)); */ 7957 } 7958 /* coordinates */ 7959 if (pcbddc->corner_selected) { 7960 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7961 PetscCall(ISGetLocalSize(corners, &tsize)); 7962 PetscCall(ISGetIndices(corners, &idxs)); 7963 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7964 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 7965 PetscCall(ISRestoreIndices(corners, &idxs)); 7966 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 7967 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 7968 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 7969 } 7970 PetscCall(PetscFree(tidxs)); 7971 PetscCall(PetscFree(tidxs2)); 7972 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 7973 } else { 7974 nis = 0; 7975 nisdofs = 0; 7976 nisneu = 0; 7977 nisvert = 0; 7978 isarray = NULL; 7979 } 7980 /* destroy no longer needed map */ 7981 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 7982 7983 /* subassemble */ 7984 if (multilevel_allowed) { 7985 Vec vp[1]; 7986 PetscInt nvecs = 0; 7987 PetscBool reuse, reuser; 7988 7989 if (coarse_mat) reuse = PETSC_TRUE; 7990 else reuse = PETSC_FALSE; 7991 PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7992 vp[0] = NULL; 7993 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7994 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 7995 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 7996 PetscCall(VecSetType(vp[0], VECSTANDARD)); 7997 nvecs = 1; 7998 7999 if (pcbddc->divudotp) { 8000 Mat B, loc_divudotp; 8001 Vec v, p; 8002 IS dummy; 8003 PetscInt np; 8004 8005 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8006 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8007 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8008 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8009 PetscCall(MatCreateVecs(B, &v, &p)); 8010 PetscCall(VecSet(p, 1.)); 8011 PetscCall(MatMultTranspose(B, p, v)); 8012 PetscCall(VecDestroy(&p)); 8013 PetscCall(MatDestroy(&B)); 8014 PetscCall(VecGetArray(vp[0], &array)); 8015 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8016 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8017 PetscCall(VecResetArray(pcbddc->vec1_P)); 8018 PetscCall(VecRestoreArray(vp[0], &array)); 8019 PetscCall(ISDestroy(&dummy)); 8020 PetscCall(VecDestroy(&v)); 8021 } 8022 } 8023 if (reuser) { 8024 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8025 } else { 8026 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8027 } 8028 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8029 PetscScalar *arraym; 8030 const PetscScalar *arrayv; 8031 PetscInt nl; 8032 PetscCall(VecGetLocalSize(vp[0], &nl)); 8033 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8034 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8035 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8036 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8037 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8038 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8039 PetscCall(VecDestroy(&vp[0])); 8040 } else { 8041 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8042 } 8043 } else { 8044 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8045 } 8046 if (coarse_mat_is || coarse_mat) { 8047 if (!multilevel_allowed) { 8048 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8049 } else { 8050 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8051 if (coarse_mat_is) { 8052 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8053 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8054 coarse_mat = coarse_mat_is; 8055 } 8056 } 8057 } 8058 PetscCall(MatDestroy(&t_coarse_mat_is)); 8059 PetscCall(MatDestroy(&coarse_mat_is)); 8060 8061 /* create local to global scatters for coarse problem */ 8062 if (compute_vecs) { 8063 PetscInt lrows; 8064 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8065 if (coarse_mat) { 8066 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8067 } else { 8068 lrows = 0; 8069 } 8070 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8071 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8072 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8073 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8074 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8075 } 8076 PetscCall(ISDestroy(&coarse_is)); 8077 8078 /* set defaults for coarse KSP and PC */ 8079 if (multilevel_allowed) { 8080 coarse_ksp_type = KSPRICHARDSON; 8081 coarse_pc_type = PCBDDC; 8082 } else { 8083 coarse_ksp_type = KSPPREONLY; 8084 coarse_pc_type = PCREDUNDANT; 8085 } 8086 8087 /* print some info if requested */ 8088 if (pcbddc->dbg_flag) { 8089 if (!multilevel_allowed) { 8090 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8091 if (multilevel_requested) { 8092 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio)); 8093 } else if (pcbddc->max_levels) { 8094 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8095 } 8096 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8097 } 8098 } 8099 8100 /* communicate coarse discrete gradient */ 8101 coarseG = NULL; 8102 if (pcbddc->nedcG && multilevel_allowed) { 8103 MPI_Comm ccomm; 8104 if (coarse_mat) { 8105 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8106 } else { 8107 ccomm = MPI_COMM_NULL; 8108 } 8109 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8110 } 8111 8112 /* create the coarse KSP object only once with defaults */ 8113 if (coarse_mat) { 8114 PetscBool isredundant, isbddc, force, valid; 8115 PetscViewer dbg_viewer = NULL; 8116 PetscBool isset, issym, isher, isspd; 8117 8118 if (pcbddc->dbg_flag) { 8119 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8120 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8121 } 8122 if (!pcbddc->coarse_ksp) { 8123 char prefix[256], str_level[16]; 8124 size_t len; 8125 8126 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8127 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8128 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8129 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8130 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8131 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8132 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8133 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8134 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8135 /* TODO is this logic correct? should check for coarse_mat type */ 8136 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8137 /* prefix */ 8138 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8139 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8140 if (!pcbddc->current_level) { 8141 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8142 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8143 } else { 8144 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8145 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8146 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8147 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8148 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8149 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 8150 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8151 } 8152 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8153 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8154 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8155 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8156 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8157 /* allow user customization */ 8158 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8159 /* get some info after set from options */ 8160 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8161 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8162 force = PETSC_FALSE; 8163 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8164 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8165 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8166 if (multilevel_allowed && !force && !valid) { 8167 isbddc = PETSC_TRUE; 8168 PetscCall(PCSetType(pc_temp, PCBDDC)); 8169 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8170 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8171 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8172 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8173 PetscObjectOptionsBegin((PetscObject)pc_temp); 8174 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8175 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8176 PetscOptionsEnd(); 8177 pc_temp->setfromoptionscalled++; 8178 } 8179 } 8180 } 8181 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8182 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8183 if (nisdofs) { 8184 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8185 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8186 } 8187 if (nisneu) { 8188 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8189 PetscCall(ISDestroy(&isarray[nisdofs])); 8190 } 8191 if (nisvert) { 8192 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8193 PetscCall(ISDestroy(&isarray[nis - 1])); 8194 } 8195 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8196 8197 /* get some info after set from options */ 8198 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8199 8200 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8201 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8202 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8203 force = PETSC_FALSE; 8204 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8205 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8206 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8207 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8208 if (isredundant) { 8209 KSP inner_ksp; 8210 PC inner_pc; 8211 8212 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8213 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8214 } 8215 8216 /* parameters which miss an API */ 8217 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8218 if (isbddc) { 8219 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8220 8221 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8222 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8223 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8224 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8225 if (pcbddc_coarse->benign_saddle_point) { 8226 Mat coarsedivudotp_is; 8227 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8228 IS row, col; 8229 const PetscInt *gidxs; 8230 PetscInt n, st, M, N; 8231 8232 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8233 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8234 st = st - n; 8235 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8236 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8237 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8238 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8239 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8240 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8241 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8242 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8243 PetscCall(ISGetSize(row, &M)); 8244 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8245 PetscCall(ISDestroy(&row)); 8246 PetscCall(ISDestroy(&col)); 8247 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8248 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8249 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8250 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8251 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8252 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8253 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8254 PetscCall(MatDestroy(&coarsedivudotp)); 8255 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8256 PetscCall(MatDestroy(&coarsedivudotp_is)); 8257 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8258 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8259 } 8260 } 8261 8262 /* propagate symmetry info of coarse matrix */ 8263 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8264 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8265 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8266 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8267 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8268 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8269 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8270 8271 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8272 /* set operators */ 8273 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8274 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8275 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8276 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8277 } 8278 PetscCall(MatDestroy(&coarseG)); 8279 PetscCall(PetscFree(isarray)); 8280 #if 0 8281 { 8282 PetscViewer viewer; 8283 char filename[256]; 8284 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8285 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8286 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8287 PetscCall(MatView(coarse_mat,viewer)); 8288 PetscCall(PetscViewerPopFormat(viewer)); 8289 PetscCall(PetscViewerDestroy(&viewer)); 8290 } 8291 #endif 8292 8293 if (corners) { 8294 Vec gv; 8295 IS is; 8296 const PetscInt *idxs; 8297 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8298 PetscScalar *coords; 8299 8300 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8301 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8302 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8303 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8304 PetscCall(VecSetBlockSize(gv, cdim)); 8305 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8306 PetscCall(VecSetType(gv, VECSTANDARD)); 8307 PetscCall(VecSetFromOptions(gv)); 8308 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8309 8310 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8311 PetscCall(ISGetLocalSize(is, &n)); 8312 PetscCall(ISGetIndices(is, &idxs)); 8313 PetscCall(PetscMalloc1(n * cdim, &coords)); 8314 for (i = 0; i < n; i++) { 8315 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8316 } 8317 PetscCall(ISRestoreIndices(is, &idxs)); 8318 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8319 8320 PetscCall(ISGetLocalSize(corners, &n)); 8321 PetscCall(ISGetIndices(corners, &idxs)); 8322 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8323 PetscCall(ISRestoreIndices(corners, &idxs)); 8324 PetscCall(PetscFree(coords)); 8325 PetscCall(VecAssemblyBegin(gv)); 8326 PetscCall(VecAssemblyEnd(gv)); 8327 PetscCall(VecGetArray(gv, &coords)); 8328 if (pcbddc->coarse_ksp) { 8329 PC coarse_pc; 8330 PetscBool isbddc; 8331 8332 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8333 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8334 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8335 PetscReal *realcoords; 8336 8337 PetscCall(VecGetLocalSize(gv, &n)); 8338 #if defined(PETSC_USE_COMPLEX) 8339 PetscCall(PetscMalloc1(n, &realcoords)); 8340 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8341 #else 8342 realcoords = coords; 8343 #endif 8344 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8345 #if defined(PETSC_USE_COMPLEX) 8346 PetscCall(PetscFree(realcoords)); 8347 #endif 8348 } 8349 } 8350 PetscCall(VecRestoreArray(gv, &coords)); 8351 PetscCall(VecDestroy(&gv)); 8352 } 8353 PetscCall(ISDestroy(&corners)); 8354 8355 if (pcbddc->coarse_ksp) { 8356 Vec crhs, csol; 8357 8358 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8359 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8360 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL)); 8361 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs))); 8362 } 8363 PetscCall(MatDestroy(&coarsedivudotp)); 8364 8365 /* compute null space for coarse solver if the benign trick has been requested */ 8366 if (pcbddc->benign_null) { 8367 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8368 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 8369 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8370 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8371 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8372 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8373 if (coarse_mat) { 8374 Vec nullv; 8375 PetscScalar *array, *array2; 8376 PetscInt nl; 8377 8378 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8379 PetscCall(VecGetLocalSize(nullv, &nl)); 8380 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8381 PetscCall(VecGetArray(nullv, &array2)); 8382 PetscCall(PetscArraycpy(array2, array, nl)); 8383 PetscCall(VecRestoreArray(nullv, &array2)); 8384 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8385 PetscCall(VecNormalize(nullv, NULL)); 8386 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8387 PetscCall(VecDestroy(&nullv)); 8388 } 8389 } 8390 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8391 8392 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8393 if (pcbddc->coarse_ksp) { 8394 PetscBool ispreonly; 8395 8396 if (CoarseNullSpace) { 8397 PetscBool isnull; 8398 8399 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8400 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8401 /* TODO: add local nullspaces (if any) */ 8402 } 8403 /* setup coarse ksp */ 8404 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8405 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8406 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8407 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8408 KSP check_ksp; 8409 KSPType check_ksp_type; 8410 PC check_pc; 8411 Vec check_vec, coarse_vec; 8412 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8413 PetscInt its; 8414 PetscBool compute_eigs; 8415 PetscReal *eigs_r, *eigs_c; 8416 PetscInt neigs; 8417 const char *prefix; 8418 8419 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8420 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 8421 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 8422 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 8423 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 8424 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 8425 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 8426 /* prevent from setup unneeded object */ 8427 PetscCall(KSPGetPC(check_ksp, &check_pc)); 8428 PetscCall(PCSetType(check_pc, PCNONE)); 8429 if (ispreonly) { 8430 check_ksp_type = KSPPREONLY; 8431 compute_eigs = PETSC_FALSE; 8432 } else { 8433 check_ksp_type = KSPGMRES; 8434 compute_eigs = PETSC_TRUE; 8435 } 8436 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 8437 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 8438 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 8439 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 8440 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 8441 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 8442 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 8443 PetscCall(KSPSetFromOptions(check_ksp)); 8444 PetscCall(KSPSetUp(check_ksp)); 8445 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 8446 PetscCall(KSPSetPC(check_ksp, check_pc)); 8447 /* create random vec */ 8448 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 8449 PetscCall(VecSetRandom(check_vec, NULL)); 8450 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8451 /* solve coarse problem */ 8452 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 8453 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 8454 /* set eigenvalue estimation if preonly has not been requested */ 8455 if (compute_eigs) { 8456 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 8457 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 8458 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 8459 if (neigs) { 8460 lambda_max = eigs_r[neigs - 1]; 8461 lambda_min = eigs_r[0]; 8462 if (pcbddc->use_coarse_estimates) { 8463 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8464 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 8465 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 8466 } 8467 } 8468 } 8469 } 8470 8471 /* check coarse problem residual error */ 8472 if (pcbddc->dbg_flag) { 8473 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8474 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8475 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 8476 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 8477 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8478 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 8479 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 8480 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer)); 8481 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 8482 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 8483 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 8484 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 8485 if (compute_eigs) { 8486 PetscReal lambda_max_s, lambda_min_s; 8487 KSPConvergedReason reason; 8488 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 8489 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 8490 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 8491 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 8492 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 8493 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 8494 } 8495 PetscCall(PetscViewerFlush(dbg_viewer)); 8496 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8497 } 8498 PetscCall(VecDestroy(&check_vec)); 8499 PetscCall(VecDestroy(&coarse_vec)); 8500 PetscCall(KSPDestroy(&check_ksp)); 8501 if (compute_eigs) { 8502 PetscCall(PetscFree(eigs_r)); 8503 PetscCall(PetscFree(eigs_c)); 8504 } 8505 } 8506 } 8507 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8508 /* print additional info */ 8509 if (pcbddc->dbg_flag) { 8510 /* waits until all processes reaches this point */ 8511 PetscCall(PetscBarrier((PetscObject)pc)); 8512 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 8513 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8514 } 8515 8516 /* free memory */ 8517 PetscCall(MatDestroy(&coarse_mat)); 8518 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8519 PetscFunctionReturn(PETSC_SUCCESS); 8520 } 8521 8522 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 8523 { 8524 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8525 PC_IS *pcis = (PC_IS *)pc->data; 8526 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 8527 IS subset, subset_mult, subset_n; 8528 PetscInt local_size, coarse_size = 0; 8529 PetscInt *local_primal_indices = NULL; 8530 const PetscInt *t_local_primal_indices; 8531 8532 PetscFunctionBegin; 8533 /* Compute global number of coarse dofs */ 8534 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 8535 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 8536 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 8537 PetscCall(ISDestroy(&subset_n)); 8538 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 8539 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 8540 PetscCall(ISDestroy(&subset)); 8541 PetscCall(ISDestroy(&subset_mult)); 8542 PetscCall(ISGetLocalSize(subset_n, &local_size)); 8543 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 8544 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 8545 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 8546 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 8547 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 8548 PetscCall(ISDestroy(&subset_n)); 8549 8550 /* check numbering */ 8551 if (pcbddc->dbg_flag) { 8552 PetscScalar coarsesum, *array, *array2; 8553 PetscInt i; 8554 PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE; 8555 8556 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8557 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8558 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n")); 8559 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8560 /* counter */ 8561 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8562 PetscCall(VecSet(pcis->vec1_N, 1.0)); 8563 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8564 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8565 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8566 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8567 PetscCall(VecSet(pcis->vec1_N, 0.0)); 8568 for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES)); 8569 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8570 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8571 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8572 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8573 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8574 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8575 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8576 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8577 PetscCall(VecGetArray(pcis->vec2_N, &array2)); 8578 for (i = 0; i < pcis->n; i++) { 8579 if (array[i] != 0.0 && array[i] != array2[i]) { 8580 PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi; 8581 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8582 set_error = PETSC_TRUE; 8583 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi)); 8584 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh)); 8585 } 8586 } 8587 PetscCall(VecRestoreArray(pcis->vec2_N, &array2)); 8588 PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8589 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8590 for (i = 0; i < pcis->n; i++) { 8591 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]); 8592 } 8593 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8594 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8595 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8596 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8597 PetscCall(VecSum(pcis->vec1_global, &coarsesum)); 8598 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum))); 8599 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8600 PetscInt *gidxs; 8601 8602 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs)); 8603 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs)); 8604 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n")); 8605 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8606 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank)); 8607 for (i = 0; i < pcbddc->local_primal_size; i++) { 8608 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i])); 8609 } 8610 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8611 PetscCall(PetscFree(gidxs)); 8612 } 8613 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8614 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8615 PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed"); 8616 } 8617 8618 /* get back data */ 8619 *coarse_size_n = coarse_size; 8620 *local_primal_indices_n = local_primal_indices; 8621 PetscFunctionReturn(PETSC_SUCCESS); 8622 } 8623 8624 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 8625 { 8626 IS localis_t; 8627 PetscInt i, lsize, *idxs, n; 8628 PetscScalar *vals; 8629 8630 PetscFunctionBegin; 8631 /* get indices in local ordering exploiting local to global map */ 8632 PetscCall(ISGetLocalSize(globalis, &lsize)); 8633 PetscCall(PetscMalloc1(lsize, &vals)); 8634 for (i = 0; i < lsize; i++) vals[i] = 1.0; 8635 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 8636 PetscCall(VecSet(gwork, 0.0)); 8637 PetscCall(VecSet(lwork, 0.0)); 8638 if (idxs) { /* multilevel guard */ 8639 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 8640 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 8641 } 8642 PetscCall(VecAssemblyBegin(gwork)); 8643 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 8644 PetscCall(PetscFree(vals)); 8645 PetscCall(VecAssemblyEnd(gwork)); 8646 /* now compute set in local ordering */ 8647 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8648 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8649 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 8650 PetscCall(VecGetSize(lwork, &n)); 8651 for (i = 0, lsize = 0; i < n; i++) { 8652 if (PetscRealPart(vals[i]) > 0.5) lsize++; 8653 } 8654 PetscCall(PetscMalloc1(lsize, &idxs)); 8655 for (i = 0, lsize = 0; i < n; i++) { 8656 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 8657 } 8658 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 8659 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 8660 *localis = localis_t; 8661 PetscFunctionReturn(PETSC_SUCCESS); 8662 } 8663 8664 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8665 { 8666 PC_IS *pcis = (PC_IS *)pc->data; 8667 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8668 PC_IS *pcisf; 8669 PC_BDDC *pcbddcf; 8670 PC pcf; 8671 8672 PetscFunctionBegin; 8673 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 8674 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 8675 PetscCall(PCSetType(pcf, PCBDDC)); 8676 8677 pcisf = (PC_IS *)pcf->data; 8678 pcbddcf = (PC_BDDC *)pcf->data; 8679 8680 pcisf->is_B_local = pcis->is_B_local; 8681 pcisf->vec1_N = pcis->vec1_N; 8682 pcisf->BtoNmap = pcis->BtoNmap; 8683 pcisf->n = pcis->n; 8684 pcisf->n_B = pcis->n_B; 8685 8686 PetscCall(PetscFree(pcbddcf->mat_graph)); 8687 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8688 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8689 pcbddcf->sub_schurs = schurs; 8690 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8691 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8692 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8693 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8694 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 8695 pcbddcf->use_faces = PETSC_TRUE; 8696 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 8697 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 8698 pcbddcf->use_qr_single = (PetscBool)!constraints; 8699 pcbddcf->fake_change = PETSC_TRUE; 8700 pcbddcf->dbg_flag = pcbddc->dbg_flag; 8701 8702 PetscCall(PCBDDCAdaptiveSelection(pcf)); 8703 PetscCall(PCBDDCConstraintsSetUp(pcf)); 8704 8705 *change = pcbddcf->ConstraintMatrix; 8706 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 8707 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 8708 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 8709 8710 if (schurs) pcbddcf->sub_schurs = NULL; 8711 pcbddcf->ConstraintMatrix = NULL; 8712 pcbddcf->mat_graph = NULL; 8713 pcisf->is_B_local = NULL; 8714 pcisf->vec1_N = NULL; 8715 pcisf->BtoNmap = NULL; 8716 PetscCall(PCDestroy(&pcf)); 8717 PetscFunctionReturn(PETSC_SUCCESS); 8718 } 8719 8720 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8721 { 8722 PC_IS *pcis = (PC_IS *)pc->data; 8723 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8724 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 8725 Mat S_j; 8726 PetscInt *used_xadj, *used_adjncy; 8727 PetscBool free_used_adj; 8728 8729 PetscFunctionBegin; 8730 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8731 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8732 free_used_adj = PETSC_FALSE; 8733 if (pcbddc->sub_schurs_layers == -1) { 8734 used_xadj = NULL; 8735 used_adjncy = NULL; 8736 } else { 8737 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8738 used_xadj = pcbddc->mat_graph->xadj; 8739 used_adjncy = pcbddc->mat_graph->adjncy; 8740 } else if (pcbddc->computed_rowadj) { 8741 used_xadj = pcbddc->mat_graph->xadj; 8742 used_adjncy = pcbddc->mat_graph->adjncy; 8743 } else { 8744 PetscBool flg_row = PETSC_FALSE; 8745 const PetscInt *xadj, *adjncy; 8746 PetscInt nvtxs; 8747 8748 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8749 if (flg_row) { 8750 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 8751 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 8752 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 8753 free_used_adj = PETSC_TRUE; 8754 } else { 8755 pcbddc->sub_schurs_layers = -1; 8756 used_xadj = NULL; 8757 used_adjncy = NULL; 8758 } 8759 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8760 } 8761 } 8762 8763 /* setup sub_schurs data */ 8764 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8765 if (!sub_schurs->schur_explicit) { 8766 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8767 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8768 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 8769 } else { 8770 Mat change = NULL; 8771 Vec scaling = NULL; 8772 IS change_primal = NULL, iP; 8773 PetscInt benign_n; 8774 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8775 PetscBool need_change = PETSC_FALSE; 8776 PetscBool discrete_harmonic = PETSC_FALSE; 8777 8778 if (!pcbddc->use_vertices && reuse_solvers) { 8779 PetscInt n_vertices; 8780 8781 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 8782 reuse_solvers = (PetscBool)!n_vertices; 8783 } 8784 if (!pcbddc->benign_change_explicit) { 8785 benign_n = pcbddc->benign_n; 8786 } else { 8787 benign_n = 0; 8788 } 8789 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8790 We need a global reduction to avoid possible deadlocks. 8791 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8792 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8793 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8794 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8795 need_change = (PetscBool)(!need_change); 8796 } 8797 /* If the user defines additional constraints, we import them here */ 8798 if (need_change) { 8799 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 8800 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 8801 } 8802 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8803 8804 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 8805 if (iP) { 8806 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 8807 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 8808 PetscOptionsEnd(); 8809 } 8810 if (discrete_harmonic) { 8811 Mat A; 8812 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 8813 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 8814 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 8815 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 8816 pcbddc->benign_zerodiag_subs, change, change_primal)); 8817 PetscCall(MatDestroy(&A)); 8818 } else { 8819 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 8820 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 8821 } 8822 PetscCall(MatDestroy(&change)); 8823 PetscCall(ISDestroy(&change_primal)); 8824 } 8825 PetscCall(MatDestroy(&S_j)); 8826 8827 /* free adjacency */ 8828 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 8829 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8830 PetscFunctionReturn(PETSC_SUCCESS); 8831 } 8832 8833 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8834 { 8835 PC_IS *pcis = (PC_IS *)pc->data; 8836 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8837 PCBDDCGraph graph; 8838 8839 PetscFunctionBegin; 8840 /* attach interface graph for determining subsets */ 8841 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8842 IS verticesIS, verticescomm; 8843 PetscInt vsize, *idxs; 8844 8845 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8846 PetscCall(ISGetSize(verticesIS, &vsize)); 8847 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 8848 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 8849 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 8850 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8851 PetscCall(PCBDDCGraphCreate(&graph)); 8852 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 8853 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 8854 PetscCall(ISDestroy(&verticescomm)); 8855 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 8856 } else { 8857 graph = pcbddc->mat_graph; 8858 } 8859 /* print some info */ 8860 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8861 IS vertices; 8862 PetscInt nv, nedges, nfaces; 8863 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 8864 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8865 PetscCall(ISGetSize(vertices, &nv)); 8866 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8867 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 8868 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 8869 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 8870 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 8871 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8872 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 8873 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8874 } 8875 8876 /* sub_schurs init */ 8877 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 8878 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 8879 8880 /* free graph struct */ 8881 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 8882 PetscFunctionReturn(PETSC_SUCCESS); 8883 } 8884 8885 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8886 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8887 { 8888 Mat At; 8889 IS rows; 8890 PetscInt rst, ren; 8891 PetscLayout rmap; 8892 8893 PetscFunctionBegin; 8894 rst = ren = 0; 8895 if (ccomm != MPI_COMM_NULL) { 8896 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 8897 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 8898 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 8899 PetscCall(PetscLayoutSetUp(rmap)); 8900 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 8901 } 8902 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 8903 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 8904 PetscCall(ISDestroy(&rows)); 8905 8906 if (ccomm != MPI_COMM_NULL) { 8907 Mat_MPIAIJ *a, *b; 8908 IS from, to; 8909 Vec gvec; 8910 PetscInt lsize; 8911 8912 PetscCall(MatCreate(ccomm, B)); 8913 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 8914 PetscCall(MatSetType(*B, MATAIJ)); 8915 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 8916 PetscCall(PetscLayoutSetUp((*B)->cmap)); 8917 a = (Mat_MPIAIJ *)At->data; 8918 b = (Mat_MPIAIJ *)(*B)->data; 8919 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 8920 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 8921 PetscCall(PetscObjectReference((PetscObject)a->A)); 8922 PetscCall(PetscObjectReference((PetscObject)a->B)); 8923 b->A = a->A; 8924 b->B = a->B; 8925 8926 b->donotstash = a->donotstash; 8927 b->roworiented = a->roworiented; 8928 b->rowindices = NULL; 8929 b->rowvalues = NULL; 8930 b->getrowactive = PETSC_FALSE; 8931 8932 (*B)->rmap = rmap; 8933 (*B)->factortype = A->factortype; 8934 (*B)->assembled = PETSC_TRUE; 8935 (*B)->insertmode = NOT_SET_VALUES; 8936 (*B)->preallocated = PETSC_TRUE; 8937 8938 if (a->colmap) { 8939 #if defined(PETSC_USE_CTABLE) 8940 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 8941 #else 8942 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 8943 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 8944 #endif 8945 } else b->colmap = NULL; 8946 if (a->garray) { 8947 PetscInt len; 8948 len = a->B->cmap->n; 8949 PetscCall(PetscMalloc1(len + 1, &b->garray)); 8950 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 8951 } else b->garray = NULL; 8952 8953 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 8954 b->lvec = a->lvec; 8955 8956 /* cannot use VecScatterCopy */ 8957 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 8958 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 8959 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 8960 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 8961 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 8962 PetscCall(ISDestroy(&from)); 8963 PetscCall(ISDestroy(&to)); 8964 PetscCall(VecDestroy(&gvec)); 8965 } 8966 PetscCall(MatDestroy(&At)); 8967 PetscFunctionReturn(PETSC_SUCCESS); 8968 } 8969