1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork, *data, *U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM, bN, lwork, lierr, di = 1; 20 PetscInt ulw, i, nr, nc, n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A, &nr, &nc)); 27 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 32 PetscCall(PetscMalloc1(ulw, &uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr, nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n, &sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr * nr, &U)); 46 PetscCall(PetscBLASIntCast(nr, &bM)); 47 PetscCall(PetscBLASIntCast(nc, &bN)); 48 PetscCall(PetscBLASIntCast(ulw, &lwork)); 49 PetscCall(MatDenseGetArray(A, &data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 53 #else 54 PetscCall(PetscMalloc1(5 * n, &rwork2)); 55 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 60 PetscCall(MatDenseRestoreArray(A, &data)); 61 for (i = 0; i < n; i++) 62 if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) PetscCall(PetscFree(sing)); 64 if (!work) PetscCall(PetscFree(uwork)); 65 /* create B */ 66 if (!range) { 67 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 68 PetscCall(MatDenseGetArray(*B, &data)); 69 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 70 } else { 71 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 72 PetscCall(MatDenseGetArray(*B, &data)); 73 PetscCall(PetscArraycpy(data, U, i * nr)); 74 } 75 PetscCall(MatDenseRestoreArray(*B, &data)); 76 PetscCall(PetscFree(U)); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 Mat GE, GEd; 89 PetscInt rsize, csize, esize; 90 PetscScalar *ptr; 91 92 PetscFunctionBegin; 93 PetscCall(ISGetSize(edge, &esize)); 94 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 95 PetscCall(ISGetSize(extrow, &rsize)); 96 PetscCall(ISGetSize(extcol, &csize)); 97 98 /* gradients */ 99 ptr = work + 5 * esize; 100 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 101 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 102 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 103 PetscCall(MatDestroy(&GE)); 104 105 /* constants */ 106 ptr += rsize * csize; 107 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 108 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 109 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 110 PetscCall(MatDestroy(&GE)); 111 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 112 PetscCall(MatDestroy(&GEd)); 113 114 if (corners) { 115 Mat GEc; 116 const PetscScalar *vals; 117 PetscScalar v; 118 119 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 120 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 121 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 122 /* v = PetscAbsScalar(vals[0]); */ 123 v = 1.; 124 cvals[0] = vals[0] / v; 125 cvals[1] = vals[1] / v; 126 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 127 PetscCall(MatScale(*GKins, 1. / v)); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 133 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 134 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 135 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 136 PetscCall(MatView(GEc, viewer)); 137 PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK")); 138 PetscCall(MatView(*GKins, viewer)); 139 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 140 PetscCall(MatView(GEd, viewer)); 141 PetscCall(PetscViewerDestroy(&viewer)); 142 } 143 #endif 144 PetscCall(MatDestroy(&GEd)); 145 PetscCall(MatDestroy(&GEc)); 146 } 147 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 154 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 155 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 159 MPI_Comm comm; 160 IS lned, primals, allprimals, nedfieldlocal; 161 IS *eedges, *extrows, *extcols, *alleedges; 162 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 163 PetscScalar *vals, *work; 164 PetscReal *rwork; 165 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 166 PetscInt ne, nv, Lv, order, n, field; 167 PetscInt n_neigh, *neigh, *n_shared, **shared; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 singular = PETSC_FALSE; 187 188 /* Command line customization */ 189 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 190 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 191 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 192 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 193 /* print debug info TODO: to be removed */ 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 195 PetscOptionsEnd(); 196 197 /* Return if there are no edges in the decomposition and the problem is not singular */ 198 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 199 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 200 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 201 if (!singular) { 202 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 203 lrc[0] = PETSC_FALSE; 204 for (i = 0; i < n; i++) { 205 if (PetscRealPart(vals[i]) > 2.) { 206 lrc[0] = PETSC_TRUE; 207 break; 208 } 209 } 210 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 211 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 212 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 /* Get Nedelec field */ 216 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 217 if (pcbddc->n_ISForDofsLocal && field >= 0) { 218 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 219 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 220 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 221 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 222 ne = n; 223 nedfieldlocal = NULL; 224 global = PETSC_TRUE; 225 } else if (field == PETSC_DECIDE) { 226 PetscInt rst, ren, *idx; 227 228 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 229 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 230 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 231 for (i = rst; i < ren; i++) { 232 PetscInt nc; 233 234 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 235 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 236 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 237 } 238 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 239 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 240 PetscCall(PetscMalloc1(n, &idx)); 241 for (i = 0, ne = 0; i < n; i++) 242 if (matis->sf_leafdata[i]) idx[ne++] = i; 243 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 244 } else { 245 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 /* Sanity checks */ 249 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 250 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 251 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 252 253 /* Just set primal dofs and return */ 254 if (setprimal) { 255 IS enedfieldlocal; 256 PetscInt *eidxs; 257 258 PetscCall(PetscMalloc1(ne, &eidxs)); 259 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 260 if (nedfieldlocal) { 261 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 262 for (i = 0, cum = 0; i < ne; i++) { 263 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 264 } 265 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 266 } else { 267 for (i = 0, cum = 0; i < ne; i++) { 268 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 269 } 270 } 271 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 272 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 273 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 274 PetscCall(PetscFree(eidxs)); 275 PetscCall(ISDestroy(&nedfieldlocal)); 276 PetscCall(ISDestroy(&enedfieldlocal)); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 /* Compute some l2g maps */ 281 if (nedfieldlocal) { 282 IS is; 283 284 /* need to map from the local Nedelec field to local numbering */ 285 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 286 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 287 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 288 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 289 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 290 if (global) { 291 PetscCall(PetscObjectReference((PetscObject)al2g)); 292 el2g = al2g; 293 } else { 294 IS gis; 295 296 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 297 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 298 PetscCall(ISDestroy(&gis)); 299 } 300 PetscCall(ISDestroy(&is)); 301 } else { 302 /* restore default */ 303 pcbddc->nedfield = -1; 304 /* one ref for the destruction of al2g, one for el2g */ 305 PetscCall(PetscObjectReference((PetscObject)al2g)); 306 PetscCall(PetscObjectReference((PetscObject)al2g)); 307 el2g = al2g; 308 fl2g = NULL; 309 } 310 311 /* Start communication to drop connections for interior edges (for cc analysis only) */ 312 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 313 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 314 if (nedfieldlocal) { 315 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 316 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 317 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 318 } else { 319 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 320 } 321 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 322 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 323 324 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 325 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 326 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 327 if (global) { 328 PetscInt rst; 329 330 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 331 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 332 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 333 } 334 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 335 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 336 } else { 337 PetscInt *tbz; 338 339 PetscCall(PetscMalloc1(ne, &tbz)); 340 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 342 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 343 for (i = 0, cum = 0; i < ne; i++) 344 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 345 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 346 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 347 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 348 PetscCall(PetscFree(tbz)); 349 } 350 } else { /* we need the entire G to infer the nullspace */ 351 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 352 G = pcbddc->discretegradient; 353 } 354 355 /* Extract subdomain relevant rows of G */ 356 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 357 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 358 PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); 359 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 360 PetscCall(ISDestroy(&lned)); 361 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 362 PetscCall(MatDestroy(&lGall)); 363 PetscCall(MatISGetLocalMat(lGis, &lG)); 364 365 /* SF for nodal dofs communications */ 366 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 367 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 368 PetscCall(PetscObjectReference((PetscObject)vl2g)); 369 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 370 PetscCall(PetscSFCreate(comm, &sfv)); 371 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 372 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 373 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 374 i = singular ? 2 : 1; 375 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 376 377 /* Destroy temporary G created in MATIS format and modified G */ 378 PetscCall(PetscObjectReference((PetscObject)lG)); 379 PetscCall(MatDestroy(&lGis)); 380 PetscCall(MatDestroy(&G)); 381 382 if (print) { 383 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 384 PetscCall(MatView(lG, NULL)); 385 } 386 387 /* Save lG for values insertion in change of basis */ 388 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 389 390 /* Analyze the edge-nodes connections (duplicate lG) */ 391 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 392 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 393 PetscCall(PetscBTCreate(nv, &btv)); 394 PetscCall(PetscBTCreate(ne, &bte)); 395 PetscCall(PetscBTCreate(ne, &btb)); 396 PetscCall(PetscBTCreate(ne, &btbd)); 397 PetscCall(PetscBTCreate(nv, &btvcand)); 398 /* need to import the boundary specification to ensure the 399 proper detection of coarse edges' endpoints */ 400 if (pcbddc->DirichletBoundariesLocal) { 401 IS is; 402 403 if (fl2g) { 404 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 405 } else { 406 is = pcbddc->DirichletBoundariesLocal; 407 } 408 PetscCall(ISGetLocalSize(is, &cum)); 409 PetscCall(ISGetIndices(is, &idxs)); 410 for (i = 0; i < cum; i++) { 411 if (idxs[i] >= 0) { 412 PetscCall(PetscBTSet(btb, idxs[i])); 413 PetscCall(PetscBTSet(btbd, idxs[i])); 414 } 415 } 416 PetscCall(ISRestoreIndices(is, &idxs)); 417 if (fl2g) PetscCall(ISDestroy(&is)); 418 } 419 if (pcbddc->NeumannBoundariesLocal) { 420 IS is; 421 422 if (fl2g) { 423 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 424 } else { 425 is = pcbddc->NeumannBoundariesLocal; 426 } 427 PetscCall(ISGetLocalSize(is, &cum)); 428 PetscCall(ISGetIndices(is, &idxs)); 429 for (i = 0; i < cum; i++) { 430 if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i])); 431 } 432 PetscCall(ISRestoreIndices(is, &idxs)); 433 if (fl2g) PetscCall(ISDestroy(&is)); 434 } 435 436 /* Count neighs per dof */ 437 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs)); 438 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs)); 439 440 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 441 for proper detection of coarse edges' endpoints */ 442 PetscCall(PetscBTCreate(ne, &btee)); 443 for (i = 0; i < ne; i++) { 444 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 445 } 446 PetscCall(PetscMalloc1(ne, &marks)); 447 if (!conforming) { 448 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 449 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 450 } 451 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 452 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 453 cum = 0; 454 for (i = 0; i < ne; i++) { 455 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 456 if (!PetscBTLookup(btee, i)) { 457 marks[cum++] = i; 458 continue; 459 } 460 /* set badly connected edge dofs as primal */ 461 if (!conforming) { 462 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 463 marks[cum++] = i; 464 PetscCall(PetscBTSet(bte, i)); 465 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 466 } else { 467 /* every edge dofs should be connected through a certain number of nodal dofs 468 to other edge dofs belonging to coarse edges 469 - at most 2 endpoints 470 - order-1 interior nodal dofs 471 - no undefined nodal dofs (nconn < order) 472 */ 473 PetscInt ends = 0, ints = 0, undef = 0; 474 for (j = ii[i]; j < ii[i + 1]; j++) { 475 PetscInt v = jj[j], k; 476 PetscInt nconn = iit[v + 1] - iit[v]; 477 for (k = iit[v]; k < iit[v + 1]; k++) 478 if (!PetscBTLookup(btee, jjt[k])) nconn--; 479 if (nconn > order) ends++; 480 else if (nconn == order) ints++; 481 else undef++; 482 } 483 if (undef || ends > 2 || ints != order - 1) { 484 marks[cum++] = i; 485 PetscCall(PetscBTSet(bte, i)); 486 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 487 } 488 } 489 } 490 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 491 if (!order && ii[i + 1] != ii[i]) { 492 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 493 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 494 } 495 } 496 PetscCall(PetscBTDestroy(&btee)); 497 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 498 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 499 if (!conforming) { 500 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 501 PetscCall(MatDestroy(&lGt)); 502 } 503 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 504 505 /* identify splitpoints and corner candidates */ 506 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 507 if (print) { 508 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 509 PetscCall(MatView(lGe, NULL)); 510 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 511 PetscCall(MatView(lGt, NULL)); 512 } 513 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 514 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 515 for (i = 0; i < nv; i++) { 516 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 517 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 518 if (!order) { /* variable order */ 519 PetscReal vorder = 0.; 520 521 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 522 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 523 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 524 ord = 1; 525 } 526 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 527 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 528 if (PetscBTLookup(btbd, jj[j])) { 529 bdir = PETSC_TRUE; 530 break; 531 } 532 if (vc != ecount[jj[j]]) { 533 sneighs = PETSC_FALSE; 534 } else { 535 PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]]; 536 for (k = 0; k < vc; k++) { 537 if (vn[k] != en[k]) { 538 sneighs = PETSC_FALSE; 539 break; 540 } 541 } 542 } 543 } 544 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 545 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 546 PetscCall(PetscBTSet(btv, i)); 547 } else if (test == ord) { 548 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 549 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 550 PetscCall(PetscBTSet(btv, i)); 551 } else { 552 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 553 PetscCall(PetscBTSet(btvcand, i)); 554 } 555 } 556 } 557 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 558 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 559 PetscCall(PetscBTDestroy(&btbd)); 560 561 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 562 if (order != 1) { 563 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 564 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 565 for (i = 0; i < nv; i++) { 566 if (PetscBTLookup(btvcand, i)) { 567 PetscBool found = PETSC_FALSE; 568 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 569 PetscInt k, e = jj[j]; 570 if (PetscBTLookup(bte, e)) continue; 571 for (k = iit[e]; k < iit[e + 1]; k++) { 572 PetscInt v = jjt[k]; 573 if (v != i && PetscBTLookup(btvcand, v)) { 574 found = PETSC_TRUE; 575 break; 576 } 577 } 578 } 579 if (!found) { 580 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 581 PetscCall(PetscBTClear(btvcand, i)); 582 } else { 583 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 584 } 585 } 586 } 587 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 588 } 589 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 590 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 591 PetscCall(MatDestroy(&lGe)); 592 593 /* Get the local G^T explicitly */ 594 PetscCall(MatDestroy(&lGt)); 595 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 596 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 597 598 /* Mark interior nodal dofs */ 599 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 600 PetscCall(PetscBTCreate(nv, &btvi)); 601 for (i = 1; i < n_neigh; i++) { 602 for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j])); 603 } 604 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 605 606 /* communicate corners and splitpoints */ 607 PetscCall(PetscMalloc1(nv, &vmarks)); 608 PetscCall(PetscArrayzero(sfvleaves, nv)); 609 PetscCall(PetscArrayzero(sfvroots, Lv)); 610 for (i = 0; i < nv; i++) 611 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 612 613 if (print) { 614 IS tbz; 615 616 cum = 0; 617 for (i = 0; i < nv; i++) 618 if (sfvleaves[i]) vmarks[cum++] = i; 619 620 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 621 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 622 PetscCall(ISView(tbz, NULL)); 623 PetscCall(ISDestroy(&tbz)); 624 } 625 626 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 627 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 628 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 629 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 630 631 /* Zero rows of lGt corresponding to identified corners 632 and interior nodal dofs */ 633 cum = 0; 634 for (i = 0; i < nv; i++) { 635 if (sfvleaves[i]) { 636 vmarks[cum++] = i; 637 PetscCall(PetscBTSet(btv, i)); 638 } 639 if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 640 } 641 PetscCall(PetscBTDestroy(&btvi)); 642 if (print) { 643 IS tbz; 644 645 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 646 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 647 PetscCall(ISView(tbz, NULL)); 648 PetscCall(ISDestroy(&tbz)); 649 } 650 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 651 PetscCall(PetscFree(vmarks)); 652 PetscCall(PetscSFDestroy(&sfv)); 653 PetscCall(PetscFree2(sfvleaves, sfvroots)); 654 655 /* Recompute G */ 656 PetscCall(MatDestroy(&lG)); 657 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 658 if (print) { 659 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 660 PetscCall(MatView(lG, NULL)); 661 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 662 PetscCall(MatView(lGt, NULL)); 663 } 664 665 /* Get primal dofs (if any) */ 666 cum = 0; 667 for (i = 0; i < ne; i++) { 668 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 669 } 670 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 671 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 672 if (print) { 673 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 674 PetscCall(ISView(primals, NULL)); 675 } 676 PetscCall(PetscBTDestroy(&bte)); 677 /* TODO: what if the user passed in some of them ? */ 678 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 679 PetscCall(ISDestroy(&primals)); 680 681 /* Compute edge connectivity */ 682 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 683 684 /* Symbolic conn = lG*lGt */ 685 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 686 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 687 PetscCall(MatProductSetAlgorithm(conn, "default")); 688 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 689 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 690 PetscCall(MatProductSetFromOptions(conn)); 691 PetscCall(MatProductSymbolic(conn)); 692 693 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 694 if (fl2g) { 695 PetscBT btf; 696 PetscInt *iia, *jja, *iiu, *jju; 697 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 698 699 /* create CSR for all local dofs */ 700 PetscCall(PetscMalloc1(n + 1, &iia)); 701 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 702 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 703 iiu = pcbddc->mat_graph->xadj; 704 jju = pcbddc->mat_graph->adjncy; 705 } else if (pcbddc->use_local_adj) { 706 rest = PETSC_TRUE; 707 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 708 } else { 709 free = PETSC_TRUE; 710 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 711 iiu[0] = 0; 712 for (i = 0; i < n; i++) { 713 iiu[i + 1] = i + 1; 714 jju[i] = -1; 715 } 716 } 717 718 /* import sizes of CSR */ 719 iia[0] = 0; 720 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 721 722 /* overwrite entries corresponding to the Nedelec field */ 723 PetscCall(PetscBTCreate(n, &btf)); 724 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 725 for (i = 0; i < ne; i++) { 726 PetscCall(PetscBTSet(btf, idxs[i])); 727 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 728 } 729 730 /* iia in CSR */ 731 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 732 733 /* jja in CSR */ 734 PetscCall(PetscMalloc1(iia[n], &jja)); 735 for (i = 0; i < n; i++) 736 if (!PetscBTLookup(btf, i)) 737 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 738 739 /* map edge dofs connectivity */ 740 if (jj) { 741 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 742 for (i = 0; i < ne; i++) { 743 PetscInt e = idxs[i]; 744 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 745 } 746 } 747 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 748 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 749 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 750 if (free) PetscCall(PetscFree2(iiu, jju)); 751 PetscCall(PetscBTDestroy(&btf)); 752 } else { 753 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 754 } 755 756 /* Analyze interface for edge dofs */ 757 PetscCall(PCBDDCAnalyzeInterface(pc)); 758 pcbddc->mat_graph->twodim = PETSC_FALSE; 759 760 /* Get coarse edges in the edge space */ 761 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 762 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 763 764 if (fl2g) { 765 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 766 PetscCall(PetscMalloc1(nee, &eedges)); 767 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 768 } else { 769 eedges = alleedges; 770 primals = allprimals; 771 } 772 773 /* Mark fine edge dofs with their coarse edge id */ 774 PetscCall(PetscArrayzero(marks, ne)); 775 PetscCall(ISGetLocalSize(primals, &cum)); 776 PetscCall(ISGetIndices(primals, &idxs)); 777 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 778 PetscCall(ISRestoreIndices(primals, &idxs)); 779 if (print) { 780 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 781 PetscCall(ISView(primals, NULL)); 782 } 783 784 maxsize = 0; 785 for (i = 0; i < nee; i++) { 786 PetscInt size, mark = i + 1; 787 788 PetscCall(ISGetLocalSize(eedges[i], &size)); 789 PetscCall(ISGetIndices(eedges[i], &idxs)); 790 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 791 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 792 maxsize = PetscMax(maxsize, size); 793 } 794 795 /* Find coarse edge endpoints */ 796 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 797 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 798 for (i = 0; i < nee; i++) { 799 PetscInt mark = i + 1, size; 800 801 PetscCall(ISGetLocalSize(eedges[i], &size)); 802 if (!size && nedfieldlocal) continue; 803 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 804 PetscCall(ISGetIndices(eedges[i], &idxs)); 805 if (print) { 806 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 807 PetscCall(ISView(eedges[i], NULL)); 808 } 809 for (j = 0; j < size; j++) { 810 PetscInt k, ee = idxs[j]; 811 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 812 for (k = ii[ee]; k < ii[ee + 1]; k++) { 813 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 814 if (PetscBTLookup(btv, jj[k])) { 815 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 816 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 817 PetscInt k2; 818 PetscBool corner = PETSC_FALSE; 819 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 820 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 821 /* it's a corner if either is connected with an edge dof belonging to a different cc or 822 if the edge dof lie on the natural part of the boundary */ 823 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 824 corner = PETSC_TRUE; 825 break; 826 } 827 } 828 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 829 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 830 PetscCall(PetscBTSet(btv, jj[k])); 831 } else { 832 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 833 } 834 } 835 } 836 } 837 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 838 } 839 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 840 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 841 PetscCall(PetscBTDestroy(&btb)); 842 843 /* Reset marked primal dofs */ 844 PetscCall(ISGetLocalSize(primals, &cum)); 845 PetscCall(ISGetIndices(primals, &idxs)); 846 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 847 PetscCall(ISRestoreIndices(primals, &idxs)); 848 849 /* Now use the initial lG */ 850 PetscCall(MatDestroy(&lG)); 851 PetscCall(MatDestroy(&lGt)); 852 lG = lGinit; 853 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 854 855 /* Compute extended cols indices */ 856 PetscCall(PetscBTCreate(nv, &btvc)); 857 PetscCall(PetscBTCreate(nee, &bter)); 858 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 859 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 860 i *= maxsize; 861 PetscCall(PetscCalloc1(nee, &extcols)); 862 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 863 eerr = PETSC_FALSE; 864 for (i = 0; i < nee; i++) { 865 PetscInt size, found = 0; 866 867 cum = 0; 868 PetscCall(ISGetLocalSize(eedges[i], &size)); 869 if (!size && nedfieldlocal) continue; 870 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 871 PetscCall(ISGetIndices(eedges[i], &idxs)); 872 PetscCall(PetscBTMemzero(nv, btvc)); 873 for (j = 0; j < size; j++) { 874 PetscInt k, ee = idxs[j]; 875 for (k = ii[ee]; k < ii[ee + 1]; k++) { 876 PetscInt vv = jj[k]; 877 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 878 else if (!PetscBTLookupSet(btvc, vv)) found++; 879 } 880 } 881 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 882 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 883 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 884 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 885 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 886 /* it may happen that endpoints are not defined at this point 887 if it is the case, mark this edge for a second pass */ 888 if (cum != size - 1 || found != 2) { 889 PetscCall(PetscBTSet(bter, i)); 890 if (print) { 891 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 892 PetscCall(ISView(eedges[i], NULL)); 893 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 894 PetscCall(ISView(extcols[i], NULL)); 895 } 896 eerr = PETSC_TRUE; 897 } 898 } 899 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 900 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 901 if (done) { 902 PetscInt *newprimals; 903 904 PetscCall(PetscMalloc1(ne, &newprimals)); 905 PetscCall(ISGetLocalSize(primals, &cum)); 906 PetscCall(ISGetIndices(primals, &idxs)); 907 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 908 PetscCall(ISRestoreIndices(primals, &idxs)); 909 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 910 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 911 for (i = 0; i < nee; i++) { 912 PetscBool has_candidates = PETSC_FALSE; 913 if (PetscBTLookup(bter, i)) { 914 PetscInt size, mark = i + 1; 915 916 PetscCall(ISGetLocalSize(eedges[i], &size)); 917 PetscCall(ISGetIndices(eedges[i], &idxs)); 918 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 919 for (j = 0; j < size; j++) { 920 PetscInt k, ee = idxs[j]; 921 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 922 for (k = ii[ee]; k < ii[ee + 1]; k++) { 923 /* set all candidates located on the edge as corners */ 924 if (PetscBTLookup(btvcand, jj[k])) { 925 PetscInt k2, vv = jj[k]; 926 has_candidates = PETSC_TRUE; 927 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 928 PetscCall(PetscBTSet(btv, vv)); 929 /* set all edge dofs connected to candidate as primals */ 930 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 931 if (marks[jjt[k2]] == mark) { 932 PetscInt k3, ee2 = jjt[k2]; 933 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 934 newprimals[cum++] = ee2; 935 /* finally set the new corners */ 936 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 937 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 938 PetscCall(PetscBTSet(btv, jj[k3])); 939 } 940 } 941 } 942 } else { 943 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 944 } 945 } 946 } 947 if (!has_candidates) { /* circular edge */ 948 PetscInt k, ee = idxs[0], *tmarks; 949 950 PetscCall(PetscCalloc1(ne, &tmarks)); 951 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 952 for (k = ii[ee]; k < ii[ee + 1]; k++) { 953 PetscInt k2; 954 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 955 PetscCall(PetscBTSet(btv, jj[k])); 956 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 957 } 958 for (j = 0; j < size; j++) { 959 if (tmarks[idxs[j]] > 1) { 960 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 961 newprimals[cum++] = idxs[j]; 962 } 963 } 964 PetscCall(PetscFree(tmarks)); 965 } 966 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 967 } 968 PetscCall(ISDestroy(&extcols[i])); 969 } 970 PetscCall(PetscFree(extcols)); 971 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 972 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 973 if (fl2g) { 974 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 975 PetscCall(ISDestroy(&primals)); 976 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 977 PetscCall(PetscFree(eedges)); 978 } 979 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 980 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 981 PetscCall(PetscFree(newprimals)); 982 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 983 PetscCall(ISDestroy(&primals)); 984 PetscCall(PCBDDCAnalyzeInterface(pc)); 985 pcbddc->mat_graph->twodim = PETSC_FALSE; 986 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 987 if (fl2g) { 988 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 989 PetscCall(PetscMalloc1(nee, &eedges)); 990 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 991 } else { 992 eedges = alleedges; 993 primals = allprimals; 994 } 995 PetscCall(PetscCalloc1(nee, &extcols)); 996 997 /* Mark again */ 998 PetscCall(PetscArrayzero(marks, ne)); 999 for (i = 0; i < nee; i++) { 1000 PetscInt size, mark = i + 1; 1001 1002 PetscCall(ISGetLocalSize(eedges[i], &size)); 1003 PetscCall(ISGetIndices(eedges[i], &idxs)); 1004 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1005 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1006 } 1007 if (print) { 1008 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1009 PetscCall(ISView(primals, NULL)); 1010 } 1011 1012 /* Recompute extended cols */ 1013 eerr = PETSC_FALSE; 1014 for (i = 0; i < nee; i++) { 1015 PetscInt size; 1016 1017 cum = 0; 1018 PetscCall(ISGetLocalSize(eedges[i], &size)); 1019 if (!size && nedfieldlocal) continue; 1020 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1021 PetscCall(ISGetIndices(eedges[i], &idxs)); 1022 for (j = 0; j < size; j++) { 1023 PetscInt k, ee = idxs[j]; 1024 for (k = ii[ee]; k < ii[ee + 1]; k++) 1025 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1026 } 1027 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1028 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1029 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1030 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1031 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1032 if (cum != size - 1) { 1033 if (print) { 1034 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1035 PetscCall(ISView(eedges[i], NULL)); 1036 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1037 PetscCall(ISView(extcols[i], NULL)); 1038 } 1039 eerr = PETSC_TRUE; 1040 } 1041 } 1042 } 1043 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1044 PetscCall(PetscFree2(extrow, gidxs)); 1045 PetscCall(PetscBTDestroy(&bter)); 1046 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1047 /* an error should not occur at this point */ 1048 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1049 1050 /* Check the number of endpoints */ 1051 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1052 PetscCall(PetscMalloc1(2 * nee, &corners)); 1053 PetscCall(PetscMalloc1(nee, &cedges)); 1054 for (i = 0; i < nee; i++) { 1055 PetscInt size, found = 0, gc[2]; 1056 1057 /* init with defaults */ 1058 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1059 PetscCall(ISGetLocalSize(eedges[i], &size)); 1060 if (!size && nedfieldlocal) continue; 1061 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1062 PetscCall(ISGetIndices(eedges[i], &idxs)); 1063 PetscCall(PetscBTMemzero(nv, btvc)); 1064 for (j = 0; j < size; j++) { 1065 PetscInt k, ee = idxs[j]; 1066 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1067 PetscInt vv = jj[k]; 1068 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1069 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1070 corners[i * 2 + found++] = vv; 1071 } 1072 } 1073 } 1074 if (found != 2) { 1075 PetscInt e; 1076 if (fl2g) { 1077 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1078 } else { 1079 e = idxs[0]; 1080 } 1081 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1082 } 1083 1084 /* get primal dof index on this coarse edge */ 1085 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1086 if (gc[0] > gc[1]) { 1087 PetscInt swap = corners[2 * i]; 1088 corners[2 * i] = corners[2 * i + 1]; 1089 corners[2 * i + 1] = swap; 1090 } 1091 cedges[i] = idxs[size - 1]; 1092 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1093 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1094 } 1095 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1096 PetscCall(PetscBTDestroy(&btvc)); 1097 1098 if (PetscDefined(USE_DEBUG)) { 1099 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1100 not interfere with neighbouring coarse edges */ 1101 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1102 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1103 for (i = 0; i < nv; i++) { 1104 PetscInt emax = 0, eemax = 0; 1105 1106 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1107 PetscCall(PetscArrayzero(emarks, nee + 1)); 1108 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1109 for (j = 1; j < nee + 1; j++) { 1110 if (emax < emarks[j]) { 1111 emax = emarks[j]; 1112 eemax = j; 1113 } 1114 } 1115 /* not relevant for edges */ 1116 if (!eemax) continue; 1117 1118 for (j = ii[i]; j < ii[i + 1]; j++) { 1119 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1120 } 1121 } 1122 PetscCall(PetscFree(emarks)); 1123 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1124 } 1125 1126 /* Compute extended rows indices for edge blocks of the change of basis */ 1127 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1128 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1129 extmem *= maxsize; 1130 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1131 PetscCall(PetscMalloc1(nee, &extrows)); 1132 PetscCall(PetscCalloc1(nee, &extrowcum)); 1133 for (i = 0; i < nv; i++) { 1134 PetscInt mark = 0, size, start; 1135 1136 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1137 for (j = ii[i]; j < ii[i + 1]; j++) 1138 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1139 1140 /* not relevant */ 1141 if (!mark) continue; 1142 1143 /* import extended row */ 1144 mark--; 1145 start = mark * extmem + extrowcum[mark]; 1146 size = ii[i + 1] - ii[i]; 1147 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1148 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1149 extrowcum[mark] += size; 1150 } 1151 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1152 PetscCall(MatDestroy(&lGt)); 1153 PetscCall(PetscFree(marks)); 1154 1155 /* Compress extrows */ 1156 cum = 0; 1157 for (i = 0; i < nee; i++) { 1158 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1159 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1161 cum = PetscMax(cum, size); 1162 } 1163 PetscCall(PetscFree(extrowcum)); 1164 PetscCall(PetscBTDestroy(&btv)); 1165 PetscCall(PetscBTDestroy(&btvcand)); 1166 1167 /* Workspace for lapack inner calls and VecSetValues */ 1168 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1169 1170 /* Create change of basis matrix (preallocation can be improved) */ 1171 PetscCall(MatCreate(comm, &T)); 1172 PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N)); 1173 PetscCall(MatSetType(T, MATAIJ)); 1174 PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL)); 1175 PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL)); 1176 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1177 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1178 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1179 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1180 1181 /* Defaults to identity */ 1182 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 1183 PetscCall(VecSet(tvec, 1.0)); 1184 PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES)); 1185 PetscCall(VecDestroy(&tvec)); 1186 1187 /* Create discrete gradient for the coarser level if needed */ 1188 PetscCall(MatDestroy(&pcbddc->nedcG)); 1189 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1190 if (pcbddc->current_level < pcbddc->max_levels) { 1191 ISLocalToGlobalMapping cel2g, cvl2g; 1192 IS wis, gwis; 1193 PetscInt cnv, cne; 1194 1195 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1196 if (fl2g) { 1197 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1198 } else { 1199 PetscCall(PetscObjectReference((PetscObject)wis)); 1200 pcbddc->nedclocal = wis; 1201 } 1202 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1203 PetscCall(ISDestroy(&wis)); 1204 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1205 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1206 PetscCall(ISDestroy(&wis)); 1207 PetscCall(ISDestroy(&gwis)); 1208 1209 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1210 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1211 PetscCall(ISDestroy(&wis)); 1212 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1213 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1214 PetscCall(ISDestroy(&wis)); 1215 PetscCall(ISDestroy(&gwis)); 1216 1217 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1218 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1219 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1220 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1221 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1222 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1223 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1224 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1225 } 1226 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1227 1228 #if defined(PRINT_GDET) 1229 inc = 0; 1230 lev = pcbddc->current_level; 1231 #endif 1232 1233 /* Insert values in the change of basis matrix */ 1234 for (i = 0; i < nee; i++) { 1235 Mat Gins = NULL, GKins = NULL; 1236 IS cornersis = NULL; 1237 PetscScalar cvals[2]; 1238 1239 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1240 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1241 if (Gins && GKins) { 1242 const PetscScalar *data; 1243 const PetscInt *rows, *cols; 1244 PetscInt nrh, nch, nrc, ncc; 1245 1246 PetscCall(ISGetIndices(eedges[i], &cols)); 1247 /* H1 */ 1248 PetscCall(ISGetIndices(extrows[i], &rows)); 1249 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1250 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1251 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1252 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1253 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1254 /* complement */ 1255 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1256 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1257 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1258 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1259 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1260 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1261 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1262 1263 /* coarse discrete gradient */ 1264 if (pcbddc->nedcG) { 1265 PetscInt cols[2]; 1266 1267 cols[0] = 2 * i; 1268 cols[1] = 2 * i + 1; 1269 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1270 } 1271 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1272 } 1273 PetscCall(ISDestroy(&extrows[i])); 1274 PetscCall(ISDestroy(&extcols[i])); 1275 PetscCall(ISDestroy(&cornersis)); 1276 PetscCall(MatDestroy(&Gins)); 1277 PetscCall(MatDestroy(&GKins)); 1278 } 1279 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1280 1281 /* Start assembling */ 1282 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1283 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1284 1285 /* Free */ 1286 if (fl2g) { 1287 PetscCall(ISDestroy(&primals)); 1288 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1289 PetscCall(PetscFree(eedges)); 1290 } 1291 1292 /* hack mat_graph with primal dofs on the coarse edges */ 1293 { 1294 PCBDDCGraph graph = pcbddc->mat_graph; 1295 PetscInt *oqueue = graph->queue; 1296 PetscInt *ocptr = graph->cptr; 1297 PetscInt ncc, *idxs; 1298 1299 /* find first primal edge */ 1300 if (pcbddc->nedclocal) { 1301 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1302 } else { 1303 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1304 idxs = cedges; 1305 } 1306 cum = 0; 1307 while (cum < nee && cedges[cum] < 0) cum++; 1308 1309 /* adapt connected components */ 1310 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1311 graph->cptr[0] = 0; 1312 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1313 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1314 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1315 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1316 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1317 ncc++; 1318 lc--; 1319 cum++; 1320 while (cum < nee && cedges[cum] < 0) cum++; 1321 } 1322 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1323 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1324 ncc++; 1325 } 1326 graph->ncc = ncc; 1327 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1328 PetscCall(PetscFree2(ocptr, oqueue)); 1329 } 1330 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1331 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1332 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1333 PetscCall(MatDestroy(&conn)); 1334 1335 PetscCall(ISDestroy(&nedfieldlocal)); 1336 PetscCall(PetscFree(extrow)); 1337 PetscCall(PetscFree2(work, rwork)); 1338 PetscCall(PetscFree(corners)); 1339 PetscCall(PetscFree(cedges)); 1340 PetscCall(PetscFree(extrows)); 1341 PetscCall(PetscFree(extcols)); 1342 PetscCall(MatDestroy(&lG)); 1343 1344 /* Complete assembling */ 1345 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1346 if (pcbddc->nedcG) { 1347 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1348 #if 0 1349 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1350 PetscCall(MatView(pcbddc->nedcG,NULL)); 1351 #endif 1352 } 1353 1354 /* set change of basis */ 1355 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1356 PetscCall(MatDestroy(&T)); 1357 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 /* the near-null space of BDDC carries information on quadrature weights, 1362 and these can be collinear -> so cheat with MatNullSpaceCreate 1363 and create a suitable set of basis vectors first */ 1364 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1365 { 1366 PetscInt i; 1367 1368 PetscFunctionBegin; 1369 for (i = 0; i < nvecs; i++) { 1370 PetscInt first, last; 1371 1372 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1373 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1374 if (i >= first && i < last) { 1375 PetscScalar *data; 1376 PetscCall(VecGetArray(quad_vecs[i], &data)); 1377 if (!has_const) { 1378 data[i - first] = 1.; 1379 } else { 1380 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1381 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1382 } 1383 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1384 } 1385 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1386 } 1387 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1388 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1389 PetscInt first, last; 1390 PetscCall(VecLockReadPop(quad_vecs[i])); 1391 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1392 if (i >= first && i < last) { 1393 PetscScalar *data; 1394 PetscCall(VecGetArray(quad_vecs[i], &data)); 1395 if (!has_const) { 1396 data[i - first] = 0.; 1397 } else { 1398 data[2 * i - first] = 0.; 1399 data[2 * i - first + 1] = 0.; 1400 } 1401 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1402 } 1403 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1404 PetscCall(VecLockReadPush(quad_vecs[i])); 1405 } 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1410 { 1411 Mat loc_divudotp; 1412 Vec p, v, vins, quad_vec, *quad_vecs; 1413 ISLocalToGlobalMapping map; 1414 PetscScalar *vals; 1415 const PetscScalar *array; 1416 PetscInt i, maxneighs = 0, maxsize, *gidxs; 1417 PetscInt n_neigh, *neigh, *n_shared, **shared; 1418 PetscMPIInt rank; 1419 1420 PetscFunctionBegin; 1421 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1422 for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs); 1423 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 1424 if (!maxneighs) { 1425 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1426 *nnsp = NULL; 1427 PetscFunctionReturn(PETSC_SUCCESS); 1428 } 1429 maxsize = 0; 1430 for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize); 1431 PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals)); 1432 /* create vectors to hold quadrature weights */ 1433 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1434 if (!transpose) { 1435 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1436 } else { 1437 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1438 } 1439 PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs)); 1440 PetscCall(VecDestroy(&quad_vec)); 1441 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp)); 1442 for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i])); 1443 1444 /* compute local quad vec */ 1445 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1446 if (!transpose) { 1447 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1448 } else { 1449 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1450 } 1451 PetscCall(VecSet(p, 1.)); 1452 if (!transpose) { 1453 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1454 } else { 1455 PetscCall(MatMult(loc_divudotp, p, v)); 1456 } 1457 if (vl2l) { 1458 Mat lA; 1459 VecScatter sc; 1460 1461 PetscCall(MatISGetLocalMat(A, &lA)); 1462 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1463 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1464 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1466 PetscCall(VecScatterDestroy(&sc)); 1467 } else { 1468 vins = v; 1469 } 1470 PetscCall(VecGetArrayRead(vins, &array)); 1471 PetscCall(VecDestroy(&p)); 1472 1473 /* insert in global quadrature vecs */ 1474 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1475 for (i = 1; i < n_neigh; i++) { 1476 const PetscInt *idxs; 1477 PetscInt idx, nn, j; 1478 1479 idxs = shared[i]; 1480 nn = n_shared[i]; 1481 for (j = 0; j < nn; j++) vals[j] = array[idxs[j]]; 1482 PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx)); 1483 idx = -(idx + 1); 1484 PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs); 1485 PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs)); 1486 PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES)); 1487 } 1488 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1489 PetscCall(VecRestoreArrayRead(vins, &array)); 1490 if (vl2l) PetscCall(VecDestroy(&vins)); 1491 PetscCall(VecDestroy(&v)); 1492 PetscCall(PetscFree2(gidxs, vals)); 1493 1494 /* assemble near null space */ 1495 for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i])); 1496 for (i = 0; i < maxneighs; i++) { 1497 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1498 PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view")); 1499 PetscCall(VecLockReadPush(quad_vecs[i])); 1500 } 1501 PetscCall(VecDestroyVecs(maxneighs, &quad_vecs)); 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1506 { 1507 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1508 1509 PetscFunctionBegin; 1510 if (primalv) { 1511 if (pcbddc->user_primal_vertices_local) { 1512 IS list[2], newp; 1513 1514 list[0] = primalv; 1515 list[1] = pcbddc->user_primal_vertices_local; 1516 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1517 PetscCall(ISSortRemoveDups(newp)); 1518 PetscCall(ISDestroy(&list[1])); 1519 pcbddc->user_primal_vertices_local = newp; 1520 } else { 1521 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1522 } 1523 } 1524 PetscFunctionReturn(PETSC_SUCCESS); 1525 } 1526 1527 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1528 { 1529 PetscInt f, *comp = (PetscInt *)ctx; 1530 1531 PetscFunctionBegin; 1532 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1533 PetscFunctionReturn(PETSC_SUCCESS); 1534 } 1535 1536 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1537 { 1538 Vec local, global; 1539 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1540 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1541 PetscBool monolithic = PETSC_FALSE; 1542 1543 PetscFunctionBegin; 1544 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1545 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1546 PetscOptionsEnd(); 1547 /* need to convert from global to local topology information and remove references to information in global ordering */ 1548 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1549 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1550 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1551 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1552 if (monolithic) { /* just get block size to properly compute vertices */ 1553 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1554 goto boundary; 1555 } 1556 1557 if (pcbddc->user_provided_isfordofs) { 1558 if (pcbddc->n_ISForDofs) { 1559 PetscInt i; 1560 1561 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1562 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1563 PetscInt bs; 1564 1565 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1566 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1567 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1568 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1569 } 1570 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1571 pcbddc->n_ISForDofs = 0; 1572 PetscCall(PetscFree(pcbddc->ISForDofs)); 1573 } 1574 } else { 1575 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1576 DM dm; 1577 1578 PetscCall(MatGetDM(pc->pmat, &dm)); 1579 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1580 if (dm) { 1581 IS *fields; 1582 PetscInt nf, i; 1583 1584 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1585 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1586 for (i = 0; i < nf; i++) { 1587 PetscInt bs; 1588 1589 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1590 PetscCall(ISGetBlockSize(fields[i], &bs)); 1591 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1592 PetscCall(ISDestroy(&fields[i])); 1593 } 1594 PetscCall(PetscFree(fields)); 1595 pcbddc->n_ISForDofsLocal = nf; 1596 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1597 PetscContainer c; 1598 1599 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1600 if (c) { 1601 MatISLocalFields lf; 1602 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1603 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1604 } else { /* fallback, create the default fields if bs > 1 */ 1605 PetscInt i, n = matis->A->rmap->n; 1606 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1607 if (i > 1) { 1608 pcbddc->n_ISForDofsLocal = i; 1609 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1610 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1611 } 1612 } 1613 } 1614 } else { 1615 PetscInt i; 1616 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1617 } 1618 } 1619 1620 boundary: 1621 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1622 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1623 } else if (pcbddc->DirichletBoundariesLocal) { 1624 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1625 } 1626 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1627 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1628 } else if (pcbddc->NeumannBoundariesLocal) { 1629 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1630 } 1631 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1632 PetscCall(VecDestroy(&global)); 1633 PetscCall(VecDestroy(&local)); 1634 /* detect local disconnected subdomains if requested (use matis->A) */ 1635 if (pcbddc->detect_disconnected) { 1636 IS primalv = NULL; 1637 PetscInt i; 1638 PetscBool filter = pcbddc->detect_disconnected_filter; 1639 1640 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1641 PetscCall(PetscFree(pcbddc->local_subs)); 1642 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1643 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1644 PetscCall(ISDestroy(&primalv)); 1645 } 1646 /* early stage corner detection */ 1647 { 1648 DM dm; 1649 1650 PetscCall(MatGetDM(pc->pmat, &dm)); 1651 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1652 if (dm) { 1653 PetscBool isda; 1654 1655 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1656 if (isda) { 1657 ISLocalToGlobalMapping l2l; 1658 IS corners; 1659 Mat lA; 1660 PetscBool gl, lo; 1661 1662 { 1663 Vec cvec; 1664 const PetscScalar *coords; 1665 PetscInt dof, n, cdim; 1666 PetscBool memc = PETSC_TRUE; 1667 1668 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1669 PetscCall(DMGetCoordinates(dm, &cvec)); 1670 PetscCall(VecGetLocalSize(cvec, &n)); 1671 PetscCall(VecGetBlockSize(cvec, &cdim)); 1672 n /= cdim; 1673 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1674 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1675 PetscCall(VecGetArrayRead(cvec, &coords)); 1676 #if defined(PETSC_USE_COMPLEX) 1677 memc = PETSC_FALSE; 1678 #endif 1679 if (dof != 1) memc = PETSC_FALSE; 1680 if (memc) { 1681 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1682 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1683 PetscReal *bcoords = pcbddc->mat_graph->coords; 1684 PetscInt i, b, d; 1685 1686 for (i = 0; i < n; i++) { 1687 for (b = 0; b < dof; b++) { 1688 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1689 } 1690 } 1691 } 1692 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1693 pcbddc->mat_graph->cdim = cdim; 1694 pcbddc->mat_graph->cnloc = dof * n; 1695 pcbddc->mat_graph->cloc = PETSC_FALSE; 1696 } 1697 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1698 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1699 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1700 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1701 lo = (PetscBool)(l2l && corners); 1702 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1703 if (gl) { /* From PETSc's DMDA */ 1704 const PetscInt *idx; 1705 PetscInt dof, bs, *idxout, n; 1706 1707 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1708 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1709 PetscCall(ISGetLocalSize(corners, &n)); 1710 PetscCall(ISGetIndices(corners, &idx)); 1711 if (bs == dof) { 1712 PetscCall(PetscMalloc1(n, &idxout)); 1713 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1714 } else { /* the original DMDA local-to-local map have been modified */ 1715 PetscInt i, d; 1716 1717 PetscCall(PetscMalloc1(dof * n, &idxout)); 1718 for (i = 0; i < n; i++) 1719 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1720 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1721 1722 bs = 1; 1723 n *= dof; 1724 } 1725 PetscCall(ISRestoreIndices(corners, &idx)); 1726 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1727 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1728 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1729 PetscCall(ISDestroy(&corners)); 1730 pcbddc->corner_selected = PETSC_TRUE; 1731 pcbddc->corner_selection = PETSC_TRUE; 1732 } 1733 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1734 } 1735 } 1736 } 1737 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1738 DM dm; 1739 1740 PetscCall(MatGetDM(pc->pmat, &dm)); 1741 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1742 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1743 Vec vcoords; 1744 PetscSection section; 1745 PetscReal *coords; 1746 PetscInt d, cdim, nl, nf, **ctxs; 1747 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1748 /* debug coordinates */ 1749 PetscViewer viewer; 1750 PetscBool flg; 1751 PetscViewerFormat format; 1752 const char *prefix; 1753 1754 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1755 PetscCall(DMGetLocalSection(dm, §ion)); 1756 PetscCall(PetscSectionGetNumFields(section, &nf)); 1757 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1758 PetscCall(VecGetLocalSize(vcoords, &nl)); 1759 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1760 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1761 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1762 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1763 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1764 1765 /* debug coordinates */ 1766 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1767 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1768 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1769 for (d = 0; d < cdim; d++) { 1770 PetscInt i; 1771 const PetscScalar *v; 1772 char name[16]; 1773 1774 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1775 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1776 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1777 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1778 if (flg) PetscCall(VecView(vcoords, viewer)); 1779 PetscCall(VecGetArrayRead(vcoords, &v)); 1780 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1781 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1782 } 1783 PetscCall(VecDestroy(&vcoords)); 1784 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1785 PetscCall(PetscFree(coords)); 1786 PetscCall(PetscFree(ctxs[0])); 1787 PetscCall(PetscFree2(funcs, ctxs)); 1788 if (flg) { 1789 PetscCall(PetscViewerPopFormat(viewer)); 1790 PetscCall(PetscOptionsRestoreViewer(&viewer)); 1791 } 1792 } 1793 } 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1798 { 1799 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 1800 IS nis; 1801 const PetscInt *idxs; 1802 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1803 1804 PetscFunctionBegin; 1805 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1806 if (mop == MPI_LAND) { 1807 /* init rootdata with true */ 1808 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1809 } else { 1810 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1811 } 1812 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1813 PetscCall(ISGetLocalSize(*is, &nd)); 1814 PetscCall(ISGetIndices(*is, &idxs)); 1815 for (i = 0; i < nd; i++) 1816 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1817 PetscCall(ISRestoreIndices(*is, &idxs)); 1818 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1819 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1820 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1821 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1822 if (mop == MPI_LAND) { 1823 PetscCall(PetscMalloc1(nd, &nidxs)); 1824 } else { 1825 PetscCall(PetscMalloc1(n, &nidxs)); 1826 } 1827 for (i = 0, nnd = 0; i < n; i++) 1828 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1829 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1830 PetscCall(ISDestroy(is)); 1831 *is = nis; 1832 PetscFunctionReturn(PETSC_SUCCESS); 1833 } 1834 1835 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 1836 { 1837 PC_IS *pcis = (PC_IS *)(pc->data); 1838 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 1839 1840 PetscFunctionBegin; 1841 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 1842 if (pcbddc->ChangeOfBasisMatrix) { 1843 Vec swap; 1844 1845 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1846 swap = pcbddc->work_change; 1847 pcbddc->work_change = r; 1848 r = swap; 1849 } 1850 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1851 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1852 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1853 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1854 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1855 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1856 PetscCall(VecSet(z, 0.)); 1857 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1858 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1859 if (pcbddc->ChangeOfBasisMatrix) { 1860 pcbddc->work_change = r; 1861 PetscCall(VecCopy(z, pcbddc->work_change)); 1862 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1863 } 1864 PetscFunctionReturn(PETSC_SUCCESS); 1865 } 1866 1867 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1868 { 1869 PCBDDCBenignMatMult_ctx ctx; 1870 PetscBool apply_right, apply_left, reset_x; 1871 1872 PetscFunctionBegin; 1873 PetscCall(MatShellGetContext(A, &ctx)); 1874 if (transpose) { 1875 apply_right = ctx->apply_left; 1876 apply_left = ctx->apply_right; 1877 } else { 1878 apply_right = ctx->apply_right; 1879 apply_left = ctx->apply_left; 1880 } 1881 reset_x = PETSC_FALSE; 1882 if (apply_right) { 1883 const PetscScalar *ax; 1884 PetscInt nl, i; 1885 1886 PetscCall(VecGetLocalSize(x, &nl)); 1887 PetscCall(VecGetArrayRead(x, &ax)); 1888 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 1889 PetscCall(VecRestoreArrayRead(x, &ax)); 1890 for (i = 0; i < ctx->benign_n; i++) { 1891 PetscScalar sum, val; 1892 const PetscInt *idxs; 1893 PetscInt nz, j; 1894 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1895 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1896 sum = 0.; 1897 if (ctx->apply_p0) { 1898 val = ctx->work[idxs[nz - 1]]; 1899 for (j = 0; j < nz - 1; j++) { 1900 sum += ctx->work[idxs[j]]; 1901 ctx->work[idxs[j]] += val; 1902 } 1903 } else { 1904 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 1905 } 1906 ctx->work[idxs[nz - 1]] -= sum; 1907 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1908 } 1909 PetscCall(VecPlaceArray(x, ctx->work)); 1910 reset_x = PETSC_TRUE; 1911 } 1912 if (transpose) { 1913 PetscCall(MatMultTranspose(ctx->A, x, y)); 1914 } else { 1915 PetscCall(MatMult(ctx->A, x, y)); 1916 } 1917 if (reset_x) PetscCall(VecResetArray(x)); 1918 if (apply_left) { 1919 PetscScalar *ay; 1920 PetscInt i; 1921 1922 PetscCall(VecGetArray(y, &ay)); 1923 for (i = 0; i < ctx->benign_n; i++) { 1924 PetscScalar sum, val; 1925 const PetscInt *idxs; 1926 PetscInt nz, j; 1927 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1928 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1929 val = -ay[idxs[nz - 1]]; 1930 if (ctx->apply_p0) { 1931 sum = 0.; 1932 for (j = 0; j < nz - 1; j++) { 1933 sum += ay[idxs[j]]; 1934 ay[idxs[j]] += val; 1935 } 1936 ay[idxs[nz - 1]] += sum; 1937 } else { 1938 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 1939 ay[idxs[nz - 1]] = 0.; 1940 } 1941 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1942 } 1943 PetscCall(VecRestoreArray(y, &ay)); 1944 } 1945 PetscFunctionReturn(PETSC_SUCCESS); 1946 } 1947 1948 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1949 { 1950 PetscFunctionBegin; 1951 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 1952 PetscFunctionReturn(PETSC_SUCCESS); 1953 } 1954 1955 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1956 { 1957 PetscFunctionBegin; 1958 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 1959 PetscFunctionReturn(PETSC_SUCCESS); 1960 } 1961 1962 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1963 { 1964 PC_IS *pcis = (PC_IS *)pc->data; 1965 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1966 PCBDDCBenignMatMult_ctx ctx; 1967 1968 PetscFunctionBegin; 1969 if (!restore) { 1970 Mat A_IB, A_BI; 1971 PetscScalar *work; 1972 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1973 1974 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 1975 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 1976 PetscCall(PetscMalloc1(pcis->n, &work)); 1977 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 1978 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 1979 PetscCall(MatSetType(A_IB, MATSHELL)); 1980 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 1981 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 1982 PetscCall(PetscNew(&ctx)); 1983 PetscCall(MatShellSetContext(A_IB, ctx)); 1984 ctx->apply_left = PETSC_TRUE; 1985 ctx->apply_right = PETSC_FALSE; 1986 ctx->apply_p0 = PETSC_FALSE; 1987 ctx->benign_n = pcbddc->benign_n; 1988 if (reuse) { 1989 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1990 ctx->free = PETSC_FALSE; 1991 } else { /* TODO: could be optimized for successive solves */ 1992 ISLocalToGlobalMapping N_to_D; 1993 PetscInt i; 1994 1995 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 1996 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 1997 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 1998 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 1999 ctx->free = PETSC_TRUE; 2000 } 2001 ctx->A = pcis->A_IB; 2002 ctx->work = work; 2003 PetscCall(MatSetUp(A_IB)); 2004 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2005 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2006 pcis->A_IB = A_IB; 2007 2008 /* A_BI as A_IB^T */ 2009 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2010 pcbddc->benign_original_mat = pcis->A_BI; 2011 pcis->A_BI = A_BI; 2012 } else { 2013 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2014 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2015 PetscCall(MatDestroy(&pcis->A_IB)); 2016 pcis->A_IB = ctx->A; 2017 ctx->A = NULL; 2018 PetscCall(MatDestroy(&pcis->A_BI)); 2019 pcis->A_BI = pcbddc->benign_original_mat; 2020 pcbddc->benign_original_mat = NULL; 2021 if (ctx->free) { 2022 PetscInt i; 2023 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2024 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2025 } 2026 PetscCall(PetscFree(ctx->work)); 2027 PetscCall(PetscFree(ctx)); 2028 } 2029 PetscFunctionReturn(PETSC_SUCCESS); 2030 } 2031 2032 /* used just in bddc debug mode */ 2033 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2034 { 2035 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2036 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2037 Mat An; 2038 2039 PetscFunctionBegin; 2040 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2041 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2042 if (is1) { 2043 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2044 PetscCall(MatDestroy(&An)); 2045 } else { 2046 *B = An; 2047 } 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 /* TODO: add reuse flag */ 2052 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2053 { 2054 Mat Bt; 2055 PetscScalar *a, *bdata; 2056 const PetscInt *ii, *ij; 2057 PetscInt m, n, i, nnz, *bii, *bij; 2058 PetscBool flg_row; 2059 2060 PetscFunctionBegin; 2061 PetscCall(MatGetSize(A, &n, &m)); 2062 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2063 PetscCall(MatSeqAIJGetArray(A, &a)); 2064 nnz = n; 2065 for (i = 0; i < ii[n]; i++) { 2066 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2067 } 2068 PetscCall(PetscMalloc1(n + 1, &bii)); 2069 PetscCall(PetscMalloc1(nnz, &bij)); 2070 PetscCall(PetscMalloc1(nnz, &bdata)); 2071 nnz = 0; 2072 bii[0] = 0; 2073 for (i = 0; i < n; i++) { 2074 PetscInt j; 2075 for (j = ii[i]; j < ii[i + 1]; j++) { 2076 PetscScalar entry = a[j]; 2077 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2078 bij[nnz] = ij[j]; 2079 bdata[nnz] = entry; 2080 nnz++; 2081 } 2082 } 2083 bii[i + 1] = nnz; 2084 } 2085 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2086 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2087 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2088 { 2089 Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data); 2090 b->free_a = PETSC_TRUE; 2091 b->free_ij = PETSC_TRUE; 2092 } 2093 if (*B == A) PetscCall(MatDestroy(&A)); 2094 *B = Bt; 2095 PetscFunctionReturn(PETSC_SUCCESS); 2096 } 2097 2098 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2099 { 2100 Mat B = NULL; 2101 DM dm; 2102 IS is_dummy, *cc_n; 2103 ISLocalToGlobalMapping l2gmap_dummy; 2104 PCBDDCGraph graph; 2105 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2106 PetscInt i, n; 2107 PetscInt *xadj, *adjncy; 2108 PetscBool isplex = PETSC_FALSE; 2109 2110 PetscFunctionBegin; 2111 if (ncc) *ncc = 0; 2112 if (cc) *cc = NULL; 2113 if (primalv) *primalv = NULL; 2114 PetscCall(PCBDDCGraphCreate(&graph)); 2115 PetscCall(MatGetDM(pc->pmat, &dm)); 2116 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2117 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2118 if (filter) isplex = PETSC_FALSE; 2119 2120 if (isplex) { /* this code has been modified from plexpartition.c */ 2121 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2122 PetscInt *adj = NULL; 2123 IS cellNumbering; 2124 const PetscInt *cellNum; 2125 PetscBool useCone, useClosure; 2126 PetscSection section; 2127 PetscSegBuffer adjBuffer; 2128 PetscSF sfPoint; 2129 2130 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2131 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2132 PetscCall(DMGetPointSF(dm, &sfPoint)); 2133 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2134 /* Build adjacency graph via a section/segbuffer */ 2135 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2136 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2137 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2138 /* Always use FVM adjacency to create partitioner graph */ 2139 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2140 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2141 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2142 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2143 for (n = 0, p = pStart; p < pEnd; p++) { 2144 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2145 if (nroots > 0) { 2146 if (cellNum[p] < 0) continue; 2147 } 2148 adjSize = PETSC_DETERMINE; 2149 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2150 for (a = 0; a < adjSize; ++a) { 2151 const PetscInt point = adj[a]; 2152 if (pStart <= point && point < pEnd) { 2153 PetscInt *PETSC_RESTRICT pBuf; 2154 PetscCall(PetscSectionAddDof(section, p, 1)); 2155 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2156 *pBuf = point; 2157 } 2158 } 2159 n++; 2160 } 2161 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2162 /* Derive CSR graph from section/segbuffer */ 2163 PetscCall(PetscSectionSetUp(section)); 2164 PetscCall(PetscSectionGetStorageSize(section, &size)); 2165 PetscCall(PetscMalloc1(n + 1, &xadj)); 2166 for (idx = 0, p = pStart; p < pEnd; p++) { 2167 if (nroots > 0) { 2168 if (cellNum[p] < 0) continue; 2169 } 2170 PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++]))); 2171 } 2172 xadj[n] = size; 2173 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2174 /* Clean up */ 2175 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2176 PetscCall(PetscSectionDestroy(§ion)); 2177 PetscCall(PetscFree(adj)); 2178 graph->xadj = xadj; 2179 graph->adjncy = adjncy; 2180 } else { 2181 Mat A; 2182 PetscBool isseqaij, flg_row; 2183 2184 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2185 if (!A->rmap->N || !A->cmap->N) { 2186 PetscCall(PCBDDCGraphDestroy(&graph)); 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2190 if (!isseqaij && filter) { 2191 PetscBool isseqdense; 2192 2193 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2194 if (!isseqdense) { 2195 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2196 } else { /* TODO: rectangular case and LDA */ 2197 PetscScalar *array; 2198 PetscReal chop = 1.e-6; 2199 2200 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2201 PetscCall(MatDenseGetArray(B, &array)); 2202 PetscCall(MatGetSize(B, &n, NULL)); 2203 for (i = 0; i < n; i++) { 2204 PetscInt j; 2205 for (j = i + 1; j < n; j++) { 2206 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2207 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2208 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2209 } 2210 } 2211 PetscCall(MatDenseRestoreArray(B, &array)); 2212 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2213 } 2214 } else { 2215 PetscCall(PetscObjectReference((PetscObject)A)); 2216 B = A; 2217 } 2218 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2219 2220 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2221 if (filter) { 2222 PetscScalar *data; 2223 PetscInt j, cum; 2224 2225 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2226 PetscCall(MatSeqAIJGetArray(B, &data)); 2227 cum = 0; 2228 for (i = 0; i < n; i++) { 2229 PetscInt t; 2230 2231 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2232 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2233 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2234 } 2235 t = xadj_filtered[i]; 2236 xadj_filtered[i] = cum; 2237 cum += t; 2238 } 2239 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2240 graph->xadj = xadj_filtered; 2241 graph->adjncy = adjncy_filtered; 2242 } else { 2243 graph->xadj = xadj; 2244 graph->adjncy = adjncy; 2245 } 2246 } 2247 /* compute local connected components using PCBDDCGraph */ 2248 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2249 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2250 PetscCall(ISDestroy(&is_dummy)); 2251 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2252 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2253 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2254 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2255 2256 /* partial clean up */ 2257 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2258 if (B) { 2259 PetscBool flg_row; 2260 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2261 PetscCall(MatDestroy(&B)); 2262 } 2263 if (isplex) { 2264 PetscCall(PetscFree(xadj)); 2265 PetscCall(PetscFree(adjncy)); 2266 } 2267 2268 /* get back data */ 2269 if (isplex) { 2270 if (ncc) *ncc = graph->ncc; 2271 if (cc || primalv) { 2272 Mat A; 2273 PetscBT btv, btvt, btvc; 2274 PetscSection subSection; 2275 PetscInt *ids, cum, cump, *cids, *pids; 2276 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2277 2278 PetscCall(DMGetDimension(dm, &dim)); 2279 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2280 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2281 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2282 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2283 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2284 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2285 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2286 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2287 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2288 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2289 2290 /* First see if we find corners for the subdomains, i.e. a vertex 2291 shared by at least dim subdomain boundary faces. This does not 2292 cover all the possible cases with simplices but it is enough 2293 for tensor cells */ 2294 if (vStart != fStart && dim <= 3) { 2295 for (PetscInt c = cStart; c < cEnd; c++) { 2296 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2297 const PetscInt *faces; 2298 2299 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2300 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2301 PetscCall(DMPlexGetCone(dm, c, &faces)); 2302 for (PetscInt f = 0; f < nf; f++) { 2303 PetscInt nc, ff; 2304 2305 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2306 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2307 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2308 } 2309 if (cnt >= mcnt) { 2310 PetscInt size, *closure = NULL; 2311 2312 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2313 for (PetscInt k = 0; k < 2 * size; k += 2) { 2314 PetscInt v = closure[k]; 2315 if (v >= vStart && v < vEnd) { 2316 PetscInt vsize, *vclosure = NULL; 2317 2318 cnt = 0; 2319 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2320 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2321 PetscInt f = vclosure[vk]; 2322 if (f >= fStart && f < fEnd) { 2323 PetscInt nc, ff; 2324 PetscBool valid = PETSC_FALSE; 2325 2326 for (PetscInt fk = 0; fk < nf; fk++) 2327 if (f == cfaces[fk]) valid = PETSC_TRUE; 2328 if (!valid) continue; 2329 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2330 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2331 if (nc == 1 && f == ff) cnt++; 2332 } 2333 } 2334 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2335 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2336 } 2337 } 2338 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2339 } 2340 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2341 } 2342 } 2343 2344 cids[0] = 0; 2345 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2346 PetscInt j; 2347 2348 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2349 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2350 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2351 2352 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2353 for (k = 0; k < 2 * size; k += 2) { 2354 PetscInt s, pp, p = closure[k], off, dof, cdof; 2355 2356 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2357 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2358 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2359 for (s = 0; s < dof - cdof; s++) { 2360 if (PetscBTLookupSet(btvt, off + s)) continue; 2361 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2362 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2363 else pids[cump++] = off + s; /* cross-vertex */ 2364 } 2365 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2366 if (pp != p) { 2367 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2368 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2369 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2370 for (s = 0; s < dof - cdof; s++) { 2371 if (PetscBTLookupSet(btvt, off + s)) continue; 2372 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2373 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2374 else pids[cump++] = off + s; /* cross-vertex */ 2375 } 2376 } 2377 } 2378 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2379 } 2380 cids[i + 1] = cum; 2381 /* mark dofs as already assigned */ 2382 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2383 } 2384 if (cc) { 2385 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2386 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2387 *cc = cc_n; 2388 } 2389 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2390 PetscCall(PetscFree3(ids, cids, pids)); 2391 PetscCall(PetscBTDestroy(&btv)); 2392 PetscCall(PetscBTDestroy(&btvt)); 2393 PetscCall(PetscBTDestroy(&btvc)); 2394 PetscCall(DMDestroy(&dm)); 2395 } 2396 } else { 2397 if (ncc) *ncc = graph->ncc; 2398 if (cc) { 2399 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2400 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2401 *cc = cc_n; 2402 } 2403 } 2404 /* clean up graph */ 2405 graph->xadj = NULL; 2406 graph->adjncy = NULL; 2407 PetscCall(PCBDDCGraphDestroy(&graph)); 2408 PetscFunctionReturn(PETSC_SUCCESS); 2409 } 2410 2411 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2412 { 2413 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2414 PC_IS *pcis = (PC_IS *)(pc->data); 2415 IS dirIS = NULL; 2416 PetscInt i; 2417 2418 PetscFunctionBegin; 2419 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2420 if (zerodiag) { 2421 Mat A; 2422 Vec vec3_N; 2423 PetscScalar *vals; 2424 const PetscInt *idxs; 2425 PetscInt nz, *count; 2426 2427 /* p0 */ 2428 PetscCall(VecSet(pcis->vec1_N, 0.)); 2429 PetscCall(PetscMalloc1(pcis->n, &vals)); 2430 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2431 PetscCall(ISGetIndices(zerodiag, &idxs)); 2432 for (i = 0; i < nz; i++) vals[i] = 1.; 2433 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2434 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2435 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2436 /* v_I */ 2437 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2438 for (i = 0; i < nz; i++) vals[i] = 0.; 2439 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2440 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2441 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2442 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2443 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2444 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2445 if (dirIS) { 2446 PetscInt n; 2447 2448 PetscCall(ISGetLocalSize(dirIS, &n)); 2449 PetscCall(ISGetIndices(dirIS, &idxs)); 2450 for (i = 0; i < n; i++) vals[i] = 0.; 2451 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2452 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2453 } 2454 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2455 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2456 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2457 PetscCall(VecSet(vec3_N, 0.)); 2458 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2459 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2460 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2461 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2462 PetscCall(PetscFree(vals)); 2463 PetscCall(VecDestroy(&vec3_N)); 2464 2465 /* there should not be any pressure dofs lying on the interface */ 2466 PetscCall(PetscCalloc1(pcis->n, &count)); 2467 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2468 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2469 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2470 PetscCall(ISGetIndices(zerodiag, &idxs)); 2471 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2472 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2473 PetscCall(PetscFree(count)); 2474 } 2475 PetscCall(ISDestroy(&dirIS)); 2476 2477 /* check PCBDDCBenignGetOrSetP0 */ 2478 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2479 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2480 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2481 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2482 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2483 for (i = 0; i < pcbddc->benign_n; i++) { 2484 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2485 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2486 } 2487 PetscFunctionReturn(PETSC_SUCCESS); 2488 } 2489 2490 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2491 { 2492 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2493 Mat_IS *matis = (Mat_IS *)(pc->pmat->data); 2494 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2495 PetscInt nz, n, benign_n, bsp = 1; 2496 PetscInt *interior_dofs, n_interior_dofs, nneu; 2497 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2498 2499 PetscFunctionBegin; 2500 if (reuse) goto project_b0; 2501 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2502 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2503 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2504 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2505 has_null_pressures = PETSC_TRUE; 2506 have_null = PETSC_TRUE; 2507 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2508 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2509 Checks if all the pressure dofs in each subdomain have a zero diagonal 2510 If not, a change of basis on pressures is not needed 2511 since the local Schur complements are already SPD 2512 */ 2513 if (pcbddc->n_ISForDofsLocal) { 2514 IS iP = NULL; 2515 PetscInt p, *pp; 2516 PetscBool flg; 2517 2518 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2519 n = pcbddc->n_ISForDofsLocal; 2520 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2521 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2522 PetscOptionsEnd(); 2523 if (!flg) { 2524 n = 1; 2525 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2526 } 2527 2528 bsp = 0; 2529 for (p = 0; p < n; p++) { 2530 PetscInt bs; 2531 2532 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2533 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2534 bsp += bs; 2535 } 2536 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2537 bsp = 0; 2538 for (p = 0; p < n; p++) { 2539 const PetscInt *idxs; 2540 PetscInt b, bs, npl, *bidxs; 2541 2542 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2543 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2544 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2545 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2546 for (b = 0; b < bs; b++) { 2547 PetscInt i; 2548 2549 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2550 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2551 bsp++; 2552 } 2553 PetscCall(PetscFree(bidxs)); 2554 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2555 } 2556 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2557 2558 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2559 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2560 if (iP) { 2561 IS newpressures; 2562 2563 PetscCall(ISDifference(pressures, iP, &newpressures)); 2564 PetscCall(ISDestroy(&pressures)); 2565 pressures = newpressures; 2566 } 2567 PetscCall(ISSorted(pressures, &sorted)); 2568 if (!sorted) PetscCall(ISSort(pressures)); 2569 PetscCall(PetscFree(pp)); 2570 } 2571 2572 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2573 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2574 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2575 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2576 PetscCall(ISSorted(zerodiag, &sorted)); 2577 if (!sorted) PetscCall(ISSort(zerodiag)); 2578 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2579 zerodiag_save = zerodiag; 2580 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2581 if (!nz) { 2582 if (n) have_null = PETSC_FALSE; 2583 has_null_pressures = PETSC_FALSE; 2584 PetscCall(ISDestroy(&zerodiag)); 2585 } 2586 recompute_zerodiag = PETSC_FALSE; 2587 2588 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2589 zerodiag_subs = NULL; 2590 benign_n = 0; 2591 n_interior_dofs = 0; 2592 interior_dofs = NULL; 2593 nneu = 0; 2594 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2595 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2596 if (checkb) { /* need to compute interior nodes */ 2597 PetscInt n, i, j; 2598 PetscInt n_neigh, *neigh, *n_shared, **shared; 2599 PetscInt *iwork; 2600 2601 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n)); 2602 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2603 PetscCall(PetscCalloc1(n, &iwork)); 2604 PetscCall(PetscMalloc1(n, &interior_dofs)); 2605 for (i = 1; i < n_neigh; i++) 2606 for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1; 2607 for (i = 0; i < n; i++) 2608 if (!iwork[i]) interior_dofs[n_interior_dofs++] = i; 2609 PetscCall(PetscFree(iwork)); 2610 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2611 } 2612 if (has_null_pressures) { 2613 IS *subs; 2614 PetscInt nsubs, i, j, nl; 2615 const PetscInt *idxs; 2616 PetscScalar *array; 2617 Vec *work; 2618 2619 subs = pcbddc->local_subs; 2620 nsubs = pcbddc->n_local_subs; 2621 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2622 if (checkb) { 2623 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2624 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2625 PetscCall(ISGetIndices(zerodiag, &idxs)); 2626 /* work[0] = 1_p */ 2627 PetscCall(VecSet(work[0], 0.)); 2628 PetscCall(VecGetArray(work[0], &array)); 2629 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2630 PetscCall(VecRestoreArray(work[0], &array)); 2631 /* work[0] = 1_v */ 2632 PetscCall(VecSet(work[1], 1.)); 2633 PetscCall(VecGetArray(work[1], &array)); 2634 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2635 PetscCall(VecRestoreArray(work[1], &array)); 2636 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2637 } 2638 2639 if (nsubs > 1 || bsp > 1) { 2640 IS *is; 2641 PetscInt b, totb; 2642 2643 totb = bsp; 2644 is = bsp > 1 ? bzerodiag : &zerodiag; 2645 nsubs = PetscMax(nsubs, 1); 2646 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2647 for (b = 0; b < totb; b++) { 2648 for (i = 0; i < nsubs; i++) { 2649 ISLocalToGlobalMapping l2g; 2650 IS t_zerodiag_subs; 2651 PetscInt nl; 2652 2653 if (subs) { 2654 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2655 } else { 2656 IS tis; 2657 2658 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2659 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2660 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2661 PetscCall(ISDestroy(&tis)); 2662 } 2663 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2664 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2665 if (nl) { 2666 PetscBool valid = PETSC_TRUE; 2667 2668 if (checkb) { 2669 PetscCall(VecSet(matis->x, 0)); 2670 PetscCall(ISGetLocalSize(subs[i], &nl)); 2671 PetscCall(ISGetIndices(subs[i], &idxs)); 2672 PetscCall(VecGetArray(matis->x, &array)); 2673 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2674 PetscCall(VecRestoreArray(matis->x, &array)); 2675 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2676 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2677 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2678 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2679 PetscCall(VecGetArray(matis->y, &array)); 2680 for (j = 0; j < n_interior_dofs; j++) { 2681 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2682 valid = PETSC_FALSE; 2683 break; 2684 } 2685 } 2686 PetscCall(VecRestoreArray(matis->y, &array)); 2687 } 2688 if (valid && nneu) { 2689 const PetscInt *idxs; 2690 PetscInt nzb; 2691 2692 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2693 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2694 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2695 if (nzb) valid = PETSC_FALSE; 2696 } 2697 if (valid && pressures) { 2698 IS t_pressure_subs, tmp; 2699 PetscInt i1, i2; 2700 2701 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2702 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2703 PetscCall(ISGetLocalSize(tmp, &i1)); 2704 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2705 if (i2 != i1) valid = PETSC_FALSE; 2706 PetscCall(ISDestroy(&t_pressure_subs)); 2707 PetscCall(ISDestroy(&tmp)); 2708 } 2709 if (valid) { 2710 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2711 benign_n++; 2712 } else recompute_zerodiag = PETSC_TRUE; 2713 } 2714 PetscCall(ISDestroy(&t_zerodiag_subs)); 2715 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2716 } 2717 } 2718 } else { /* there's just one subdomain (or zero if they have not been detected */ 2719 PetscBool valid = PETSC_TRUE; 2720 2721 if (nneu) valid = PETSC_FALSE; 2722 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2723 if (valid && checkb) { 2724 PetscCall(MatMult(matis->A, work[0], matis->x)); 2725 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2726 PetscCall(VecGetArray(matis->x, &array)); 2727 for (j = 0; j < n_interior_dofs; j++) { 2728 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2729 valid = PETSC_FALSE; 2730 break; 2731 } 2732 } 2733 PetscCall(VecRestoreArray(matis->x, &array)); 2734 } 2735 if (valid) { 2736 benign_n = 1; 2737 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2738 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2739 zerodiag_subs[0] = zerodiag; 2740 } 2741 } 2742 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2743 } 2744 PetscCall(PetscFree(interior_dofs)); 2745 2746 if (!benign_n) { 2747 PetscInt n; 2748 2749 PetscCall(ISDestroy(&zerodiag)); 2750 recompute_zerodiag = PETSC_FALSE; 2751 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2752 if (n) have_null = PETSC_FALSE; 2753 } 2754 2755 /* final check for null pressures */ 2756 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2757 2758 if (recompute_zerodiag) { 2759 PetscCall(ISDestroy(&zerodiag)); 2760 if (benign_n == 1) { 2761 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2762 zerodiag = zerodiag_subs[0]; 2763 } else { 2764 PetscInt i, nzn, *new_idxs; 2765 2766 nzn = 0; 2767 for (i = 0; i < benign_n; i++) { 2768 PetscInt ns; 2769 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2770 nzn += ns; 2771 } 2772 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2773 nzn = 0; 2774 for (i = 0; i < benign_n; i++) { 2775 PetscInt ns, *idxs; 2776 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2777 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2778 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2779 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2780 nzn += ns; 2781 } 2782 PetscCall(PetscSortInt(nzn, new_idxs)); 2783 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2784 } 2785 have_null = PETSC_FALSE; 2786 } 2787 2788 /* determines if the coarse solver will be singular or not */ 2789 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2790 2791 /* Prepare matrix to compute no-net-flux */ 2792 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2793 Mat A, loc_divudotp; 2794 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2795 IS row, col, isused = NULL; 2796 PetscInt M, N, n, st, n_isused; 2797 2798 if (pressures) { 2799 isused = pressures; 2800 } else { 2801 isused = zerodiag_save; 2802 } 2803 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2804 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2805 PetscCall(MatGetLocalSize(A, &n, NULL)); 2806 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2807 n_isused = 0; 2808 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2809 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2810 st = st - n_isused; 2811 if (n) { 2812 const PetscInt *gidxs; 2813 2814 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2815 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2816 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2817 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2818 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2819 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2820 } else { 2821 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2822 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2823 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2824 } 2825 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2826 PetscCall(ISGetSize(row, &M)); 2827 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2828 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2829 PetscCall(ISDestroy(&row)); 2830 PetscCall(ISDestroy(&col)); 2831 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2832 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2833 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2834 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2835 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2836 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2837 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2838 PetscCall(MatDestroy(&loc_divudotp)); 2839 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2840 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2841 } 2842 PetscCall(ISDestroy(&zerodiag_save)); 2843 PetscCall(ISDestroy(&pressures)); 2844 if (bzerodiag) { 2845 PetscInt i; 2846 2847 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 2848 PetscCall(PetscFree(bzerodiag)); 2849 } 2850 pcbddc->benign_n = benign_n; 2851 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2852 2853 /* determines if the problem has subdomains with 0 pressure block */ 2854 have_null = (PetscBool)(!!pcbddc->benign_n); 2855 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2856 2857 project_b0: 2858 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2859 /* change of basis and p0 dofs */ 2860 if (pcbddc->benign_n) { 2861 PetscInt i, s, *nnz; 2862 2863 /* local change of basis for pressures */ 2864 PetscCall(MatDestroy(&pcbddc->benign_change)); 2865 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2866 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2867 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2868 PetscCall(PetscMalloc1(n, &nnz)); 2869 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2870 for (i = 0; i < pcbddc->benign_n; i++) { 2871 const PetscInt *idxs; 2872 PetscInt nzs, j; 2873 2874 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2875 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2876 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2877 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2878 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2879 } 2880 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2881 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2882 PetscCall(PetscFree(nnz)); 2883 /* set identity by default */ 2884 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 2885 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 2886 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 2887 /* set change on pressures */ 2888 for (s = 0; s < pcbddc->benign_n; s++) { 2889 PetscScalar *array; 2890 const PetscInt *idxs; 2891 PetscInt nzs; 2892 2893 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 2894 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2895 for (i = 0; i < nzs - 1; i++) { 2896 PetscScalar vals[2]; 2897 PetscInt cols[2]; 2898 2899 cols[0] = idxs[i]; 2900 cols[1] = idxs[nzs - 1]; 2901 vals[0] = 1.; 2902 vals[1] = 1.; 2903 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 2904 } 2905 PetscCall(PetscMalloc1(nzs, &array)); 2906 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 2907 array[nzs - 1] = 1.; 2908 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 2909 /* store local idxs for p0 */ 2910 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 2911 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2912 PetscCall(PetscFree(array)); 2913 } 2914 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2915 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2916 2917 /* project if needed */ 2918 if (pcbddc->benign_change_explicit) { 2919 Mat M; 2920 2921 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 2922 PetscCall(MatDestroy(&pcbddc->local_mat)); 2923 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 2924 PetscCall(MatDestroy(&M)); 2925 } 2926 /* store global idxs for p0 */ 2927 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 2928 } 2929 *zerodiaglocal = zerodiag; 2930 PetscFunctionReturn(PETSC_SUCCESS); 2931 } 2932 2933 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2934 { 2935 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2936 PetscScalar *array; 2937 2938 PetscFunctionBegin; 2939 if (!pcbddc->benign_sf) { 2940 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 2941 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 2942 } 2943 if (get) { 2944 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 2945 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2946 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2947 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 2948 } else { 2949 PetscCall(VecGetArray(v, &array)); 2950 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2951 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2952 PetscCall(VecRestoreArray(v, &array)); 2953 } 2954 PetscFunctionReturn(PETSC_SUCCESS); 2955 } 2956 2957 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2958 { 2959 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2960 2961 PetscFunctionBegin; 2962 /* TODO: add error checking 2963 - avoid nested pop (or push) calls. 2964 - cannot push before pop. 2965 - cannot call this if pcbddc->local_mat is NULL 2966 */ 2967 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 2968 if (pop) { 2969 if (pcbddc->benign_change_explicit) { 2970 IS is_p0; 2971 MatReuse reuse; 2972 2973 /* extract B_0 */ 2974 reuse = MAT_INITIAL_MATRIX; 2975 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 2976 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 2977 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 2978 /* remove rows and cols from local problem */ 2979 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 2980 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 2981 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 2982 PetscCall(ISDestroy(&is_p0)); 2983 } else { 2984 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2985 PetscScalar *vals; 2986 PetscInt i, n, *idxs_ins; 2987 2988 PetscCall(VecGetLocalSize(matis->y, &n)); 2989 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 2990 if (!pcbddc->benign_B0) { 2991 PetscInt *nnz; 2992 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 2993 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 2994 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 2995 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 2996 for (i = 0; i < pcbddc->benign_n; i++) { 2997 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 2998 nnz[i] = n - nnz[i]; 2999 } 3000 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3001 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3002 PetscCall(PetscFree(nnz)); 3003 } 3004 3005 for (i = 0; i < pcbddc->benign_n; i++) { 3006 PetscScalar *array; 3007 PetscInt *idxs, j, nz, cum; 3008 3009 PetscCall(VecSet(matis->x, 0.)); 3010 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3011 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3012 for (j = 0; j < nz; j++) vals[j] = 1.; 3013 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3014 PetscCall(VecAssemblyBegin(matis->x)); 3015 PetscCall(VecAssemblyEnd(matis->x)); 3016 PetscCall(VecSet(matis->y, 0.)); 3017 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3018 PetscCall(VecGetArray(matis->y, &array)); 3019 cum = 0; 3020 for (j = 0; j < n; j++) { 3021 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3022 vals[cum] = array[j]; 3023 idxs_ins[cum] = j; 3024 cum++; 3025 } 3026 } 3027 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3028 PetscCall(VecRestoreArray(matis->y, &array)); 3029 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3030 } 3031 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3032 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3033 PetscCall(PetscFree2(idxs_ins, vals)); 3034 } 3035 } else { /* push */ 3036 3037 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3038 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3039 PetscScalar *B0_vals; 3040 PetscInt *B0_cols, B0_ncol; 3041 3042 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3043 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3044 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3045 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3046 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3047 } 3048 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3049 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3050 } 3051 PetscFunctionReturn(PETSC_SUCCESS); 3052 } 3053 3054 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3055 { 3056 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3057 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3058 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 3059 PetscBLASInt *B_iwork, *B_ifail; 3060 PetscScalar *work, lwork; 3061 PetscScalar *St, *S, *eigv; 3062 PetscScalar *Sarray, *Starray; 3063 PetscReal *eigs, thresh, lthresh, uthresh; 3064 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3065 PetscBool allocated_S_St, upart; 3066 #if defined(PETSC_USE_COMPLEX) 3067 PetscReal *rwork; 3068 #endif 3069 3070 PetscFunctionBegin; 3071 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3072 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3073 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3074 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3075 sub_schurs->is_posdef); 3076 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3077 3078 if (pcbddc->dbg_flag) { 3079 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3080 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3081 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3082 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3083 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3084 } 3085 3086 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3087 3088 /* max size of subsets */ 3089 mss = 0; 3090 for (i = 0; i < sub_schurs->n_subs; i++) { 3091 PetscInt subset_size; 3092 3093 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3094 mss = PetscMax(mss, subset_size); 3095 } 3096 3097 /* min/max and threshold */ 3098 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3099 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3100 nmax = PetscMax(nmin, nmax); 3101 allocated_S_St = PETSC_FALSE; 3102 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3103 allocated_S_St = PETSC_TRUE; 3104 } 3105 3106 /* allocate lapack workspace */ 3107 cum = cum2 = 0; 3108 maxneigs = 0; 3109 for (i = 0; i < sub_schurs->n_subs; i++) { 3110 PetscInt n, subset_size; 3111 3112 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3113 n = PetscMin(subset_size, nmax); 3114 cum += subset_size; 3115 cum2 += subset_size * n; 3116 maxneigs = PetscMax(maxneigs, n); 3117 } 3118 lwork = 0; 3119 if (mss) { 3120 PetscScalar sdummy = 0.; 3121 PetscBLASInt B_itype = 1; 3122 PetscBLASInt B_N = mss, idummy = 0; 3123 PetscReal rdummy = 0., zero = 0.0; 3124 PetscReal eps = 0.0; /* dlamch? */ 3125 3126 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3127 B_lwork = -1; 3128 /* some implementations may complain about NULL pointers, even if we are querying */ 3129 S = &sdummy; 3130 St = &sdummy; 3131 eigs = &rdummy; 3132 eigv = &sdummy; 3133 B_iwork = &idummy; 3134 B_ifail = &idummy; 3135 #if defined(PETSC_USE_COMPLEX) 3136 rwork = &rdummy; 3137 #endif 3138 thresh = 1.0; 3139 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3140 #if defined(PETSC_USE_COMPLEX) 3141 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3142 #else 3143 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3144 #endif 3145 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3146 PetscCall(PetscFPTrapPop()); 3147 } 3148 3149 nv = 0; 3150 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3151 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3152 } 3153 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3154 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3155 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3156 #if defined(PETSC_USE_COMPLEX) 3157 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3158 #endif 3159 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3160 &pcbddc->adaptive_constraints_data)); 3161 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3162 3163 maxneigs = 0; 3164 cum = cumarray = 0; 3165 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3166 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3167 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3168 const PetscInt *idxs; 3169 3170 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3171 for (cum = 0; cum < nv; cum++) { 3172 pcbddc->adaptive_constraints_n[cum] = 1; 3173 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3174 pcbddc->adaptive_constraints_data[cum] = 1.0; 3175 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3176 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3177 } 3178 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3179 } 3180 3181 if (mss) { /* multilevel */ 3182 if (sub_schurs->gdsw) { 3183 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3184 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3185 } else { 3186 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3187 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3188 } 3189 } 3190 3191 lthresh = pcbddc->adaptive_threshold[0]; 3192 uthresh = pcbddc->adaptive_threshold[1]; 3193 upart = pcbddc->use_deluxe_scaling; 3194 for (i = 0; i < sub_schurs->n_subs; i++) { 3195 const PetscInt *idxs; 3196 PetscReal upper, lower; 3197 PetscInt j, subset_size, eigs_start = 0; 3198 PetscBLASInt B_N; 3199 PetscBool same_data = PETSC_FALSE; 3200 PetscBool scal = PETSC_FALSE; 3201 3202 if (upart) { 3203 upper = PETSC_MAX_REAL; 3204 lower = uthresh; 3205 } else { 3206 if (sub_schurs->gdsw) { 3207 upper = uthresh; 3208 lower = PETSC_MIN_REAL; 3209 } else { 3210 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3211 upper = 1. / uthresh; 3212 lower = 0.; 3213 } 3214 } 3215 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3216 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3217 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3218 /* this is experimental: we assume the dofs have been properly grouped to have 3219 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3220 if (!sub_schurs->is_posdef) { 3221 Mat T; 3222 3223 for (j = 0; j < subset_size; j++) { 3224 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3225 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3226 PetscCall(MatScale(T, -1.0)); 3227 PetscCall(MatDestroy(&T)); 3228 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3229 PetscCall(MatScale(T, -1.0)); 3230 PetscCall(MatDestroy(&T)); 3231 if (sub_schurs->change_primal_sub) { 3232 PetscInt nz, k; 3233 const PetscInt *idxs; 3234 3235 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3236 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3237 for (k = 0; k < nz; k++) { 3238 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3239 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3240 } 3241 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3242 } 3243 scal = PETSC_TRUE; 3244 break; 3245 } 3246 } 3247 } 3248 3249 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3250 if (sub_schurs->is_symmetric) { 3251 PetscInt j, k; 3252 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3253 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3254 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3255 } 3256 for (j = 0; j < subset_size; j++) { 3257 for (k = j; k < subset_size; k++) { 3258 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3259 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3260 } 3261 } 3262 } else { 3263 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3264 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3265 } 3266 } else { 3267 S = Sarray + cumarray; 3268 St = Starray + cumarray; 3269 } 3270 /* see if we can save some work */ 3271 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3272 3273 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3274 B_neigs = 0; 3275 } else { 3276 PetscBLASInt B_itype = 1; 3277 PetscBLASInt B_IL, B_IU; 3278 PetscReal eps = -1.0; /* dlamch? */ 3279 PetscInt nmin_s; 3280 PetscBool compute_range; 3281 3282 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3283 B_neigs = 0; 3284 compute_range = (PetscBool)!same_data; 3285 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3286 3287 if (pcbddc->dbg_flag) { 3288 PetscInt nc = 0; 3289 3290 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3291 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, 3292 sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc)); 3293 } 3294 3295 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3296 if (compute_range) { 3297 /* ask for eigenvalues larger than thresh */ 3298 if (sub_schurs->is_posdef) { 3299 #if defined(PETSC_USE_COMPLEX) 3300 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3301 #else 3302 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3303 #endif 3304 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3305 } else { /* no theory so far, but it works nicely */ 3306 PetscInt recipe = 0, recipe_m = 1; 3307 PetscReal bb[2]; 3308 3309 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3310 switch (recipe) { 3311 case 0: 3312 if (scal) { 3313 bb[0] = PETSC_MIN_REAL; 3314 bb[1] = lthresh; 3315 } else { 3316 bb[0] = uthresh; 3317 bb[1] = PETSC_MAX_REAL; 3318 } 3319 #if defined(PETSC_USE_COMPLEX) 3320 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3321 #else 3322 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3323 #endif 3324 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3325 break; 3326 case 1: 3327 bb[0] = PETSC_MIN_REAL; 3328 bb[1] = lthresh * lthresh; 3329 #if defined(PETSC_USE_COMPLEX) 3330 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3331 #else 3332 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3333 #endif 3334 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3335 if (!scal) { 3336 PetscBLASInt B_neigs2 = 0; 3337 3338 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3339 bb[1] = PETSC_MAX_REAL; 3340 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3341 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3342 #if defined(PETSC_USE_COMPLEX) 3343 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3344 #else 3345 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3346 #endif 3347 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3348 B_neigs += B_neigs2; 3349 } 3350 break; 3351 case 2: 3352 if (scal) { 3353 bb[0] = PETSC_MIN_REAL; 3354 bb[1] = 0; 3355 #if defined(PETSC_USE_COMPLEX) 3356 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3357 #else 3358 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3359 #endif 3360 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3361 } else { 3362 PetscBLASInt B_neigs2 = 0; 3363 PetscBool do_copy = PETSC_FALSE; 3364 3365 lthresh = PetscMax(lthresh, 0.0); 3366 if (lthresh > 0.0) { 3367 bb[0] = PETSC_MIN_REAL; 3368 bb[1] = lthresh * lthresh; 3369 3370 do_copy = PETSC_TRUE; 3371 #if defined(PETSC_USE_COMPLEX) 3372 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3373 #else 3374 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3375 #endif 3376 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3377 } 3378 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3379 bb[1] = PETSC_MAX_REAL; 3380 if (do_copy) { 3381 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3382 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3383 } 3384 #if defined(PETSC_USE_COMPLEX) 3385 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3386 #else 3387 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3388 #endif 3389 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3390 B_neigs += B_neigs2; 3391 } 3392 break; 3393 case 3: 3394 if (scal) { 3395 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3396 } else { 3397 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3398 } 3399 if (!scal) { 3400 bb[0] = uthresh; 3401 bb[1] = PETSC_MAX_REAL; 3402 #if defined(PETSC_USE_COMPLEX) 3403 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3404 #else 3405 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3406 #endif 3407 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3408 } 3409 if (recipe_m > 0 && B_N - B_neigs > 0) { 3410 PetscBLASInt B_neigs2 = 0; 3411 3412 B_IL = 1; 3413 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3414 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3415 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3416 #if defined(PETSC_USE_COMPLEX) 3417 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3418 #else 3419 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3420 #endif 3421 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3422 B_neigs += B_neigs2; 3423 } 3424 break; 3425 case 4: 3426 bb[0] = PETSC_MIN_REAL; 3427 bb[1] = lthresh; 3428 #if defined(PETSC_USE_COMPLEX) 3429 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3430 #else 3431 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3432 #endif 3433 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3434 { 3435 PetscBLASInt B_neigs2 = 0; 3436 3437 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3438 bb[1] = PETSC_MAX_REAL; 3439 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3440 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3441 #if defined(PETSC_USE_COMPLEX) 3442 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3443 #else 3444 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3445 #endif 3446 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3447 B_neigs += B_neigs2; 3448 } 3449 break; 3450 case 5: /* same as before: first compute all eigenvalues, then filter */ 3451 #if defined(PETSC_USE_COMPLEX) 3452 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3453 #else 3454 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3455 #endif 3456 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3457 { 3458 PetscInt e, k, ne; 3459 for (e = 0, ne = 0; e < B_neigs; e++) { 3460 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3461 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3462 eigs[ne] = eigs[e]; 3463 ne++; 3464 } 3465 } 3466 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3467 B_neigs = ne; 3468 } 3469 break; 3470 default: 3471 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3472 } 3473 } 3474 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3475 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3476 B_IL = 1; 3477 #if defined(PETSC_USE_COMPLEX) 3478 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3479 #else 3480 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3481 #endif 3482 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3483 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3484 PetscInt k; 3485 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3486 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3487 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3488 nmin = nmax; 3489 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3490 for (k = 0; k < nmax; k++) { 3491 eigs[k] = 1. / PETSC_SMALL; 3492 eigv[k * (subset_size + 1)] = 1.0; 3493 } 3494 } 3495 PetscCall(PetscFPTrapPop()); 3496 if (B_ierr) { 3497 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3498 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3499 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3500 } 3501 3502 if (B_neigs > nmax) { 3503 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3504 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3505 B_neigs = nmax; 3506 } 3507 3508 nmin_s = PetscMin(nmin, B_N); 3509 if (B_neigs < nmin_s) { 3510 PetscBLASInt B_neigs2 = 0; 3511 3512 if (upart) { 3513 if (scal) { 3514 B_IU = nmin_s; 3515 B_IL = B_neigs + 1; 3516 } else { 3517 B_IL = B_N - nmin_s + 1; 3518 B_IU = B_N - B_neigs; 3519 } 3520 } else { 3521 B_IL = B_neigs + 1; 3522 B_IU = nmin_s; 3523 } 3524 if (pcbddc->dbg_flag) { 3525 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3526 } 3527 if (sub_schurs->is_symmetric) { 3528 PetscInt j, k; 3529 for (j = 0; j < subset_size; j++) { 3530 for (k = j; k < subset_size; k++) { 3531 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3532 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3533 } 3534 } 3535 } else { 3536 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3537 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3538 } 3539 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3540 #if defined(PETSC_USE_COMPLEX) 3541 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3542 #else 3543 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3544 #endif 3545 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3546 PetscCall(PetscFPTrapPop()); 3547 B_neigs += B_neigs2; 3548 } 3549 if (B_ierr) { 3550 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3551 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3552 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3553 } 3554 if (pcbddc->dbg_flag) { 3555 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3556 for (j = 0; j < B_neigs; j++) { 3557 if (!sub_schurs->gdsw) { 3558 if (eigs[j] == 0.0) { 3559 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3560 } else { 3561 if (upart) { 3562 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3563 } else { 3564 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3565 } 3566 } 3567 } else { 3568 double pg = (double)eigs[j + eigs_start]; 3569 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3570 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3571 } 3572 } 3573 } 3574 } 3575 /* change the basis back to the original one */ 3576 if (sub_schurs->change) { 3577 Mat change, phi, phit; 3578 3579 if (pcbddc->dbg_flag > 2) { 3580 PetscInt ii; 3581 for (ii = 0; ii < B_neigs; ii++) { 3582 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3583 for (j = 0; j < B_N; j++) { 3584 #if defined(PETSC_USE_COMPLEX) 3585 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3586 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3587 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3588 #else 3589 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3590 #endif 3591 } 3592 } 3593 } 3594 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3595 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3596 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3597 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3598 PetscCall(MatDestroy(&phit)); 3599 PetscCall(MatDestroy(&phi)); 3600 } 3601 maxneigs = PetscMax(B_neigs, maxneigs); 3602 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3603 if (B_neigs) { 3604 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3605 3606 if (pcbddc->dbg_flag > 1) { 3607 PetscInt ii; 3608 for (ii = 0; ii < B_neigs; ii++) { 3609 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3610 for (j = 0; j < B_N; j++) { 3611 #if defined(PETSC_USE_COMPLEX) 3612 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3613 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3614 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3615 #else 3616 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3617 #endif 3618 } 3619 } 3620 } 3621 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3622 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3623 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3624 cum++; 3625 } 3626 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3627 /* shift for next computation */ 3628 cumarray += subset_size * subset_size; 3629 } 3630 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3631 3632 if (mss) { 3633 if (sub_schurs->gdsw) { 3634 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3635 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3636 } else { 3637 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3638 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3639 /* destroy matrices (junk) */ 3640 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3641 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3642 } 3643 } 3644 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3645 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3646 #if defined(PETSC_USE_COMPLEX) 3647 PetscCall(PetscFree(rwork)); 3648 #endif 3649 if (pcbddc->dbg_flag) { 3650 PetscInt maxneigs_r; 3651 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3652 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3653 } 3654 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3655 PetscFunctionReturn(PETSC_SUCCESS); 3656 } 3657 3658 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3659 { 3660 PetscScalar *coarse_submat_vals; 3661 3662 PetscFunctionBegin; 3663 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3664 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3665 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3666 3667 /* Setup local neumann solver ksp_R */ 3668 /* PCBDDCSetUpLocalScatters should be called first! */ 3669 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3670 3671 /* 3672 Setup local correction and local part of coarse basis. 3673 Gives back the dense local part of the coarse matrix in column major ordering 3674 */ 3675 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals)); 3676 3677 /* Compute total number of coarse nodes and setup coarse solver */ 3678 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals)); 3679 3680 /* free */ 3681 PetscCall(PetscFree(coarse_submat_vals)); 3682 PetscFunctionReturn(PETSC_SUCCESS); 3683 } 3684 3685 PetscErrorCode PCBDDCResetCustomization(PC pc) 3686 { 3687 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3688 3689 PetscFunctionBegin; 3690 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3691 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3692 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3693 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3694 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3695 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3696 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3697 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3698 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3699 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3700 PetscFunctionReturn(PETSC_SUCCESS); 3701 } 3702 3703 PetscErrorCode PCBDDCResetTopography(PC pc) 3704 { 3705 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3706 PetscInt i; 3707 3708 PetscFunctionBegin; 3709 PetscCall(MatDestroy(&pcbddc->nedcG)); 3710 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3711 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3712 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3713 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3714 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3715 PetscCall(VecDestroy(&pcbddc->work_change)); 3716 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3717 PetscCall(MatDestroy(&pcbddc->divudotp)); 3718 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3719 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3720 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3721 pcbddc->n_local_subs = 0; 3722 PetscCall(PetscFree(pcbddc->local_subs)); 3723 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3724 pcbddc->graphanalyzed = PETSC_FALSE; 3725 pcbddc->recompute_topography = PETSC_TRUE; 3726 pcbddc->corner_selected = PETSC_FALSE; 3727 PetscFunctionReturn(PETSC_SUCCESS); 3728 } 3729 3730 PetscErrorCode PCBDDCResetSolvers(PC pc) 3731 { 3732 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3733 3734 PetscFunctionBegin; 3735 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3736 if (pcbddc->coarse_phi_B) { 3737 PetscScalar *array; 3738 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array)); 3739 PetscCall(PetscFree(array)); 3740 } 3741 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3742 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3743 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3744 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3745 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3746 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3747 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3748 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3749 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3750 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3751 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3752 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3753 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3754 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3755 PetscCall(KSPReset(pcbddc->ksp_D)); 3756 PetscCall(KSPReset(pcbddc->ksp_R)); 3757 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3758 PetscCall(MatDestroy(&pcbddc->local_mat)); 3759 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3760 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3761 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3762 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3763 PetscCall(MatDestroy(&pcbddc->benign_change)); 3764 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3765 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3766 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3767 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3768 if (pcbddc->benign_zerodiag_subs) { 3769 PetscInt i; 3770 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3771 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3772 } 3773 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3774 PetscFunctionReturn(PETSC_SUCCESS); 3775 } 3776 3777 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3778 { 3779 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3780 PC_IS *pcis = (PC_IS *)pc->data; 3781 VecType impVecType; 3782 PetscInt n_constraints, n_R, old_size; 3783 3784 PetscFunctionBegin; 3785 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3786 n_R = pcis->n - pcbddc->n_vertices; 3787 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3788 /* local work vectors (try to avoid unneeded work)*/ 3789 /* R nodes */ 3790 old_size = -1; 3791 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3792 if (n_R != old_size) { 3793 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3794 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3795 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3796 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3797 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3798 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3799 } 3800 /* local primal dofs */ 3801 old_size = -1; 3802 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3803 if (pcbddc->local_primal_size != old_size) { 3804 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3805 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3806 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3807 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3808 } 3809 /* local explicit constraints */ 3810 old_size = -1; 3811 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3812 if (n_constraints && n_constraints != old_size) { 3813 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3814 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3815 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3816 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3817 } 3818 PetscFunctionReturn(PETSC_SUCCESS); 3819 } 3820 3821 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3822 { 3823 /* pointers to pcis and pcbddc */ 3824 PC_IS *pcis = (PC_IS *)pc->data; 3825 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3826 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3827 /* submatrices of local problem */ 3828 Mat A_RV, A_VR, A_VV, local_auxmat2_R; 3829 /* submatrices of local coarse problem */ 3830 Mat S_VV, S_CV, S_VC, S_CC; 3831 /* working matrices */ 3832 Mat C_CR; 3833 /* additional working stuff */ 3834 PC pc_R; 3835 Mat F, Brhs = NULL; 3836 Vec dummy_vec; 3837 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 3838 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3839 PetscScalar *work; 3840 PetscInt *idx_V_B; 3841 PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I; 3842 PetscInt i, n_R, n_D, n_B; 3843 PetscScalar one = 1.0, m_one = -1.0; 3844 3845 PetscFunctionBegin; 3846 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 3847 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3848 3849 /* Set Non-overlapping dimensions */ 3850 n_vertices = pcbddc->n_vertices; 3851 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3852 n_B = pcis->n_B; 3853 n_D = pcis->n - n_B; 3854 n_R = pcis->n - n_vertices; 3855 3856 /* vertices in boundary numbering */ 3857 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 3858 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 3859 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 3860 3861 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3862 PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals)); 3863 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV)); 3864 PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size)); 3865 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, PetscSafePointerPlusOffset(coarse_submat_vals, n_vertices), &S_CV)); 3866 PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size)); 3867 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, PetscSafePointerPlusOffset(coarse_submat_vals, pcbddc->local_primal_size * n_vertices), &S_VC)); 3868 PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size)); 3869 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, PetscSafePointerPlusOffset(coarse_submat_vals, (pcbddc->local_primal_size + 1) * n_vertices), &S_CC)); 3870 PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size)); 3871 3872 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3873 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 3874 PetscCall(PCSetUp(pc_R)); 3875 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 3876 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 3877 lda_rhs = n_R; 3878 need_benign_correction = PETSC_FALSE; 3879 if (isLU || isCHOL) { 3880 PetscCall(PCFactorGetMatrix(pc_R, &F)); 3881 } else if (sub_schurs && sub_schurs->reuse_solver) { 3882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3883 MatFactorType type; 3884 3885 F = reuse_solver->F; 3886 PetscCall(MatGetFactorType(F, &type)); 3887 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3888 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3889 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 3890 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3891 } else F = NULL; 3892 3893 /* determine if we can use a sparse right-hand side */ 3894 sparserhs = PETSC_FALSE; 3895 if (F) { 3896 MatSolverType solver; 3897 3898 PetscCall(MatFactorGetSolverType(F, &solver)); 3899 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 3900 } 3901 3902 /* allocate workspace */ 3903 n = 0; 3904 if (n_constraints) n += lda_rhs * n_constraints; 3905 if (n_vertices) { 3906 n = PetscMax(2 * lda_rhs * n_vertices, n); 3907 n = PetscMax((lda_rhs + n_B) * n_vertices, n); 3908 } 3909 if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n); 3910 PetscCall(PetscMalloc1(n, &work)); 3911 3912 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3913 dummy_vec = NULL; 3914 if (need_benign_correction && lda_rhs != n_R && F) { 3915 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 3916 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 3917 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 3918 } 3919 3920 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3921 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3922 3923 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3924 if (n_constraints) { 3925 Mat M3, C_B; 3926 IS is_aux; 3927 3928 /* Extract constraints on R nodes: C_{CR} */ 3929 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux)); 3930 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 3931 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 3932 3933 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3934 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3935 if (!sparserhs) { 3936 PetscCall(PetscArrayzero(work, lda_rhs * n_constraints)); 3937 for (i = 0; i < n_constraints; i++) { 3938 const PetscScalar *row_cmat_values; 3939 const PetscInt *row_cmat_indices; 3940 PetscInt size_of_constraint, j; 3941 3942 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3943 for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j]; 3944 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3945 } 3946 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs)); 3947 } else { 3948 Mat tC_CR; 3949 3950 PetscCall(MatScale(C_CR, -1.0)); 3951 if (lda_rhs != n_R) { 3952 PetscScalar *aa; 3953 PetscInt r, *ii, *jj; 3954 PetscBool done; 3955 3956 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3957 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 3958 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 3959 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 3960 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3961 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 3962 } else { 3963 PetscCall(PetscObjectReference((PetscObject)C_CR)); 3964 tC_CR = C_CR; 3965 } 3966 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 3967 PetscCall(MatDestroy(&tC_CR)); 3968 } 3969 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R)); 3970 if (F) { 3971 if (need_benign_correction) { 3972 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3973 3974 /* rhs is already zero on interior dofs, no need to change the rhs */ 3975 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 3976 } 3977 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 3978 if (need_benign_correction) { 3979 PetscScalar *marr; 3980 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3981 3982 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3983 if (lda_rhs != n_R) { 3984 for (i = 0; i < n_constraints; i++) { 3985 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 3986 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 3987 PetscCall(VecResetArray(dummy_vec)); 3988 } 3989 } else { 3990 for (i = 0; i < n_constraints; i++) { 3991 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 3992 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 3993 PetscCall(VecResetArray(pcbddc->vec1_R)); 3994 } 3995 } 3996 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3997 } 3998 } else { 3999 PetscScalar *marr; 4000 4001 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4002 for (i = 0; i < n_constraints; i++) { 4003 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4004 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4005 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4006 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4007 PetscCall(VecResetArray(pcbddc->vec1_R)); 4008 PetscCall(VecResetArray(pcbddc->vec2_R)); 4009 } 4010 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4011 } 4012 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4013 PetscCall(MatDestroy(&Brhs)); 4014 if (!pcbddc->switch_static) { 4015 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2)); 4016 for (i = 0; i < n_constraints; i++) { 4017 Vec r, b; 4018 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4019 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4020 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4021 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4022 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4023 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4024 } 4025 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 4026 } else { 4027 if (lda_rhs != n_R) { 4028 IS dummy; 4029 4030 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy)); 4031 PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4032 PetscCall(ISDestroy(&dummy)); 4033 } else { 4034 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4035 pcbddc->local_auxmat2 = local_auxmat2_R; 4036 } 4037 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 4038 } 4039 PetscCall(ISDestroy(&is_aux)); 4040 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4041 PetscCall(MatScale(M3, m_one)); 4042 if (isCHOL) { 4043 PetscCall(MatCholeskyFactor(M3, NULL, NULL)); 4044 } else { 4045 PetscCall(MatLUFactor(M3, NULL, NULL, NULL)); 4046 } 4047 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4048 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4049 PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 4050 PetscCall(MatDestroy(&C_B)); 4051 PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4052 PetscCall(MatDestroy(&M3)); 4053 } 4054 4055 /* Get submatrices from subdomain matrix */ 4056 if (n_vertices) { 4057 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4058 PetscBool oldpin; 4059 #endif 4060 PetscBool isaij; 4061 IS is_aux; 4062 4063 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4064 IS tis; 4065 4066 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4067 PetscCall(ISSort(tis)); 4068 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4069 PetscCall(ISDestroy(&tis)); 4070 } else { 4071 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4072 } 4073 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4074 oldpin = pcbddc->local_mat->boundtocpu; 4075 #endif 4076 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4077 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4078 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4079 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij)); 4080 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4081 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4082 } 4083 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4084 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4085 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4086 #endif 4087 PetscCall(ISDestroy(&is_aux)); 4088 } 4089 4090 /* Matrix of coarse basis functions (local) */ 4091 if (pcbddc->coarse_phi_B) { 4092 PetscInt on_B, on_primal, on_D = n_D; 4093 if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL)); 4094 PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal)); 4095 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4096 PetscScalar *marray; 4097 4098 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray)); 4099 PetscCall(PetscFree(marray)); 4100 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4101 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4102 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4103 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4104 } 4105 } 4106 4107 if (!pcbddc->coarse_phi_B) { 4108 PetscScalar *marr; 4109 4110 /* memory size */ 4111 n = n_B * pcbddc->local_primal_size; 4112 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size; 4113 if (!pcbddc->symmetric_primal) n *= 2; 4114 PetscCall(PetscCalloc1(n, &marr)); 4115 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B)); 4116 marr = PetscSafePointerPlusOffset(marr, n_B * pcbddc->local_primal_size); 4117 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4118 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D)); 4119 marr += n_D * pcbddc->local_primal_size; 4120 } 4121 if (!pcbddc->symmetric_primal) { 4122 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B)); 4123 marr += n_B * pcbddc->local_primal_size; 4124 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D)); 4125 } else { 4126 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4127 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4128 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4129 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4130 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4131 } 4132 } 4133 } 4134 4135 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4136 p0_lidx_I = NULL; 4137 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4138 const PetscInt *idxs; 4139 4140 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4141 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4142 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4143 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4144 } 4145 4146 /* vertices */ 4147 if (n_vertices) { 4148 PetscBool restoreavr = PETSC_FALSE; 4149 4150 PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV)); 4151 4152 if (n_R) { 4153 Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */ 4154 PetscBLASInt B_N, B_one = 1; 4155 const PetscScalar *x; 4156 PetscScalar *y; 4157 4158 PetscCall(MatScale(A_RV, m_one)); 4159 if (need_benign_correction) { 4160 ISLocalToGlobalMapping RtoN; 4161 IS is_p0; 4162 PetscInt *idxs_p0, n; 4163 4164 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4165 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4166 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4167 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4168 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4169 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4170 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4171 PetscCall(ISDestroy(&is_p0)); 4172 } 4173 4174 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV)); 4175 if (!sparserhs || need_benign_correction) { 4176 if (lda_rhs == n_R) { 4177 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4178 } else { 4179 PetscScalar *av, *array; 4180 const PetscInt *xadj, *adjncy; 4181 PetscInt n; 4182 PetscBool flg_row; 4183 4184 array = work + lda_rhs * n_vertices; 4185 PetscCall(PetscArrayzero(array, lda_rhs * n_vertices)); 4186 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4187 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4188 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4189 for (i = 0; i < n; i++) { 4190 PetscInt j; 4191 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j]; 4192 } 4193 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4194 PetscCall(MatDestroy(&A_RV)); 4195 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV)); 4196 } 4197 if (need_benign_correction) { 4198 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4199 PetscScalar *marr; 4200 4201 PetscCall(MatDenseGetArray(A_RV, &marr)); 4202 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4203 4204 | 0 0 0 | (V) 4205 L = | 0 0 -1 | (P-p0) 4206 | 0 0 -1 | (p0) 4207 4208 */ 4209 for (i = 0; i < reuse_solver->benign_n; i++) { 4210 const PetscScalar *vals; 4211 const PetscInt *idxs, *idxs_zero; 4212 PetscInt n, j, nz; 4213 4214 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4215 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4216 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4217 for (j = 0; j < n; j++) { 4218 PetscScalar val = vals[j]; 4219 PetscInt k, col = idxs[j]; 4220 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4221 } 4222 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4223 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4224 } 4225 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4226 } 4227 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4228 Brhs = A_RV; 4229 } else { 4230 Mat tA_RVT, A_RVT; 4231 4232 if (!pcbddc->symmetric_primal) { 4233 /* A_RV already scaled by -1 */ 4234 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4235 } else { 4236 restoreavr = PETSC_TRUE; 4237 PetscCall(MatScale(A_VR, -1.0)); 4238 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4239 A_RVT = A_VR; 4240 } 4241 if (lda_rhs != n_R) { 4242 PetscScalar *aa; 4243 PetscInt r, *ii, *jj; 4244 PetscBool done; 4245 4246 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4247 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4248 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4249 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4250 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4251 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4252 } else { 4253 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4254 tA_RVT = A_RVT; 4255 } 4256 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4257 PetscCall(MatDestroy(&tA_RVT)); 4258 PetscCall(MatDestroy(&A_RVT)); 4259 } 4260 if (F) { 4261 /* need to correct the rhs */ 4262 if (need_benign_correction) { 4263 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4264 PetscScalar *marr; 4265 4266 PetscCall(MatDenseGetArray(Brhs, &marr)); 4267 if (lda_rhs != n_R) { 4268 for (i = 0; i < n_vertices; i++) { 4269 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4270 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4271 PetscCall(VecResetArray(dummy_vec)); 4272 } 4273 } else { 4274 for (i = 0; i < n_vertices; i++) { 4275 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4276 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4277 PetscCall(VecResetArray(pcbddc->vec1_R)); 4278 } 4279 } 4280 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4281 } 4282 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4283 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4284 /* need to correct the solution */ 4285 if (need_benign_correction) { 4286 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4287 PetscScalar *marr; 4288 4289 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4290 if (lda_rhs != n_R) { 4291 for (i = 0; i < n_vertices; i++) { 4292 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4293 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4294 PetscCall(VecResetArray(dummy_vec)); 4295 } 4296 } else { 4297 for (i = 0; i < n_vertices; i++) { 4298 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4299 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4300 PetscCall(VecResetArray(pcbddc->vec1_R)); 4301 } 4302 } 4303 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4304 } 4305 } else { 4306 PetscCall(MatDenseGetArray(Brhs, &y)); 4307 for (i = 0; i < n_vertices; i++) { 4308 PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs)); 4309 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs)); 4310 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4311 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4312 PetscCall(VecResetArray(pcbddc->vec1_R)); 4313 PetscCall(VecResetArray(pcbddc->vec2_R)); 4314 } 4315 PetscCall(MatDenseRestoreArray(Brhs, &y)); 4316 } 4317 PetscCall(MatDestroy(&A_RV)); 4318 PetscCall(MatDestroy(&Brhs)); 4319 /* S_VV and S_CV */ 4320 if (n_constraints) { 4321 Mat B; 4322 4323 PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices)); 4324 for (i = 0; i < n_vertices; i++) { 4325 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4326 PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B)); 4327 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4328 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4329 PetscCall(VecResetArray(pcis->vec1_B)); 4330 PetscCall(VecResetArray(pcbddc->vec1_R)); 4331 } 4332 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B)); 4333 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4334 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV)); 4335 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4336 PetscCall(MatProductSetFromOptions(S_CV)); 4337 PetscCall(MatProductSymbolic(S_CV)); 4338 PetscCall(MatProductNumeric(S_CV)); 4339 PetscCall(MatProductClear(S_CV)); 4340 4341 PetscCall(MatDestroy(&B)); 4342 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B)); 4343 /* Reuse B = local_auxmat2_R * S_CV */ 4344 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B)); 4345 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4346 PetscCall(MatProductSetFromOptions(B)); 4347 PetscCall(MatProductSymbolic(B)); 4348 PetscCall(MatProductNumeric(B)); 4349 4350 PetscCall(MatScale(S_CV, m_one)); 4351 PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N)); 4352 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one)); 4353 PetscCall(MatDestroy(&B)); 4354 } 4355 if (lda_rhs != n_R) { 4356 PetscCall(MatDestroy(&A_RRmA_RV)); 4357 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV)); 4358 PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs)); 4359 } 4360 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt)); 4361 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4362 if (need_benign_correction) { 4363 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4364 PetscScalar *marr, *sums; 4365 4366 PetscCall(PetscMalloc1(n_vertices, &sums)); 4367 PetscCall(MatDenseGetArray(S_VVt, &marr)); 4368 for (i = 0; i < reuse_solver->benign_n; i++) { 4369 const PetscScalar *vals; 4370 const PetscInt *idxs, *idxs_zero; 4371 PetscInt n, j, nz; 4372 4373 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4374 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4375 for (j = 0; j < n_vertices; j++) { 4376 PetscInt k; 4377 sums[j] = 0.; 4378 for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs]; 4379 } 4380 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4381 for (j = 0; j < n; j++) { 4382 PetscScalar val = vals[j]; 4383 PetscInt k; 4384 for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k]; 4385 } 4386 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4387 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4388 } 4389 PetscCall(PetscFree(sums)); 4390 PetscCall(MatDenseRestoreArray(S_VVt, &marr)); 4391 PetscCall(MatDestroy(&A_RV_bcorr)); 4392 } 4393 PetscCall(MatDestroy(&A_RRmA_RV)); 4394 PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N)); 4395 PetscCall(MatDenseGetArrayRead(A_VV, &x)); 4396 PetscCall(MatDenseGetArray(S_VVt, &y)); 4397 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one)); 4398 PetscCall(MatDenseRestoreArrayRead(A_VV, &x)); 4399 PetscCall(MatDenseRestoreArray(S_VVt, &y)); 4400 PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN)); 4401 PetscCall(MatDestroy(&S_VVt)); 4402 } else { 4403 PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN)); 4404 } 4405 PetscCall(MatDestroy(&A_VV)); 4406 4407 /* coarse basis functions */ 4408 for (i = 0; i < n_vertices; i++) { 4409 Vec v; 4410 PetscScalar one = 1.0, zero = 0.0; 4411 4412 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4413 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v)); 4414 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4415 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4416 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4417 PetscMPIInt rank; 4418 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank)); 4419 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4420 } 4421 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4422 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4423 PetscCall(VecAssemblyEnd(v)); 4424 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v)); 4425 4426 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4427 PetscInt j; 4428 4429 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v)); 4430 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4431 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4432 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4433 PetscMPIInt rank; 4434 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank)); 4435 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4436 } 4437 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4438 PetscCall(VecAssemblyBegin(v)); 4439 PetscCall(VecAssemblyEnd(v)); 4440 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v)); 4441 } 4442 PetscCall(VecResetArray(pcbddc->vec1_R)); 4443 } 4444 /* if n_R == 0 the object is not destroyed */ 4445 PetscCall(MatDestroy(&A_RV)); 4446 } 4447 PetscCall(VecDestroy(&dummy_vec)); 4448 4449 if (n_constraints) { 4450 Mat B; 4451 4452 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B)); 4453 PetscCall(MatScale(S_CC, m_one)); 4454 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B)); 4455 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4456 PetscCall(MatProductSetFromOptions(B)); 4457 PetscCall(MatProductSymbolic(B)); 4458 PetscCall(MatProductNumeric(B)); 4459 4460 PetscCall(MatScale(S_CC, m_one)); 4461 if (n_vertices) { 4462 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4463 PetscCall(MatTransposeSetPrecursor(S_CV, S_VC)); 4464 PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC)); 4465 } else { 4466 Mat S_VCt; 4467 4468 if (lda_rhs != n_R) { 4469 PetscCall(MatDestroy(&B)); 4470 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B)); 4471 PetscCall(MatDenseSetLDA(B, lda_rhs)); 4472 } 4473 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt)); 4474 PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN)); 4475 PetscCall(MatDestroy(&S_VCt)); 4476 } 4477 } 4478 PetscCall(MatDestroy(&B)); 4479 /* coarse basis functions */ 4480 for (i = 0; i < n_constraints; i++) { 4481 Vec v; 4482 4483 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4484 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4485 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4486 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4487 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4488 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4489 PetscInt j; 4490 PetscScalar zero = 0.0; 4491 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4492 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4493 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4494 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4495 PetscCall(VecAssemblyBegin(v)); 4496 PetscCall(VecAssemblyEnd(v)); 4497 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4498 } 4499 PetscCall(VecResetArray(pcbddc->vec1_R)); 4500 } 4501 } 4502 if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R)); 4503 PetscCall(PetscFree(p0_lidx_I)); 4504 4505 /* coarse matrix entries relative to B_0 */ 4506 if (pcbddc->benign_n) { 4507 Mat B0_B, B0_BPHI; 4508 IS is_dummy; 4509 const PetscScalar *data; 4510 PetscInt j; 4511 4512 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4513 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4514 PetscCall(ISDestroy(&is_dummy)); 4515 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4516 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4517 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4518 for (j = 0; j < pcbddc->benign_n; j++) { 4519 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4520 for (i = 0; i < pcbddc->local_primal_size; i++) { 4521 coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j]; 4522 coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j]; 4523 } 4524 } 4525 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4526 PetscCall(MatDestroy(&B0_B)); 4527 PetscCall(MatDestroy(&B0_BPHI)); 4528 } 4529 4530 /* compute other basis functions for non-symmetric problems */ 4531 if (!pcbddc->symmetric_primal) { 4532 Mat B_V = NULL, B_C = NULL; 4533 PetscScalar *marray; 4534 4535 if (n_constraints) { 4536 Mat S_CCT, C_CRT; 4537 4538 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4539 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4540 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4541 PetscCall(MatDestroy(&S_CCT)); 4542 if (n_vertices) { 4543 Mat S_VCT; 4544 4545 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4546 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4547 PetscCall(MatDestroy(&S_VCT)); 4548 } 4549 PetscCall(MatDestroy(&C_CRT)); 4550 } else { 4551 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4552 } 4553 if (n_vertices && n_R) { 4554 PetscScalar *av, *marray; 4555 const PetscInt *xadj, *adjncy; 4556 PetscInt n; 4557 PetscBool flg_row; 4558 4559 /* B_V = B_V - A_VR^T */ 4560 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4561 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4562 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4563 PetscCall(MatDenseGetArray(B_V, &marray)); 4564 for (i = 0; i < n; i++) { 4565 PetscInt j; 4566 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 4567 } 4568 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4569 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4570 PetscCall(MatDestroy(&A_VR)); 4571 } 4572 4573 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4574 if (n_vertices) { 4575 PetscCall(MatDenseGetArray(B_V, &marray)); 4576 for (i = 0; i < n_vertices; i++) { 4577 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 4578 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4579 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4580 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4581 PetscCall(VecResetArray(pcbddc->vec1_R)); 4582 PetscCall(VecResetArray(pcbddc->vec2_R)); 4583 } 4584 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4585 } 4586 if (B_C) { 4587 PetscCall(MatDenseGetArray(B_C, &marray)); 4588 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 4589 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 4590 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4591 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4592 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4593 PetscCall(VecResetArray(pcbddc->vec1_R)); 4594 PetscCall(VecResetArray(pcbddc->vec2_R)); 4595 } 4596 PetscCall(MatDenseRestoreArray(B_C, &marray)); 4597 } 4598 /* coarse basis functions */ 4599 for (i = 0; i < pcbddc->local_primal_size; i++) { 4600 Vec v; 4601 4602 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 4603 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 4604 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4605 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4606 if (i < n_vertices) { 4607 PetscScalar one = 1.0; 4608 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4609 PetscCall(VecAssemblyBegin(v)); 4610 PetscCall(VecAssemblyEnd(v)); 4611 } 4612 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 4613 4614 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4615 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 4616 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4617 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4618 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 4619 } 4620 PetscCall(VecResetArray(pcbddc->vec1_R)); 4621 } 4622 PetscCall(MatDestroy(&B_V)); 4623 PetscCall(MatDestroy(&B_C)); 4624 } 4625 4626 /* free memory */ 4627 PetscCall(PetscFree(idx_V_B)); 4628 PetscCall(MatDestroy(&S_VV)); 4629 PetscCall(MatDestroy(&S_CV)); 4630 PetscCall(MatDestroy(&S_VC)); 4631 PetscCall(MatDestroy(&S_CC)); 4632 PetscCall(PetscFree(work)); 4633 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 4634 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 4635 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4636 4637 /* Checking coarse_sub_mat and coarse basis functions */ 4638 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4639 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4640 if (pcbddc->dbg_flag) { 4641 Mat coarse_sub_mat; 4642 Mat AUXMAT, TM1, TM2, TM3, TM4; 4643 Mat coarse_phi_D, coarse_phi_B; 4644 Mat coarse_psi_D, coarse_psi_B; 4645 Mat A_II, A_BB, A_IB, A_BI; 4646 Mat C_B, CPHI; 4647 IS is_dummy; 4648 Vec mones; 4649 MatType checkmattype = MATSEQAIJ; 4650 PetscReal real_value; 4651 4652 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4653 Mat A; 4654 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 4655 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 4656 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 4657 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 4658 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 4659 PetscCall(MatDestroy(&A)); 4660 } else { 4661 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 4662 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 4663 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 4664 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 4665 } 4666 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 4667 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 4668 if (!pcbddc->symmetric_primal) { 4669 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 4670 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 4671 } 4672 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat)); 4673 4674 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 4675 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 4676 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4677 if (!pcbddc->symmetric_primal) { 4678 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4679 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4680 PetscCall(MatDestroy(&AUXMAT)); 4681 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4682 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4683 PetscCall(MatDestroy(&AUXMAT)); 4684 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4685 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4686 PetscCall(MatDestroy(&AUXMAT)); 4687 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4688 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4689 PetscCall(MatDestroy(&AUXMAT)); 4690 } else { 4691 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4692 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4693 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4694 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4695 PetscCall(MatDestroy(&AUXMAT)); 4696 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4697 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4698 PetscCall(MatDestroy(&AUXMAT)); 4699 } 4700 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 4701 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 4702 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 4703 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 4704 if (pcbddc->benign_n) { 4705 Mat B0_B, B0_BPHI; 4706 const PetscScalar *data2; 4707 PetscScalar *data; 4708 PetscInt j; 4709 4710 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4711 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4712 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4713 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4714 PetscCall(MatDenseGetArray(TM1, &data)); 4715 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 4716 for (j = 0; j < pcbddc->benign_n; j++) { 4717 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4718 for (i = 0; i < pcbddc->local_primal_size; i++) { 4719 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 4720 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 4721 } 4722 } 4723 PetscCall(MatDenseRestoreArray(TM1, &data)); 4724 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 4725 PetscCall(MatDestroy(&B0_B)); 4726 PetscCall(ISDestroy(&is_dummy)); 4727 PetscCall(MatDestroy(&B0_BPHI)); 4728 } 4729 #if 0 4730 { 4731 PetscViewer viewer; 4732 char filename[256]; 4733 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 4734 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4735 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4736 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4737 PetscCall(MatView(coarse_sub_mat,viewer)); 4738 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4739 PetscCall(MatView(TM1,viewer)); 4740 if (pcbddc->coarse_phi_B) { 4741 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4742 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4743 } 4744 if (pcbddc->coarse_phi_D) { 4745 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4746 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4747 } 4748 if (pcbddc->coarse_psi_B) { 4749 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4750 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4751 } 4752 if (pcbddc->coarse_psi_D) { 4753 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4754 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4755 } 4756 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4757 PetscCall(MatView(pcbddc->local_mat,viewer)); 4758 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4759 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4760 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4761 PetscCall(ISView(pcis->is_I_local,viewer)); 4762 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4763 PetscCall(ISView(pcis->is_B_local,viewer)); 4764 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4765 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4766 PetscCall(PetscOptionsRestoreViewer(&viewer)); 4767 } 4768 #endif 4769 PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN)); 4770 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 4771 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4772 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4773 4774 /* check constraints */ 4775 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 4776 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4777 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4778 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4779 } else { 4780 PetscScalar *data; 4781 Mat tmat; 4782 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 4783 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 4784 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 4785 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4786 PetscCall(MatDestroy(&tmat)); 4787 } 4788 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 4789 PetscCall(VecSet(mones, -1.0)); 4790 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4791 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4792 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4793 if (!pcbddc->symmetric_primal) { 4794 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 4795 PetscCall(VecSet(mones, -1.0)); 4796 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4797 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4798 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4799 } 4800 PetscCall(MatDestroy(&C_B)); 4801 PetscCall(MatDestroy(&CPHI)); 4802 PetscCall(ISDestroy(&is_dummy)); 4803 PetscCall(VecDestroy(&mones)); 4804 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4805 PetscCall(MatDestroy(&A_II)); 4806 PetscCall(MatDestroy(&A_BB)); 4807 PetscCall(MatDestroy(&A_IB)); 4808 PetscCall(MatDestroy(&A_BI)); 4809 PetscCall(MatDestroy(&TM1)); 4810 PetscCall(MatDestroy(&TM2)); 4811 PetscCall(MatDestroy(&TM3)); 4812 PetscCall(MatDestroy(&TM4)); 4813 PetscCall(MatDestroy(&coarse_phi_D)); 4814 PetscCall(MatDestroy(&coarse_phi_B)); 4815 if (!pcbddc->symmetric_primal) { 4816 PetscCall(MatDestroy(&coarse_psi_D)); 4817 PetscCall(MatDestroy(&coarse_psi_B)); 4818 } 4819 PetscCall(MatDestroy(&coarse_sub_mat)); 4820 } 4821 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4822 { 4823 PetscBool gpu; 4824 4825 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu)); 4826 if (gpu) { 4827 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 4828 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 4829 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 4830 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 4831 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 4832 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 4833 } 4834 } 4835 /* get back data */ 4836 *coarse_submat_vals_n = coarse_submat_vals; 4837 PetscFunctionReturn(PETSC_SUCCESS); 4838 } 4839 4840 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 4841 { 4842 Mat *work_mat; 4843 IS isrow_s, iscol_s; 4844 PetscBool rsorted, csorted; 4845 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 4846 4847 PetscFunctionBegin; 4848 PetscCall(ISSorted(isrow, &rsorted)); 4849 PetscCall(ISSorted(iscol, &csorted)); 4850 PetscCall(ISGetLocalSize(isrow, &rsize)); 4851 PetscCall(ISGetLocalSize(iscol, &csize)); 4852 4853 if (!rsorted) { 4854 const PetscInt *idxs; 4855 PetscInt *idxs_sorted, i; 4856 4857 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 4858 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 4859 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 4860 PetscCall(ISGetIndices(isrow, &idxs)); 4861 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 4862 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4863 PetscCall(ISRestoreIndices(isrow, &idxs)); 4864 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 4865 } else { 4866 PetscCall(PetscObjectReference((PetscObject)isrow)); 4867 isrow_s = isrow; 4868 } 4869 4870 if (!csorted) { 4871 if (isrow == iscol) { 4872 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4873 iscol_s = isrow_s; 4874 } else { 4875 const PetscInt *idxs; 4876 PetscInt *idxs_sorted, i; 4877 4878 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 4879 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 4880 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 4881 PetscCall(ISGetIndices(iscol, &idxs)); 4882 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 4883 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4884 PetscCall(ISRestoreIndices(iscol, &idxs)); 4885 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 4886 } 4887 } else { 4888 PetscCall(PetscObjectReference((PetscObject)iscol)); 4889 iscol_s = iscol; 4890 } 4891 4892 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 4893 4894 if (!rsorted || !csorted) { 4895 Mat new_mat; 4896 IS is_perm_r, is_perm_c; 4897 4898 if (!rsorted) { 4899 PetscInt *idxs_r, i; 4900 PetscCall(PetscMalloc1(rsize, &idxs_r)); 4901 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 4902 PetscCall(PetscFree(idxs_perm_r)); 4903 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 4904 } else { 4905 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 4906 } 4907 PetscCall(ISSetPermutation(is_perm_r)); 4908 4909 if (!csorted) { 4910 if (isrow_s == iscol_s) { 4911 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 4912 is_perm_c = is_perm_r; 4913 } else { 4914 PetscInt *idxs_c, i; 4915 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 4916 PetscCall(PetscMalloc1(csize, &idxs_c)); 4917 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 4918 PetscCall(PetscFree(idxs_perm_c)); 4919 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 4920 } 4921 } else { 4922 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 4923 } 4924 PetscCall(ISSetPermutation(is_perm_c)); 4925 4926 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 4927 PetscCall(MatDestroy(&work_mat[0])); 4928 work_mat[0] = new_mat; 4929 PetscCall(ISDestroy(&is_perm_r)); 4930 PetscCall(ISDestroy(&is_perm_c)); 4931 } 4932 4933 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 4934 *B = work_mat[0]; 4935 PetscCall(MatDestroyMatrices(1, &work_mat)); 4936 PetscCall(ISDestroy(&isrow_s)); 4937 PetscCall(ISDestroy(&iscol_s)); 4938 PetscFunctionReturn(PETSC_SUCCESS); 4939 } 4940 4941 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4942 { 4943 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 4944 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4945 Mat new_mat, lA; 4946 IS is_local, is_global; 4947 PetscInt local_size; 4948 PetscBool isseqaij, issym, isset; 4949 4950 PetscFunctionBegin; 4951 PetscCall(MatDestroy(&pcbddc->local_mat)); 4952 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 4953 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 4954 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 4955 PetscCall(ISDestroy(&is_local)); 4956 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 4957 PetscCall(ISDestroy(&is_global)); 4958 4959 if (pcbddc->dbg_flag) { 4960 Vec x, x_change; 4961 PetscReal error; 4962 4963 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 4964 PetscCall(VecSetRandom(x, NULL)); 4965 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 4966 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4967 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4968 PetscCall(MatMult(new_mat, matis->x, matis->y)); 4969 if (!pcbddc->change_interior) { 4970 const PetscScalar *x, *y, *v; 4971 PetscReal lerror = 0.; 4972 PetscInt i; 4973 4974 PetscCall(VecGetArrayRead(matis->x, &x)); 4975 PetscCall(VecGetArrayRead(matis->y, &y)); 4976 PetscCall(VecGetArrayRead(matis->counter, &v)); 4977 for (i = 0; i < local_size; i++) 4978 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 4979 PetscCall(VecRestoreArrayRead(matis->x, &x)); 4980 PetscCall(VecRestoreArrayRead(matis->y, &y)); 4981 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 4982 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 4983 if (error > PETSC_SMALL) { 4984 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4985 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 4986 } else { 4987 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 4988 } 4989 } 4990 } 4991 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4992 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4993 PetscCall(VecAXPY(x, -1.0, x_change)); 4994 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 4995 if (error > PETSC_SMALL) { 4996 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4997 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 4998 } else { 4999 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5000 } 5001 } 5002 PetscCall(VecDestroy(&x)); 5003 PetscCall(VecDestroy(&x_change)); 5004 } 5005 5006 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5007 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5008 5009 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5010 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5011 if (isseqaij) { 5012 PetscCall(MatDestroy(&pcbddc->local_mat)); 5013 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5014 if (lA) { 5015 Mat work; 5016 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5017 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5018 PetscCall(MatDestroy(&work)); 5019 } 5020 } else { 5021 Mat work_mat; 5022 5023 PetscCall(MatDestroy(&pcbddc->local_mat)); 5024 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5025 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5026 PetscCall(MatDestroy(&work_mat)); 5027 if (lA) { 5028 Mat work; 5029 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5030 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5031 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5032 PetscCall(MatDestroy(&work)); 5033 } 5034 } 5035 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5036 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5037 PetscCall(MatDestroy(&new_mat)); 5038 PetscFunctionReturn(PETSC_SUCCESS); 5039 } 5040 5041 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5042 { 5043 PC_IS *pcis = (PC_IS *)(pc->data); 5044 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5045 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5046 PetscInt *idx_R_local = NULL; 5047 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5048 PetscInt vbs, bs; 5049 PetscBT bitmask = NULL; 5050 5051 PetscFunctionBegin; 5052 /* 5053 No need to setup local scatters if 5054 - primal space is unchanged 5055 AND 5056 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5057 AND 5058 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5059 */ 5060 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5061 /* destroy old objects */ 5062 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5063 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5064 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5065 /* Set Non-overlapping dimensions */ 5066 n_B = pcis->n_B; 5067 n_D = pcis->n - n_B; 5068 n_vertices = pcbddc->n_vertices; 5069 5070 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5071 5072 /* create auxiliary bitmask and allocate workspace */ 5073 if (!sub_schurs || !sub_schurs->reuse_solver) { 5074 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5075 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5076 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5077 5078 for (i = 0, n_R = 0; i < pcis->n; i++) { 5079 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5080 } 5081 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5082 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5083 5084 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5085 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5086 } 5087 5088 /* Block code */ 5089 vbs = 1; 5090 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5091 if (bs > 1 && !(n_vertices % bs)) { 5092 PetscBool is_blocked = PETSC_TRUE; 5093 PetscInt *vary; 5094 if (!sub_schurs || !sub_schurs->reuse_solver) { 5095 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5096 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5097 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5098 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5099 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5100 for (i = 0; i < pcis->n / bs; i++) { 5101 if (vary[i] != 0 && vary[i] != bs) { 5102 is_blocked = PETSC_FALSE; 5103 break; 5104 } 5105 } 5106 PetscCall(PetscFree(vary)); 5107 } else { 5108 /* Verify directly the R set */ 5109 for (i = 0; i < n_R / bs; i++) { 5110 PetscInt j, node = idx_R_local[bs * i]; 5111 for (j = 1; j < bs; j++) { 5112 if (node != idx_R_local[bs * i + j] - j) { 5113 is_blocked = PETSC_FALSE; 5114 break; 5115 } 5116 } 5117 } 5118 } 5119 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5120 vbs = bs; 5121 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5122 } 5123 } 5124 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5125 if (sub_schurs && sub_schurs->reuse_solver) { 5126 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5127 5128 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5129 PetscCall(ISDestroy(&reuse_solver->is_R)); 5130 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5131 reuse_solver->is_R = pcbddc->is_R_local; 5132 } else { 5133 PetscCall(PetscFree(idx_R_local)); 5134 } 5135 5136 /* print some info if requested */ 5137 if (pcbddc->dbg_flag) { 5138 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5139 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5140 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5141 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5142 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5143 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5144 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5145 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5146 } 5147 5148 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5149 if (!sub_schurs || !sub_schurs->reuse_solver) { 5150 IS is_aux1, is_aux2; 5151 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5152 5153 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5154 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5155 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5156 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5157 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5158 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5159 for (i = 0, j = 0; i < n_R; i++) { 5160 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5161 } 5162 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5163 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5164 for (i = 0, j = 0; i < n_B; i++) { 5165 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5166 } 5167 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5168 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5169 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5170 PetscCall(ISDestroy(&is_aux1)); 5171 PetscCall(ISDestroy(&is_aux2)); 5172 5173 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5174 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5175 for (i = 0, j = 0; i < n_R; i++) { 5176 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5177 } 5178 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5179 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5180 PetscCall(ISDestroy(&is_aux1)); 5181 } 5182 PetscCall(PetscBTDestroy(&bitmask)); 5183 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5184 } else { 5185 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5186 IS tis; 5187 PetscInt schur_size; 5188 5189 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5190 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5191 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5192 PetscCall(ISDestroy(&tis)); 5193 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5194 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5195 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5196 PetscCall(ISDestroy(&tis)); 5197 } 5198 } 5199 PetscFunctionReturn(PETSC_SUCCESS); 5200 } 5201 5202 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5203 { 5204 MatNullSpace NullSpace; 5205 Mat dmat; 5206 const Vec *nullvecs; 5207 Vec v, v2, *nullvecs2; 5208 VecScatter sct = NULL; 5209 PetscContainer c; 5210 PetscScalar *ddata; 5211 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5212 PetscBool nnsp_has_cnst; 5213 5214 PetscFunctionBegin; 5215 if (!is && !B) { /* MATIS */ 5216 Mat_IS *matis = (Mat_IS *)A->data; 5217 5218 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5219 sct = matis->cctx; 5220 PetscCall(PetscObjectReference((PetscObject)sct)); 5221 } else { 5222 PetscCall(MatGetNullSpace(B, &NullSpace)); 5223 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5224 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5225 } 5226 PetscCall(MatGetNullSpace(A, &NullSpace)); 5227 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5228 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5229 5230 PetscCall(MatCreateVecs(A, &v, NULL)); 5231 PetscCall(MatCreateVecs(B, &v2, NULL)); 5232 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5233 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5234 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5235 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5236 PetscCall(VecGetBlockSize(v2, &bs)); 5237 PetscCall(VecGetSize(v2, &N)); 5238 PetscCall(VecGetLocalSize(v2, &n)); 5239 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5240 for (k = 0; k < nnsp_size; k++) { 5241 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5242 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5243 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5244 } 5245 if (nnsp_has_cnst) { 5246 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5247 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5248 } 5249 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5250 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5251 5252 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5253 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5254 PetscCall(PetscContainerSetPointer(c, ddata)); 5255 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5256 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5257 PetscCall(PetscContainerDestroy(&c)); 5258 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5259 PetscCall(MatDestroy(&dmat)); 5260 5261 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5262 PetscCall(PetscFree(nullvecs2)); 5263 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5264 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5265 PetscCall(VecDestroy(&v)); 5266 PetscCall(VecDestroy(&v2)); 5267 PetscCall(VecScatterDestroy(&sct)); 5268 PetscFunctionReturn(PETSC_SUCCESS); 5269 } 5270 5271 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5272 { 5273 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5274 PC_IS *pcis = (PC_IS *)pc->data; 5275 PC pc_temp; 5276 Mat A_RR; 5277 MatNullSpace nnsp; 5278 MatReuse reuse; 5279 PetscScalar m_one = -1.0; 5280 PetscReal value; 5281 PetscInt n_D, n_R; 5282 PetscBool issbaij, opts, isset, issym; 5283 void (*f)(void) = NULL; 5284 char dir_prefix[256], neu_prefix[256], str_level[16]; 5285 size_t len; 5286 5287 PetscFunctionBegin; 5288 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5289 /* approximate solver, propagate NearNullSpace if needed */ 5290 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5291 MatNullSpace gnnsp1, gnnsp2; 5292 PetscBool lhas, ghas; 5293 5294 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5295 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5296 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5297 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5298 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5299 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5300 } 5301 5302 /* compute prefixes */ 5303 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5304 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5305 if (!pcbddc->current_level) { 5306 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5307 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5308 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5309 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5310 } else { 5311 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 5312 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5313 len -= 15; /* remove "pc_bddc_coarse_" */ 5314 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5315 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5316 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5317 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5318 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5319 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5320 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5321 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5322 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5323 } 5324 5325 /* DIRICHLET PROBLEM */ 5326 if (dirichlet) { 5327 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5328 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5329 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5330 if (pcbddc->dbg_flag) { 5331 Mat A_IIn; 5332 5333 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5334 PetscCall(MatDestroy(&pcis->A_II)); 5335 pcis->A_II = A_IIn; 5336 } 5337 } 5338 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5339 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5340 5341 /* Matrix for Dirichlet problem is pcis->A_II */ 5342 n_D = pcis->n - pcis->n_B; 5343 opts = PETSC_FALSE; 5344 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5345 opts = PETSC_TRUE; 5346 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5347 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5348 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5349 /* default */ 5350 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5351 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5352 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5353 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5354 if (issbaij) { 5355 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5356 } else { 5357 PetscCall(PCSetType(pc_temp, PCLU)); 5358 } 5359 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5360 } 5361 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5362 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5363 /* Allow user's customization */ 5364 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5365 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5366 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5367 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5368 } 5369 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5370 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5371 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5372 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5373 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5374 const PetscInt *idxs; 5375 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5376 5377 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5378 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5379 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5380 for (i = 0; i < nl; i++) { 5381 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5382 } 5383 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5384 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5385 PetscCall(PetscFree(scoords)); 5386 } 5387 if (sub_schurs && sub_schurs->reuse_solver) { 5388 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5389 5390 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5391 } 5392 5393 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5394 if (!n_D) { 5395 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5396 PetscCall(PCSetType(pc_temp, PCNONE)); 5397 } 5398 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5399 /* set ksp_D into pcis data */ 5400 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5401 PetscCall(KSPDestroy(&pcis->ksp_D)); 5402 pcis->ksp_D = pcbddc->ksp_D; 5403 } 5404 5405 /* NEUMANN PROBLEM */ 5406 A_RR = NULL; 5407 if (neumann) { 5408 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5409 PetscInt ibs, mbs; 5410 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5411 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5412 5413 reuse_neumann_solver = PETSC_FALSE; 5414 if (sub_schurs && sub_schurs->reuse_solver) { 5415 IS iP; 5416 5417 reuse_neumann_solver = PETSC_TRUE; 5418 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5419 if (iP) reuse_neumann_solver = PETSC_FALSE; 5420 } 5421 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5422 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5423 if (pcbddc->ksp_R) { /* already created ksp */ 5424 PetscInt nn_R; 5425 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5426 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5427 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5428 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5429 PetscCall(KSPReset(pcbddc->ksp_R)); 5430 PetscCall(MatDestroy(&A_RR)); 5431 reuse = MAT_INITIAL_MATRIX; 5432 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5433 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5434 PetscCall(MatDestroy(&A_RR)); 5435 reuse = MAT_INITIAL_MATRIX; 5436 } else { /* safe to reuse the matrix */ 5437 reuse = MAT_REUSE_MATRIX; 5438 } 5439 } 5440 /* last check */ 5441 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5442 PetscCall(MatDestroy(&A_RR)); 5443 reuse = MAT_INITIAL_MATRIX; 5444 } 5445 } else { /* first time, so we need to create the matrix */ 5446 reuse = MAT_INITIAL_MATRIX; 5447 } 5448 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5449 TODO: Get Rid of these conversions */ 5450 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5451 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5452 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5453 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5454 if (matis->A == pcbddc->local_mat) { 5455 PetscCall(MatDestroy(&pcbddc->local_mat)); 5456 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5457 } else { 5458 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5459 } 5460 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 5461 if (matis->A == pcbddc->local_mat) { 5462 PetscCall(MatDestroy(&pcbddc->local_mat)); 5463 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5464 } else { 5465 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5466 } 5467 } 5468 /* extract A_RR */ 5469 if (reuse_neumann_solver) { 5470 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5471 5472 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5473 PetscCall(MatDestroy(&A_RR)); 5474 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5475 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5476 } else { 5477 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5478 } 5479 } else { 5480 PetscCall(MatDestroy(&A_RR)); 5481 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5482 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5483 } 5484 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5485 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5486 } 5487 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5488 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5489 opts = PETSC_FALSE; 5490 if (!pcbddc->ksp_R) { /* create object if not present */ 5491 opts = PETSC_TRUE; 5492 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5493 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 5494 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5495 /* default */ 5496 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5497 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5498 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5499 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5500 if (issbaij) { 5501 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5502 } else { 5503 PetscCall(PCSetType(pc_temp, PCLU)); 5504 } 5505 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5506 } 5507 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5508 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5509 if (opts) { /* Allow user's customization once */ 5510 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5511 } 5512 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5513 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5514 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5515 } 5516 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5517 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5518 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5519 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5520 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5521 const PetscInt *idxs; 5522 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5523 5524 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 5525 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 5526 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5527 for (i = 0; i < nl; i++) { 5528 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5529 } 5530 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 5531 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5532 PetscCall(PetscFree(scoords)); 5533 } 5534 5535 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5536 if (!n_R) { 5537 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5538 PetscCall(PCSetType(pc_temp, PCNONE)); 5539 } 5540 /* Reuse solver if it is present */ 5541 if (reuse_neumann_solver) { 5542 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5543 5544 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 5545 } 5546 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5547 } 5548 5549 if (pcbddc->dbg_flag) { 5550 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5551 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5552 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5553 } 5554 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5555 5556 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5557 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 5558 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 5559 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 5560 /* check Dirichlet and Neumann solvers */ 5561 if (pcbddc->dbg_flag) { 5562 if (dirichlet) { /* Dirichlet */ 5563 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 5564 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 5565 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 5566 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 5567 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 5568 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 5569 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value)); 5570 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5571 } 5572 if (neumann) { /* Neumann */ 5573 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 5574 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 5575 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 5576 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5577 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 5578 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 5579 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value)); 5580 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5581 } 5582 } 5583 /* free Neumann problem's matrix */ 5584 PetscCall(MatDestroy(&A_RR)); 5585 PetscFunctionReturn(PETSC_SUCCESS); 5586 } 5587 5588 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5589 { 5590 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5591 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5592 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5593 5594 PetscFunctionBegin; 5595 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 5596 if (!pcbddc->switch_static) { 5597 if (applytranspose && pcbddc->local_auxmat1) { 5598 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 5599 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5600 } 5601 if (!reuse_solver) { 5602 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5603 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5604 } else { 5605 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5606 5607 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5608 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5609 } 5610 } else { 5611 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5612 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5613 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5614 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5615 if (applytranspose && pcbddc->local_auxmat1) { 5616 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 5617 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5618 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5619 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5620 } 5621 } 5622 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5623 if (!reuse_solver || pcbddc->switch_static) { 5624 if (applytranspose) { 5625 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5626 } else { 5627 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5628 } 5629 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 5630 } else { 5631 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5632 5633 if (applytranspose) { 5634 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5635 } else { 5636 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5637 } 5638 } 5639 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5640 PetscCall(VecSet(inout_B, 0.)); 5641 if (!pcbddc->switch_static) { 5642 if (!reuse_solver) { 5643 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5644 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5645 } else { 5646 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5647 5648 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5649 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5650 } 5651 if (!applytranspose && pcbddc->local_auxmat1) { 5652 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5653 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 5654 } 5655 } else { 5656 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5657 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5658 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5659 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5660 if (!applytranspose && pcbddc->local_auxmat1) { 5661 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5662 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 5663 } 5664 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5665 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5666 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5667 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5668 } 5669 PetscFunctionReturn(PETSC_SUCCESS); 5670 } 5671 5672 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5673 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5674 { 5675 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5676 PC_IS *pcis = (PC_IS *)(pc->data); 5677 const PetscScalar zero = 0.0; 5678 5679 PetscFunctionBegin; 5680 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5681 if (!pcbddc->benign_apply_coarse_only) { 5682 if (applytranspose) { 5683 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 5684 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5685 } else { 5686 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 5687 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5688 } 5689 } else { 5690 PetscCall(VecSet(pcbddc->vec1_P, zero)); 5691 } 5692 5693 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5694 if (pcbddc->benign_n) { 5695 PetscScalar *array; 5696 PetscInt j; 5697 5698 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5699 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 5700 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5701 } 5702 5703 /* start communications from local primal nodes to rhs of coarse solver */ 5704 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 5705 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 5706 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 5707 5708 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5709 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5710 if (pcbddc->coarse_ksp) { 5711 Mat coarse_mat; 5712 Vec rhs, sol; 5713 MatNullSpace nullsp; 5714 PetscBool isbddc = PETSC_FALSE; 5715 5716 if (pcbddc->benign_have_null) { 5717 PC coarse_pc; 5718 5719 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5720 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 5721 /* we need to propagate to coarser levels the need for a possible benign correction */ 5722 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5723 PC_BDDC *coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5724 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5725 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5726 } 5727 } 5728 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 5729 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 5730 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 5731 if (applytranspose) { 5732 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 5733 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 5734 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5735 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 5736 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5737 } else { 5738 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 5739 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5740 PC coarse_pc; 5741 5742 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 5743 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5744 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 5745 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 5746 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 5747 } else { 5748 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 5749 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5750 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5751 } 5752 } 5753 /* we don't need the benign correction at coarser levels anymore */ 5754 if (pcbddc->benign_have_null && isbddc) { 5755 PC coarse_pc; 5756 PC_BDDC *coarsepcbddc; 5757 5758 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5759 coarsepcbddc = (PC_BDDC *)(coarse_pc->data); 5760 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5761 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5762 } 5763 } 5764 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5765 5766 /* Local solution on R nodes */ 5767 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 5768 /* communications from coarse sol to local primal nodes */ 5769 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 5770 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 5771 5772 /* Sum contributions from the two levels */ 5773 if (!pcbddc->benign_apply_coarse_only) { 5774 if (applytranspose) { 5775 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5776 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5777 } else { 5778 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5779 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5780 } 5781 /* store p0 */ 5782 if (pcbddc->benign_n) { 5783 PetscScalar *array; 5784 PetscInt j; 5785 5786 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5787 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 5788 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5789 } 5790 } else { /* expand the coarse solution */ 5791 if (applytranspose) { 5792 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 5793 } else { 5794 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 5795 } 5796 } 5797 PetscFunctionReturn(PETSC_SUCCESS); 5798 } 5799 5800 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 5801 { 5802 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5803 Vec from, to; 5804 const PetscScalar *array; 5805 5806 PetscFunctionBegin; 5807 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5808 from = pcbddc->coarse_vec; 5809 to = pcbddc->vec1_P; 5810 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5811 Vec tvec; 5812 5813 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5814 PetscCall(VecResetArray(tvec)); 5815 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 5816 PetscCall(VecGetArrayRead(tvec, &array)); 5817 PetscCall(VecPlaceArray(from, array)); 5818 PetscCall(VecRestoreArrayRead(tvec, &array)); 5819 } 5820 } else { /* from local to global -> put data in coarse right hand side */ 5821 from = pcbddc->vec1_P; 5822 to = pcbddc->coarse_vec; 5823 } 5824 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5825 PetscFunctionReturn(PETSC_SUCCESS); 5826 } 5827 5828 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5829 { 5830 PC_BDDC *pcbddc = (PC_BDDC *)(pc->data); 5831 Vec from, to; 5832 const PetscScalar *array; 5833 5834 PetscFunctionBegin; 5835 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5836 from = pcbddc->coarse_vec; 5837 to = pcbddc->vec1_P; 5838 } else { /* from local to global -> put data in coarse right hand side */ 5839 from = pcbddc->vec1_P; 5840 to = pcbddc->coarse_vec; 5841 } 5842 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5843 if (smode == SCATTER_FORWARD) { 5844 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5845 Vec tvec; 5846 5847 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5848 PetscCall(VecGetArrayRead(to, &array)); 5849 PetscCall(VecPlaceArray(tvec, array)); 5850 PetscCall(VecRestoreArrayRead(to, &array)); 5851 } 5852 } else { 5853 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5854 PetscCall(VecResetArray(from)); 5855 } 5856 } 5857 PetscFunctionReturn(PETSC_SUCCESS); 5858 } 5859 5860 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5861 { 5862 PC_IS *pcis = (PC_IS *)(pc->data); 5863 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5864 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5865 /* one and zero */ 5866 PetscScalar one = 1.0, zero = 0.0; 5867 /* space to store constraints and their local indices */ 5868 PetscScalar *constraints_data; 5869 PetscInt *constraints_idxs, *constraints_idxs_B; 5870 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 5871 PetscInt *constraints_n; 5872 /* iterators */ 5873 PetscInt i, j, k, total_counts, total_counts_cc, cum; 5874 /* BLAS integers */ 5875 PetscBLASInt lwork, lierr; 5876 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 5877 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 5878 /* reuse */ 5879 PetscInt olocal_primal_size, olocal_primal_size_cc; 5880 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 5881 /* change of basis */ 5882 PetscBool qr_needed; 5883 PetscBT change_basis, qr_needed_idx; 5884 /* auxiliary stuff */ 5885 PetscInt *nnz, *is_indices; 5886 PetscInt ncc; 5887 /* some quantities */ 5888 PetscInt n_vertices, total_primal_vertices, valid_constraints; 5889 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 5890 PetscReal tol; /* tolerance for retaining eigenmodes */ 5891 5892 PetscFunctionBegin; 5893 tol = PetscSqrtReal(PETSC_SMALL); 5894 /* Destroy Mat objects computed previously */ 5895 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 5896 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 5897 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 5898 /* save info on constraints from previous setup (if any) */ 5899 olocal_primal_size = pcbddc->local_primal_size; 5900 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5901 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 5902 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 5903 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 5904 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 5905 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 5906 5907 if (!pcbddc->adaptive_selection) { 5908 IS ISForVertices, *ISForFaces, *ISForEdges; 5909 MatNullSpace nearnullsp; 5910 const Vec *nearnullvecs; 5911 Vec *localnearnullsp; 5912 PetscScalar *array; 5913 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 5914 PetscBool nnsp_has_cnst; 5915 /* LAPACK working arrays for SVD or POD */ 5916 PetscBool skip_lapack, boolforchange; 5917 PetscScalar *work; 5918 PetscReal *singular_vals; 5919 #if defined(PETSC_USE_COMPLEX) 5920 PetscReal *rwork; 5921 #endif 5922 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 5923 PetscBLASInt dummy_int = 1; 5924 PetscScalar dummy_scalar = 1.; 5925 PetscBool use_pod = PETSC_FALSE; 5926 5927 /* MKL SVD with same input gives different results on different processes! */ 5928 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 5929 use_pod = PETSC_TRUE; 5930 #endif 5931 /* Get index sets for faces, edges and vertices from graph */ 5932 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 5933 o_nf = n_ISForFaces; 5934 o_ne = n_ISForEdges; 5935 n_vertices = 0; 5936 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 5937 /* print some info */ 5938 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5939 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 5940 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 5941 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5942 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 5943 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 5944 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 5945 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 5946 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5947 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 5948 } 5949 5950 if (!pcbddc->use_vertices) n_vertices = 0; 5951 if (!pcbddc->use_edges) n_ISForEdges = 0; 5952 if (!pcbddc->use_faces) n_ISForFaces = 0; 5953 5954 /* check if near null space is attached to global mat */ 5955 if (pcbddc->use_nnsp) { 5956 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 5957 } else nearnullsp = NULL; 5958 5959 if (nearnullsp) { 5960 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 5961 /* remove any stored info */ 5962 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 5963 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 5964 /* store information for BDDC solver reuse */ 5965 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 5966 pcbddc->onearnullspace = nearnullsp; 5967 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 5968 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 5969 } else { /* if near null space is not provided BDDC uses constants by default */ 5970 nnsp_size = 0; 5971 nnsp_has_cnst = PETSC_TRUE; 5972 } 5973 /* get max number of constraints on a single cc */ 5974 max_constraints = nnsp_size; 5975 if (nnsp_has_cnst) max_constraints++; 5976 5977 /* 5978 Evaluate maximum storage size needed by the procedure 5979 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5980 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5981 There can be multiple constraints per connected component 5982 */ 5983 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 5984 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 5985 5986 total_counts = n_ISForFaces + n_ISForEdges; 5987 total_counts *= max_constraints; 5988 total_counts += n_vertices; 5989 PetscCall(PetscBTCreate(total_counts, &change_basis)); 5990 5991 total_counts = 0; 5992 max_size_of_constraint = 0; 5993 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 5994 IS used_is; 5995 if (i < n_ISForEdges) { 5996 used_is = ISForEdges[i]; 5997 } else { 5998 used_is = ISForFaces[i - n_ISForEdges]; 5999 } 6000 PetscCall(ISGetSize(used_is, &j)); 6001 total_counts += j; 6002 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6003 } 6004 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6005 6006 /* get local part of global near null space vectors */ 6007 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6008 for (k = 0; k < nnsp_size; k++) { 6009 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6010 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6011 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6012 } 6013 6014 /* whether or not to skip lapack calls */ 6015 skip_lapack = PETSC_TRUE; 6016 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6017 6018 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6019 if (!skip_lapack) { 6020 PetscScalar temp_work; 6021 6022 if (use_pod) { 6023 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6024 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6025 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6026 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6027 #if defined(PETSC_USE_COMPLEX) 6028 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6029 #endif 6030 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6031 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6032 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6033 lwork = -1; 6034 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6035 #if !defined(PETSC_USE_COMPLEX) 6036 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6037 #else 6038 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6039 #endif 6040 PetscCall(PetscFPTrapPop()); 6041 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 6042 } else { 6043 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6044 /* SVD */ 6045 PetscInt max_n, min_n; 6046 max_n = max_size_of_constraint; 6047 min_n = max_constraints; 6048 if (max_size_of_constraint < max_constraints) { 6049 min_n = max_size_of_constraint; 6050 max_n = max_constraints; 6051 } 6052 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6053 #if defined(PETSC_USE_COMPLEX) 6054 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6055 #endif 6056 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6057 lwork = -1; 6058 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6059 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6060 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6061 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6062 #if !defined(PETSC_USE_COMPLEX) 6063 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6064 #else 6065 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6066 #endif 6067 PetscCall(PetscFPTrapPop()); 6068 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6069 #else 6070 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6071 #endif /* on missing GESVD */ 6072 } 6073 /* Allocate optimal workspace */ 6074 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6075 PetscCall(PetscMalloc1(lwork, &work)); 6076 } 6077 /* Now we can loop on constraining sets */ 6078 total_counts = 0; 6079 constraints_idxs_ptr[0] = 0; 6080 constraints_data_ptr[0] = 0; 6081 /* vertices */ 6082 if (n_vertices) { 6083 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6084 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6085 for (i = 0; i < n_vertices; i++) { 6086 constraints_n[total_counts] = 1; 6087 constraints_data[total_counts] = 1.0; 6088 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6089 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6090 total_counts++; 6091 } 6092 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6093 } 6094 6095 /* edges and faces */ 6096 total_counts_cc = total_counts; 6097 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6098 IS used_is; 6099 PetscBool idxs_copied = PETSC_FALSE; 6100 6101 if (ncc < n_ISForEdges) { 6102 used_is = ISForEdges[ncc]; 6103 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6104 } else { 6105 used_is = ISForFaces[ncc - n_ISForEdges]; 6106 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6107 } 6108 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6109 6110 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6111 if (!size_of_constraint) continue; 6112 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6113 /* change of basis should not be performed on local periodic nodes */ 6114 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6115 if (nnsp_has_cnst) { 6116 PetscScalar quad_value; 6117 6118 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6119 idxs_copied = PETSC_TRUE; 6120 6121 if (!pcbddc->use_nnsp_true) { 6122 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6123 } else { 6124 quad_value = 1.0; 6125 } 6126 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6127 temp_constraints++; 6128 total_counts++; 6129 } 6130 for (k = 0; k < nnsp_size; k++) { 6131 PetscReal real_value; 6132 PetscScalar *ptr_to_data; 6133 6134 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6135 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6136 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6137 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6138 /* check if array is null on the connected component */ 6139 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6140 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6141 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6142 temp_constraints++; 6143 total_counts++; 6144 if (!idxs_copied) { 6145 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6146 idxs_copied = PETSC_TRUE; 6147 } 6148 } 6149 } 6150 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6151 valid_constraints = temp_constraints; 6152 if (!pcbddc->use_nnsp_true && temp_constraints) { 6153 if (temp_constraints == 1) { /* just normalize the constraint */ 6154 PetscScalar norm, *ptr_to_data; 6155 6156 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6157 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6158 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6159 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6160 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6161 } else { /* perform SVD */ 6162 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6163 6164 if (use_pod) { 6165 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6166 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6167 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6168 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6169 from that computed using LAPACKgesvd 6170 -> This is due to a different computation of eigenvectors in LAPACKheev 6171 -> The quality of the POD-computed basis will be the same */ 6172 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6173 /* Store upper triangular part of correlation matrix */ 6174 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6175 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6176 for (j = 0; j < temp_constraints; j++) { 6177 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6178 } 6179 /* compute eigenvalues and eigenvectors of correlation matrix */ 6180 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6181 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6182 #if !defined(PETSC_USE_COMPLEX) 6183 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6184 #else 6185 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6186 #endif 6187 PetscCall(PetscFPTrapPop()); 6188 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6189 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6190 j = 0; 6191 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6192 total_counts = total_counts - j; 6193 valid_constraints = temp_constraints - j; 6194 /* scale and copy POD basis into used quadrature memory */ 6195 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6196 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6197 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6198 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6199 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6200 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6201 if (j < temp_constraints) { 6202 PetscInt ii; 6203 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6204 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6205 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6206 PetscCall(PetscFPTrapPop()); 6207 for (k = 0; k < temp_constraints - j; k++) { 6208 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6209 } 6210 } 6211 } else { 6212 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6213 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6214 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6215 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6216 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6217 #if !defined(PETSC_USE_COMPLEX) 6218 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6219 #else 6220 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6221 #endif 6222 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6223 PetscCall(PetscFPTrapPop()); 6224 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6225 k = temp_constraints; 6226 if (k > size_of_constraint) k = size_of_constraint; 6227 j = 0; 6228 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6229 valid_constraints = k - j; 6230 total_counts = total_counts - temp_constraints + valid_constraints; 6231 #else 6232 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6233 #endif /* on missing GESVD */ 6234 } 6235 } 6236 } 6237 /* update pointers information */ 6238 if (valid_constraints) { 6239 constraints_n[total_counts_cc] = valid_constraints; 6240 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6241 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6242 /* set change_of_basis flag */ 6243 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6244 total_counts_cc++; 6245 } 6246 } 6247 /* free workspace */ 6248 if (!skip_lapack) { 6249 PetscCall(PetscFree(work)); 6250 #if defined(PETSC_USE_COMPLEX) 6251 PetscCall(PetscFree(rwork)); 6252 #endif 6253 PetscCall(PetscFree(singular_vals)); 6254 PetscCall(PetscFree(correlation_mat)); 6255 PetscCall(PetscFree(temp_basis)); 6256 } 6257 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6258 PetscCall(PetscFree(localnearnullsp)); 6259 /* free index sets of faces, edges and vertices */ 6260 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6261 } else { 6262 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6263 6264 total_counts = 0; 6265 n_vertices = 0; 6266 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6267 max_constraints = 0; 6268 total_counts_cc = 0; 6269 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6270 total_counts += pcbddc->adaptive_constraints_n[i]; 6271 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6272 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6273 } 6274 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6275 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6276 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6277 constraints_data = pcbddc->adaptive_constraints_data; 6278 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6279 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6280 total_counts_cc = 0; 6281 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6282 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6283 } 6284 6285 max_size_of_constraint = 0; 6286 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6287 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6288 /* Change of basis */ 6289 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6290 if (pcbddc->use_change_of_basis) { 6291 for (i = 0; i < sub_schurs->n_subs; i++) { 6292 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6293 } 6294 } 6295 } 6296 pcbddc->local_primal_size = total_counts; 6297 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6298 6299 /* map constraints_idxs in boundary numbering */ 6300 if (pcbddc->use_change_of_basis) { 6301 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6302 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6303 } 6304 6305 /* Create constraint matrix */ 6306 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6307 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6308 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6309 6310 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6311 /* determine if a QR strategy is needed for change of basis */ 6312 qr_needed = pcbddc->use_qr_single; 6313 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6314 total_primal_vertices = 0; 6315 pcbddc->local_primal_size_cc = 0; 6316 for (i = 0; i < total_counts_cc; i++) { 6317 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6318 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6319 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6320 pcbddc->local_primal_size_cc += 1; 6321 } else if (PetscBTLookup(change_basis, i)) { 6322 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6323 pcbddc->local_primal_size_cc += constraints_n[i]; 6324 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6325 PetscCall(PetscBTSet(qr_needed_idx, i)); 6326 qr_needed = PETSC_TRUE; 6327 } 6328 } else { 6329 pcbddc->local_primal_size_cc += 1; 6330 } 6331 } 6332 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6333 pcbddc->n_vertices = total_primal_vertices; 6334 /* permute indices in order to have a sorted set of vertices */ 6335 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6336 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6337 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6338 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6339 6340 /* nonzero structure of constraint matrix */ 6341 /* and get reference dof for local constraints */ 6342 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6343 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6344 6345 j = total_primal_vertices; 6346 total_counts = total_primal_vertices; 6347 cum = total_primal_vertices; 6348 for (i = n_vertices; i < total_counts_cc; i++) { 6349 if (!PetscBTLookup(change_basis, i)) { 6350 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6351 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6352 cum++; 6353 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6354 for (k = 0; k < constraints_n[i]; k++) { 6355 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6356 nnz[j + k] = size_of_constraint; 6357 } 6358 j += constraints_n[i]; 6359 } 6360 } 6361 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6362 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6363 PetscCall(PetscFree(nnz)); 6364 6365 /* set values in constraint matrix */ 6366 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6367 total_counts = total_primal_vertices; 6368 for (i = n_vertices; i < total_counts_cc; i++) { 6369 if (!PetscBTLookup(change_basis, i)) { 6370 PetscInt *cols; 6371 6372 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6373 cols = constraints_idxs + constraints_idxs_ptr[i]; 6374 for (k = 0; k < constraints_n[i]; k++) { 6375 PetscInt row = total_counts + k; 6376 PetscScalar *vals; 6377 6378 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6379 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6380 } 6381 total_counts += constraints_n[i]; 6382 } 6383 } 6384 /* assembling */ 6385 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6386 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6387 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6388 6389 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6390 if (pcbddc->use_change_of_basis) { 6391 /* dual and primal dofs on a single cc */ 6392 PetscInt dual_dofs, primal_dofs; 6393 /* working stuff for GEQRF */ 6394 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6395 PetscBLASInt lqr_work; 6396 /* working stuff for UNGQR */ 6397 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6398 PetscBLASInt lgqr_work; 6399 /* working stuff for TRTRS */ 6400 PetscScalar *trs_rhs = NULL; 6401 PetscBLASInt Blas_NRHS; 6402 /* pointers for values insertion into change of basis matrix */ 6403 PetscInt *start_rows, *start_cols; 6404 PetscScalar *start_vals; 6405 /* working stuff for values insertion */ 6406 PetscBT is_primal; 6407 PetscInt *aux_primal_numbering_B; 6408 /* matrix sizes */ 6409 PetscInt global_size, local_size; 6410 /* temporary change of basis */ 6411 Mat localChangeOfBasisMatrix; 6412 /* extra space for debugging */ 6413 PetscScalar *dbg_work = NULL; 6414 6415 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6416 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6417 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6418 /* nonzeros for local mat */ 6419 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6420 if (!pcbddc->benign_change || pcbddc->fake_change) { 6421 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6422 } else { 6423 const PetscInt *ii; 6424 PetscInt n; 6425 PetscBool flg_row; 6426 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6427 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6428 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6429 } 6430 for (i = n_vertices; i < total_counts_cc; i++) { 6431 if (PetscBTLookup(change_basis, i)) { 6432 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6433 if (PetscBTLookup(qr_needed_idx, i)) { 6434 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6435 } else { 6436 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6437 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6438 } 6439 } 6440 } 6441 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6442 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6443 PetscCall(PetscFree(nnz)); 6444 /* Set interior change in the matrix */ 6445 if (!pcbddc->benign_change || pcbddc->fake_change) { 6446 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6447 } else { 6448 const PetscInt *ii, *jj; 6449 PetscScalar *aa; 6450 PetscInt n; 6451 PetscBool flg_row; 6452 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6453 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6454 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6455 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6456 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6457 } 6458 6459 if (pcbddc->dbg_flag) { 6460 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6461 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6462 } 6463 6464 /* Now we loop on the constraints which need a change of basis */ 6465 /* 6466 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6467 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6468 6469 Basic blocks of change of basis matrix T computed: 6470 6471 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6472 6473 | 1 0 ... 0 s_1/S | 6474 | 0 1 ... 0 s_2/S | 6475 | ... | 6476 | 0 ... 1 s_{n-1}/S | 6477 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6478 6479 with S = \sum_{i=1}^n s_i^2 6480 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6481 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6482 6483 - QR decomposition of constraints otherwise 6484 */ 6485 if (qr_needed && max_size_of_constraint) { 6486 /* space to store Q */ 6487 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6488 /* array to store scaling factors for reflectors */ 6489 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6490 /* first we issue queries for optimal work */ 6491 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6492 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6493 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6494 lqr_work = -1; 6495 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6496 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6497 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6498 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 6499 lgqr_work = -1; 6500 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6501 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6502 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6503 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6504 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6505 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6506 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6507 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6508 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 6509 /* array to store rhs and solution of triangular solver */ 6510 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6511 /* allocating workspace for check */ 6512 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 6513 } 6514 /* array to store whether a node is primal or not */ 6515 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6516 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6517 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6518 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6519 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 6520 PetscCall(PetscFree(aux_primal_numbering_B)); 6521 6522 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6523 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 6524 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 6525 if (PetscBTLookup(change_basis, total_counts)) { 6526 /* get constraint info */ 6527 primal_dofs = constraints_n[total_counts]; 6528 dual_dofs = size_of_constraint - primal_dofs; 6529 6530 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 6531 6532 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 6533 6534 /* copy quadrature constraints for change of basis check */ 6535 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6536 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6537 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6538 6539 /* compute QR decomposition of constraints */ 6540 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6541 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6542 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6543 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6544 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 6545 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 6546 PetscCall(PetscFPTrapPop()); 6547 6548 /* explicitly compute R^-T */ 6549 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 6550 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 6551 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6552 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 6553 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6554 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6555 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6556 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 6557 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 6558 PetscCall(PetscFPTrapPop()); 6559 6560 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6561 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6562 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6563 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6564 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6565 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6566 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 6567 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 6568 PetscCall(PetscFPTrapPop()); 6569 6570 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6571 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6572 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6573 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6574 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6575 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6576 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6577 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6578 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6579 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6580 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 6581 PetscCall(PetscFPTrapPop()); 6582 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6583 6584 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6585 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6586 /* insert cols for primal dofs */ 6587 for (j = 0; j < primal_dofs; j++) { 6588 start_vals = &qr_basis[j * size_of_constraint]; 6589 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6590 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6591 } 6592 /* insert cols for dual dofs */ 6593 for (j = 0, k = 0; j < dual_dofs; k++) { 6594 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 6595 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 6596 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6597 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6598 j++; 6599 } 6600 } 6601 6602 /* check change of basis */ 6603 if (pcbddc->dbg_flag) { 6604 PetscInt ii, jj; 6605 PetscBool valid_qr = PETSC_TRUE; 6606 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 6607 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6608 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 6609 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6610 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 6611 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 6612 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6613 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 6614 PetscCall(PetscFPTrapPop()); 6615 for (jj = 0; jj < size_of_constraint; jj++) { 6616 for (ii = 0; ii < primal_dofs; ii++) { 6617 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6618 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6619 } 6620 } 6621 if (!valid_qr) { 6622 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 6623 for (jj = 0; jj < size_of_constraint; jj++) { 6624 for (ii = 0; ii < primal_dofs; ii++) { 6625 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 6626 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6627 } 6628 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 6629 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6630 } 6631 } 6632 } 6633 } else { 6634 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 6635 } 6636 } 6637 } else { /* simple transformation block */ 6638 PetscInt row, col; 6639 PetscScalar val, norm; 6640 6641 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6642 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 6643 for (j = 0; j < size_of_constraint; j++) { 6644 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 6645 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6646 if (!PetscBTLookup(is_primal, row_B)) { 6647 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6648 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 6649 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 6650 } else { 6651 for (k = 0; k < size_of_constraint; k++) { 6652 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6653 if (row != col) { 6654 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 6655 } else { 6656 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 6657 } 6658 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 6659 } 6660 } 6661 } 6662 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 6663 } 6664 } else { 6665 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 6666 } 6667 } 6668 6669 /* free workspace */ 6670 if (qr_needed) { 6671 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6672 PetscCall(PetscFree(trs_rhs)); 6673 PetscCall(PetscFree(qr_tau)); 6674 PetscCall(PetscFree(qr_work)); 6675 PetscCall(PetscFree(gqr_work)); 6676 PetscCall(PetscFree(qr_basis)); 6677 } 6678 PetscCall(PetscBTDestroy(&is_primal)); 6679 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6680 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6681 6682 /* assembling of global change of variable */ 6683 if (!pcbddc->fake_change) { 6684 Mat tmat; 6685 PetscInt bs; 6686 6687 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 6688 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 6689 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 6690 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 6691 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 6692 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 6693 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 6694 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 6695 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 6696 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 6697 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 6698 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 6699 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 6700 PetscCall(MatDestroy(&tmat)); 6701 PetscCall(VecSet(pcis->vec1_global, 0.0)); 6702 PetscCall(VecSet(pcis->vec1_N, 1.0)); 6703 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6704 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6705 PetscCall(VecReciprocal(pcis->vec1_global)); 6706 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 6707 6708 /* check */ 6709 if (pcbddc->dbg_flag) { 6710 PetscReal error; 6711 Vec x, x_change; 6712 6713 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 6714 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 6715 PetscCall(VecSetRandom(x, NULL)); 6716 PetscCall(VecCopy(x, pcis->vec1_global)); 6717 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6718 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6719 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 6720 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6721 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6722 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 6723 PetscCall(VecAXPY(x, -1.0, x_change)); 6724 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 6725 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 6726 PetscCall(VecDestroy(&x)); 6727 PetscCall(VecDestroy(&x_change)); 6728 } 6729 /* adapt sub_schurs computed (if any) */ 6730 if (pcbddc->use_deluxe_scaling) { 6731 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6732 6733 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6734 if (sub_schurs && sub_schurs->S_Ej_all) { 6735 Mat S_new, tmat; 6736 IS is_all_N, is_V_Sall = NULL; 6737 6738 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 6739 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 6740 if (pcbddc->deluxe_zerorows) { 6741 ISLocalToGlobalMapping NtoSall; 6742 IS is_V; 6743 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 6744 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 6745 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 6746 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6747 PetscCall(ISDestroy(&is_V)); 6748 } 6749 PetscCall(ISDestroy(&is_all_N)); 6750 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6751 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6752 PetscCall(PetscObjectReference((PetscObject)S_new)); 6753 if (pcbddc->deluxe_zerorows) { 6754 const PetscScalar *array; 6755 const PetscInt *idxs_V, *idxs_all; 6756 PetscInt i, n_V; 6757 6758 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6759 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 6760 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 6761 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 6762 PetscCall(VecGetArrayRead(pcis->D, &array)); 6763 for (i = 0; i < n_V; i++) { 6764 PetscScalar val; 6765 PetscInt idx; 6766 6767 idx = idxs_V[i]; 6768 val = array[idxs_all[idxs_V[i]]]; 6769 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 6770 } 6771 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 6772 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 6773 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 6774 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 6775 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 6776 } 6777 sub_schurs->S_Ej_all = S_new; 6778 PetscCall(MatDestroy(&S_new)); 6779 if (sub_schurs->sum_S_Ej_all) { 6780 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6781 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6782 PetscCall(PetscObjectReference((PetscObject)S_new)); 6783 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6784 sub_schurs->sum_S_Ej_all = S_new; 6785 PetscCall(MatDestroy(&S_new)); 6786 } 6787 PetscCall(ISDestroy(&is_V_Sall)); 6788 PetscCall(MatDestroy(&tmat)); 6789 } 6790 /* destroy any change of basis context in sub_schurs */ 6791 if (sub_schurs && sub_schurs->change) { 6792 PetscInt i; 6793 6794 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 6795 PetscCall(PetscFree(sub_schurs->change)); 6796 } 6797 } 6798 if (pcbddc->switch_static) { /* need to save the local change */ 6799 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6800 } else { 6801 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6802 } 6803 /* determine if any process has changed the pressures locally */ 6804 pcbddc->change_interior = pcbddc->benign_have_null; 6805 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6806 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6807 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6808 pcbddc->use_qr_single = qr_needed; 6809 } 6810 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6811 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6812 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 6813 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6814 } else { 6815 Mat benign_global = NULL; 6816 if (pcbddc->benign_have_null) { 6817 Mat M; 6818 6819 pcbddc->change_interior = PETSC_TRUE; 6820 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 6821 PetscCall(VecReciprocal(pcis->vec1_N)); 6822 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 6823 if (pcbddc->benign_change) { 6824 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 6825 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 6826 } else { 6827 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 6828 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 6829 } 6830 PetscCall(MatISSetLocalMat(benign_global, M)); 6831 PetscCall(MatDestroy(&M)); 6832 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 6833 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 6834 } 6835 if (pcbddc->user_ChangeOfBasisMatrix) { 6836 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 6837 PetscCall(MatDestroy(&benign_global)); 6838 } else if (pcbddc->benign_have_null) { 6839 pcbddc->ChangeOfBasisMatrix = benign_global; 6840 } 6841 } 6842 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6843 IS is_global; 6844 const PetscInt *gidxs; 6845 6846 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 6847 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 6848 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 6849 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 6850 PetscCall(ISDestroy(&is_global)); 6851 } 6852 } 6853 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 6854 6855 if (!pcbddc->fake_change) { 6856 /* add pressure dofs to set of primal nodes for numbering purposes */ 6857 for (i = 0; i < pcbddc->benign_n; i++) { 6858 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6859 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6860 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6861 pcbddc->local_primal_size_cc++; 6862 pcbddc->local_primal_size++; 6863 } 6864 6865 /* check if a new primal space has been introduced (also take into account benign trick) */ 6866 pcbddc->new_primal_space_local = PETSC_TRUE; 6867 if (olocal_primal_size == pcbddc->local_primal_size) { 6868 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6869 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6870 if (!pcbddc->new_primal_space_local) { 6871 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6872 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6873 } 6874 } 6875 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6876 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 6877 } 6878 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 6879 6880 /* flush dbg viewer */ 6881 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6882 6883 /* free workspace */ 6884 PetscCall(PetscBTDestroy(&qr_needed_idx)); 6885 PetscCall(PetscBTDestroy(&change_basis)); 6886 if (!pcbddc->adaptive_selection) { 6887 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 6888 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 6889 } else { 6890 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 6891 PetscCall(PetscFree(constraints_n)); 6892 PetscCall(PetscFree(constraints_idxs_B)); 6893 } 6894 PetscFunctionReturn(PETSC_SUCCESS); 6895 } 6896 6897 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6898 { 6899 ISLocalToGlobalMapping map; 6900 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6901 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6902 PetscInt i, N; 6903 PetscBool rcsr = PETSC_FALSE; 6904 6905 PetscFunctionBegin; 6906 if (pcbddc->recompute_topography) { 6907 pcbddc->graphanalyzed = PETSC_FALSE; 6908 /* Reset previously computed graph */ 6909 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 6910 /* Init local Graph struct */ 6911 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 6912 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 6913 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 6914 6915 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 6916 /* Check validity of the csr graph passed in by the user */ 6917 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 6918 pcbddc->mat_graph->nvtxs); 6919 6920 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6921 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6922 PetscInt *xadj, *adjncy; 6923 PetscInt nvtxs; 6924 PetscBool flg_row = PETSC_FALSE; 6925 6926 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6927 if (flg_row) { 6928 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 6929 pcbddc->computed_rowadj = PETSC_TRUE; 6930 } 6931 PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6932 rcsr = PETSC_TRUE; 6933 } 6934 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6935 6936 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6937 PetscReal *lcoords; 6938 PetscInt n; 6939 MPI_Datatype dimrealtype; 6940 6941 /* TODO: support for blocked */ 6942 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 6943 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6944 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 6945 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 6946 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 6947 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6948 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6949 PetscCallMPI(MPI_Type_free(&dimrealtype)); 6950 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 6951 6952 pcbddc->mat_graph->coords = lcoords; 6953 pcbddc->mat_graph->cloc = PETSC_TRUE; 6954 pcbddc->mat_graph->cnloc = n; 6955 } 6956 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 6957 pcbddc->mat_graph->nvtxs); 6958 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 6959 6960 /* Setup of Graph */ 6961 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6962 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 6963 6964 /* attach info on disconnected subdomains if present */ 6965 if (pcbddc->n_local_subs) { 6966 PetscInt *local_subs, n, totn; 6967 6968 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6969 PetscCall(PetscMalloc1(n, &local_subs)); 6970 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 6971 for (i = 0; i < pcbddc->n_local_subs; i++) { 6972 const PetscInt *idxs; 6973 PetscInt nl, j; 6974 6975 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 6976 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 6977 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 6978 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 6979 } 6980 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 6981 pcbddc->mat_graph->n_local_subs = totn + 1; 6982 pcbddc->mat_graph->local_subs = local_subs; 6983 } 6984 } 6985 6986 if (!pcbddc->graphanalyzed) { 6987 /* Graph's connected components analysis */ 6988 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 6989 pcbddc->graphanalyzed = PETSC_TRUE; 6990 pcbddc->corner_selected = pcbddc->corner_selection; 6991 } 6992 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6993 PetscFunctionReturn(PETSC_SUCCESS); 6994 } 6995 6996 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 6997 { 6998 PetscInt i, j, n; 6999 PetscScalar *alphas; 7000 PetscReal norm, *onorms; 7001 7002 PetscFunctionBegin; 7003 n = *nio; 7004 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7005 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7006 PetscCall(VecNormalize(vecs[0], &norm)); 7007 if (norm < PETSC_SMALL) { 7008 onorms[0] = 0.0; 7009 PetscCall(VecSet(vecs[0], 0.0)); 7010 } else { 7011 onorms[0] = norm; 7012 } 7013 7014 for (i = 1; i < n; i++) { 7015 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7016 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7017 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7018 PetscCall(VecNormalize(vecs[i], &norm)); 7019 if (norm < PETSC_SMALL) { 7020 onorms[i] = 0.0; 7021 PetscCall(VecSet(vecs[i], 0.0)); 7022 } else { 7023 onorms[i] = norm; 7024 } 7025 } 7026 /* push nonzero vectors at the beginning */ 7027 for (i = 0; i < n; i++) { 7028 if (onorms[i] == 0.0) { 7029 for (j = i + 1; j < n; j++) { 7030 if (onorms[j] != 0.0) { 7031 PetscCall(VecCopy(vecs[j], vecs[i])); 7032 onorms[j] = 0.0; 7033 } 7034 } 7035 } 7036 } 7037 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7038 PetscCall(PetscFree2(alphas, onorms)); 7039 PetscFunctionReturn(PETSC_SUCCESS); 7040 } 7041 7042 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7043 { 7044 ISLocalToGlobalMapping mapping; 7045 Mat A; 7046 PetscInt n_neighs, *neighs, *n_shared, **shared; 7047 PetscMPIInt size, rank, color; 7048 PetscInt *xadj, *adjncy; 7049 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7050 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7051 PetscInt void_procs, *procs_candidates = NULL; 7052 PetscInt xadj_count, *count; 7053 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7054 PetscSubcomm psubcomm; 7055 MPI_Comm subcomm; 7056 7057 PetscFunctionBegin; 7058 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7059 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7060 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7061 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7062 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7063 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7064 7065 if (have_void) *have_void = PETSC_FALSE; 7066 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7067 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7068 PetscCall(MatISGetLocalMat(mat, &A)); 7069 PetscCall(MatGetLocalSize(A, &n, NULL)); 7070 im_active = !!n; 7071 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7072 void_procs = size - active_procs; 7073 /* get ranks of non-active processes in mat communicator */ 7074 if (void_procs) { 7075 PetscInt ncand; 7076 7077 if (have_void) *have_void = PETSC_TRUE; 7078 PetscCall(PetscMalloc1(size, &procs_candidates)); 7079 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7080 for (i = 0, ncand = 0; i < size; i++) { 7081 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7082 } 7083 /* force n_subdomains to be not greater that the number of non-active processes */ 7084 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7085 } 7086 7087 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7088 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7089 PetscCall(MatGetSize(mat, &N, NULL)); 7090 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7091 PetscInt issize, isidx, dest; 7092 if (*n_subdomains == 1) dest = 0; 7093 else dest = rank; 7094 if (im_active) { 7095 issize = 1; 7096 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7097 isidx = procs_candidates[dest]; 7098 } else { 7099 isidx = dest; 7100 } 7101 } else { 7102 issize = 0; 7103 isidx = -1; 7104 } 7105 if (*n_subdomains != 1) *n_subdomains = active_procs; 7106 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7107 PetscCall(PetscFree(procs_candidates)); 7108 PetscFunctionReturn(PETSC_SUCCESS); 7109 } 7110 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL)); 7111 PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL)); 7112 threshold = PetscMax(threshold, 2); 7113 7114 /* Get info on mapping */ 7115 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7116 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7117 7118 /* build local CSR graph of subdomains' connectivity */ 7119 PetscCall(PetscMalloc1(2, &xadj)); 7120 xadj[0] = 0; 7121 xadj[1] = PetscMax(n_neighs - 1, 0); 7122 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7123 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7124 PetscCall(PetscCalloc1(n, &count)); 7125 for (i = 1; i < n_neighs; i++) 7126 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7127 7128 xadj_count = 0; 7129 for (i = 1; i < n_neighs; i++) { 7130 for (j = 0; j < n_shared[i]; j++) { 7131 if (count[shared[i][j]] < threshold) { 7132 adjncy[xadj_count] = neighs[i]; 7133 adjncy_wgt[xadj_count] = n_shared[i]; 7134 xadj_count++; 7135 break; 7136 } 7137 } 7138 } 7139 xadj[1] = xadj_count; 7140 PetscCall(PetscFree(count)); 7141 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7142 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7143 7144 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7145 7146 /* Restrict work on active processes only */ 7147 PetscCall(PetscMPIIntCast(im_active, &color)); 7148 if (void_procs) { 7149 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7150 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7151 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7152 subcomm = PetscSubcommChild(psubcomm); 7153 } else { 7154 psubcomm = NULL; 7155 subcomm = PetscObjectComm((PetscObject)mat); 7156 } 7157 7158 v_wgt = NULL; 7159 if (!color) { 7160 PetscCall(PetscFree(xadj)); 7161 PetscCall(PetscFree(adjncy)); 7162 PetscCall(PetscFree(adjncy_wgt)); 7163 } else { 7164 Mat subdomain_adj; 7165 IS new_ranks, new_ranks_contig; 7166 MatPartitioning partitioner; 7167 PetscInt rstart = 0, rend = 0; 7168 PetscInt *is_indices, *oldranks; 7169 PetscMPIInt size; 7170 PetscBool aggregate; 7171 7172 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7173 if (void_procs) { 7174 PetscInt prank = rank; 7175 PetscCall(PetscMalloc1(size, &oldranks)); 7176 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7177 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7178 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7179 } else { 7180 oldranks = NULL; 7181 } 7182 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7183 if (aggregate) { /* TODO: all this part could be made more efficient */ 7184 PetscInt lrows, row, ncols, *cols; 7185 PetscMPIInt nrank; 7186 PetscScalar *vals; 7187 7188 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7189 lrows = 0; 7190 if (nrank < redprocs) { 7191 lrows = size / redprocs; 7192 if (nrank < size % redprocs) lrows++; 7193 } 7194 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7195 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7196 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7197 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7198 row = nrank; 7199 ncols = xadj[1] - xadj[0]; 7200 cols = adjncy; 7201 PetscCall(PetscMalloc1(ncols, &vals)); 7202 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7203 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7204 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7205 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7206 PetscCall(PetscFree(xadj)); 7207 PetscCall(PetscFree(adjncy)); 7208 PetscCall(PetscFree(adjncy_wgt)); 7209 PetscCall(PetscFree(vals)); 7210 if (use_vwgt) { 7211 Vec v; 7212 const PetscScalar *array; 7213 PetscInt nl; 7214 7215 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7216 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7217 PetscCall(VecAssemblyBegin(v)); 7218 PetscCall(VecAssemblyEnd(v)); 7219 PetscCall(VecGetLocalSize(v, &nl)); 7220 PetscCall(VecGetArrayRead(v, &array)); 7221 PetscCall(PetscMalloc1(nl, &v_wgt)); 7222 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7223 PetscCall(VecRestoreArrayRead(v, &array)); 7224 PetscCall(VecDestroy(&v)); 7225 } 7226 } else { 7227 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7228 if (use_vwgt) { 7229 PetscCall(PetscMalloc1(1, &v_wgt)); 7230 v_wgt[0] = n; 7231 } 7232 } 7233 /* PetscCall(MatView(subdomain_adj,0)); */ 7234 7235 /* Partition */ 7236 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7237 #if defined(PETSC_HAVE_PTSCOTCH) 7238 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7239 #elif defined(PETSC_HAVE_PARMETIS) 7240 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7241 #else 7242 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7243 #endif 7244 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7245 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7246 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7247 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7248 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7249 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7250 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7251 7252 /* renumber new_ranks to avoid "holes" in new set of processors */ 7253 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7254 PetscCall(ISDestroy(&new_ranks)); 7255 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7256 if (!aggregate) { 7257 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7258 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7259 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7260 } else if (oldranks) { 7261 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7262 } else { 7263 ranks_send_to_idx[0] = is_indices[0]; 7264 } 7265 } else { 7266 PetscInt idx = 0; 7267 PetscMPIInt tag; 7268 MPI_Request *reqs; 7269 7270 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7271 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7272 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7273 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7274 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7275 PetscCall(PetscFree(reqs)); 7276 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7277 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7278 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7279 } else if (oldranks) { 7280 ranks_send_to_idx[0] = oldranks[idx]; 7281 } else { 7282 ranks_send_to_idx[0] = idx; 7283 } 7284 } 7285 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7286 /* clean up */ 7287 PetscCall(PetscFree(oldranks)); 7288 PetscCall(ISDestroy(&new_ranks_contig)); 7289 PetscCall(MatDestroy(&subdomain_adj)); 7290 PetscCall(MatPartitioningDestroy(&partitioner)); 7291 } 7292 PetscCall(PetscSubcommDestroy(&psubcomm)); 7293 PetscCall(PetscFree(procs_candidates)); 7294 7295 /* assemble parallel IS for sends */ 7296 i = 1; 7297 if (!color) i = 0; 7298 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7299 PetscFunctionReturn(PETSC_SUCCESS); 7300 } 7301 7302 typedef enum { 7303 MATDENSE_PRIVATE = 0, 7304 MATAIJ_PRIVATE, 7305 MATBAIJ_PRIVATE, 7306 MATSBAIJ_PRIVATE 7307 } MatTypePrivate; 7308 7309 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7310 { 7311 Mat local_mat; 7312 IS is_sends_internal; 7313 PetscInt rows, cols, new_local_rows; 7314 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7315 PetscBool ismatis, isdense, newisdense, destroy_mat; 7316 ISLocalToGlobalMapping l2gmap; 7317 PetscInt *l2gmap_indices; 7318 const PetscInt *is_indices; 7319 MatType new_local_type; 7320 /* buffers */ 7321 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7322 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7323 PetscInt *recv_buffer_idxs_local; 7324 PetscScalar *ptr_vals, *recv_buffer_vals; 7325 const PetscScalar *send_buffer_vals; 7326 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7327 /* MPI */ 7328 MPI_Comm comm, comm_n; 7329 PetscSubcomm subcomm; 7330 PetscMPIInt n_sends, n_recvs, size; 7331 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7332 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7333 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7334 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7335 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7336 7337 PetscFunctionBegin; 7338 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7339 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7340 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7341 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7342 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7343 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7344 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7345 PetscValidLogicalCollectiveInt(mat, nis, 8); 7346 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7347 if (nvecs) { 7348 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7349 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7350 } 7351 /* further checks */ 7352 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7353 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7354 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7355 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7356 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7357 if (reuse && *mat_n) { 7358 PetscInt mrows, mcols, mnrows, mncols; 7359 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7360 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7361 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7362 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7363 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7364 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7365 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7366 } 7367 PetscCall(MatGetBlockSize(local_mat, &bs)); 7368 PetscValidLogicalCollectiveInt(mat, bs, 1); 7369 7370 /* prepare IS for sending if not provided */ 7371 if (!is_sends) { 7372 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7373 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7374 } else { 7375 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7376 is_sends_internal = is_sends; 7377 } 7378 7379 /* get comm */ 7380 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7381 7382 /* compute number of sends */ 7383 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7384 PetscCall(PetscMPIIntCast(i, &n_sends)); 7385 7386 /* compute number of receives */ 7387 PetscCallMPI(MPI_Comm_size(comm, &size)); 7388 PetscCall(PetscMalloc1(size, &iflags)); 7389 PetscCall(PetscArrayzero(iflags, size)); 7390 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7391 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7392 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7393 PetscCall(PetscFree(iflags)); 7394 7395 /* restrict comm if requested */ 7396 subcomm = NULL; 7397 destroy_mat = PETSC_FALSE; 7398 if (restrict_comm) { 7399 PetscMPIInt color, subcommsize; 7400 7401 color = 0; 7402 if (restrict_full) { 7403 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7404 } else { 7405 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7406 } 7407 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7408 subcommsize = size - subcommsize; 7409 /* check if reuse has been requested */ 7410 if (reuse) { 7411 if (*mat_n) { 7412 PetscMPIInt subcommsize2; 7413 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7414 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7415 comm_n = PetscObjectComm((PetscObject)*mat_n); 7416 } else { 7417 comm_n = PETSC_COMM_SELF; 7418 } 7419 } else { /* MAT_INITIAL_MATRIX */ 7420 PetscMPIInt rank; 7421 7422 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7423 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7424 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7425 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7426 comm_n = PetscSubcommChild(subcomm); 7427 } 7428 /* flag to destroy *mat_n if not significative */ 7429 if (color) destroy_mat = PETSC_TRUE; 7430 } else { 7431 comm_n = comm; 7432 } 7433 7434 /* prepare send/receive buffers */ 7435 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7436 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7437 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7438 PetscCall(PetscArrayzero(ilengths_vals, size)); 7439 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 7440 7441 /* Get data from local matrices */ 7442 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7443 /* TODO: See below some guidelines on how to prepare the local buffers */ 7444 /* 7445 send_buffer_vals should contain the raw values of the local matrix 7446 send_buffer_idxs should contain: 7447 - MatType_PRIVATE type 7448 - PetscInt size_of_l2gmap 7449 - PetscInt global_row_indices[size_of_l2gmap] 7450 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7451 */ 7452 { 7453 ISLocalToGlobalMapping mapping; 7454 7455 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7456 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7457 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7458 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7459 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7460 send_buffer_idxs[1] = i; 7461 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7462 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7463 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7464 PetscCall(PetscMPIIntCast(i, &len)); 7465 for (i = 0; i < n_sends; i++) { 7466 ilengths_vals[is_indices[i]] = len * len; 7467 ilengths_idxs[is_indices[i]] = len + 2; 7468 } 7469 } 7470 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7471 /* additional is (if any) */ 7472 if (nis) { 7473 PetscMPIInt psum; 7474 PetscInt j; 7475 for (j = 0, psum = 0; j < nis; j++) { 7476 PetscInt plen; 7477 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7478 PetscCall(PetscMPIIntCast(plen, &len)); 7479 psum += len + 1; /* indices + length */ 7480 } 7481 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7482 for (j = 0, psum = 0; j < nis; j++) { 7483 PetscInt plen; 7484 const PetscInt *is_array_idxs; 7485 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7486 send_buffer_idxs_is[psum] = plen; 7487 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7488 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7489 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7490 psum += plen + 1; /* indices + length */ 7491 } 7492 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 7493 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7494 } 7495 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7496 7497 buf_size_idxs = 0; 7498 buf_size_vals = 0; 7499 buf_size_idxs_is = 0; 7500 buf_size_vecs = 0; 7501 for (i = 0; i < n_recvs; i++) { 7502 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7503 buf_size_vals += (PetscInt)olengths_vals[i]; 7504 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7505 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7506 } 7507 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 7508 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 7509 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 7510 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 7511 7512 /* get new tags for clean communications */ 7513 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 7514 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 7515 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 7516 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 7517 7518 /* allocate for requests */ 7519 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 7520 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 7521 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 7522 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 7523 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 7524 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 7525 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 7526 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 7527 7528 /* communications */ 7529 ptr_idxs = recv_buffer_idxs; 7530 ptr_vals = recv_buffer_vals; 7531 ptr_idxs_is = recv_buffer_idxs_is; 7532 ptr_vecs = recv_buffer_vecs; 7533 for (i = 0; i < n_recvs; i++) { 7534 source_dest = onodes[i]; 7535 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 7536 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 7537 ptr_idxs += olengths_idxs[i]; 7538 ptr_vals += olengths_vals[i]; 7539 if (nis) { 7540 source_dest = onodes_is[i]; 7541 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 7542 ptr_idxs_is += olengths_idxs_is[i]; 7543 } 7544 if (nvecs) { 7545 source_dest = onodes[i]; 7546 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 7547 ptr_vecs += olengths_idxs[i] - 2; 7548 } 7549 } 7550 for (i = 0; i < n_sends; i++) { 7551 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 7552 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 7553 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 7554 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 7555 if (nvecs) { 7556 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7557 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 7558 } 7559 } 7560 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 7561 PetscCall(ISDestroy(&is_sends_internal)); 7562 7563 /* assemble new l2g map */ 7564 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 7565 ptr_idxs = recv_buffer_idxs; 7566 new_local_rows = 0; 7567 for (i = 0; i < n_recvs; i++) { 7568 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7569 ptr_idxs += olengths_idxs[i]; 7570 } 7571 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 7572 ptr_idxs = recv_buffer_idxs; 7573 new_local_rows = 0; 7574 for (i = 0; i < n_recvs; i++) { 7575 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 7576 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7577 ptr_idxs += olengths_idxs[i]; 7578 } 7579 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 7580 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 7581 PetscCall(PetscFree(l2gmap_indices)); 7582 7583 /* infer new local matrix type from received local matrices type */ 7584 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7585 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7586 if (n_recvs) { 7587 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7588 ptr_idxs = recv_buffer_idxs; 7589 for (i = 0; i < n_recvs; i++) { 7590 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7591 new_local_type_private = MATAIJ_PRIVATE; 7592 break; 7593 } 7594 ptr_idxs += olengths_idxs[i]; 7595 } 7596 switch (new_local_type_private) { 7597 case MATDENSE_PRIVATE: 7598 new_local_type = MATSEQAIJ; 7599 bs = 1; 7600 break; 7601 case MATAIJ_PRIVATE: 7602 new_local_type = MATSEQAIJ; 7603 bs = 1; 7604 break; 7605 case MATBAIJ_PRIVATE: 7606 new_local_type = MATSEQBAIJ; 7607 break; 7608 case MATSBAIJ_PRIVATE: 7609 new_local_type = MATSEQSBAIJ; 7610 break; 7611 default: 7612 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 7613 } 7614 } else { /* by default, new_local_type is seqaij */ 7615 new_local_type = MATSEQAIJ; 7616 bs = 1; 7617 } 7618 7619 /* create MATIS object if needed */ 7620 if (!reuse) { 7621 PetscCall(MatGetSize(mat, &rows, &cols)); 7622 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7623 } else { 7624 /* it also destroys the local matrices */ 7625 if (*mat_n) { 7626 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 7627 } else { /* this is a fake object */ 7628 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7629 } 7630 } 7631 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 7632 PetscCall(MatSetType(local_mat, new_local_type)); 7633 7634 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 7635 7636 /* Global to local map of received indices */ 7637 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 7638 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 7639 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7640 7641 /* restore attributes -> type of incoming data and its size */ 7642 buf_size_idxs = 0; 7643 for (i = 0; i < n_recvs; i++) { 7644 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7645 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 7646 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7647 } 7648 PetscCall(PetscFree(recv_buffer_idxs)); 7649 7650 /* set preallocation */ 7651 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 7652 if (!newisdense) { 7653 PetscInt *new_local_nnz = NULL; 7654 7655 ptr_idxs = recv_buffer_idxs_local; 7656 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 7657 for (i = 0; i < n_recvs; i++) { 7658 PetscInt j; 7659 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7660 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 7661 } else { 7662 /* TODO */ 7663 } 7664 ptr_idxs += olengths_idxs[i]; 7665 } 7666 if (new_local_nnz) { 7667 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 7668 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 7669 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 7670 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7671 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 7672 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7673 } else { 7674 PetscCall(MatSetUp(local_mat)); 7675 } 7676 PetscCall(PetscFree(new_local_nnz)); 7677 } else { 7678 PetscCall(MatSetUp(local_mat)); 7679 } 7680 7681 /* set values */ 7682 ptr_vals = recv_buffer_vals; 7683 ptr_idxs = recv_buffer_idxs_local; 7684 for (i = 0; i < n_recvs; i++) { 7685 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7686 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 7687 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 7688 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 7689 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 7690 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 7691 } else { 7692 /* TODO */ 7693 } 7694 ptr_idxs += olengths_idxs[i]; 7695 ptr_vals += olengths_vals[i]; 7696 } 7697 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 7698 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 7699 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 7700 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 7701 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 7702 PetscCall(PetscFree(recv_buffer_vals)); 7703 7704 #if 0 7705 if (!restrict_comm) { /* check */ 7706 Vec lvec,rvec; 7707 PetscReal infty_error; 7708 7709 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7710 PetscCall(VecSetRandom(rvec,NULL)); 7711 PetscCall(MatMult(mat,rvec,lvec)); 7712 PetscCall(VecScale(lvec,-1.0)); 7713 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7714 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7715 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7716 PetscCall(VecDestroy(&rvec)); 7717 PetscCall(VecDestroy(&lvec)); 7718 } 7719 #endif 7720 7721 /* assemble new additional is (if any) */ 7722 if (nis) { 7723 PetscInt **temp_idxs, *count_is, j, psum; 7724 7725 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 7726 PetscCall(PetscCalloc1(nis, &count_is)); 7727 ptr_idxs = recv_buffer_idxs_is; 7728 psum = 0; 7729 for (i = 0; i < n_recvs; i++) { 7730 for (j = 0; j < nis; j++) { 7731 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7732 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7733 psum += plen; 7734 ptr_idxs += plen + 1; /* shift pointer to received data */ 7735 } 7736 } 7737 PetscCall(PetscMalloc1(nis, &temp_idxs)); 7738 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 7739 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 7740 PetscCall(PetscArrayzero(count_is, nis)); 7741 ptr_idxs = recv_buffer_idxs_is; 7742 for (i = 0; i < n_recvs; i++) { 7743 for (j = 0; j < nis; j++) { 7744 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7745 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 7746 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7747 ptr_idxs += plen + 1; /* shift pointer to received data */ 7748 } 7749 } 7750 for (i = 0; i < nis; i++) { 7751 PetscCall(ISDestroy(&isarray[i])); 7752 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 7753 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 7754 } 7755 PetscCall(PetscFree(count_is)); 7756 PetscCall(PetscFree(temp_idxs[0])); 7757 PetscCall(PetscFree(temp_idxs)); 7758 } 7759 /* free workspace */ 7760 PetscCall(PetscFree(recv_buffer_idxs_is)); 7761 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 7762 PetscCall(PetscFree(send_buffer_idxs)); 7763 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 7764 if (isdense) { 7765 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7766 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 7767 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7768 } else { 7769 /* PetscCall(PetscFree(send_buffer_vals)); */ 7770 } 7771 if (nis) { 7772 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 7773 PetscCall(PetscFree(send_buffer_idxs_is)); 7774 } 7775 7776 if (nvecs) { 7777 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 7778 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 7779 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7780 PetscCall(VecDestroy(&nnsp_vec[0])); 7781 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 7782 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 7783 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 7784 /* set values */ 7785 ptr_vals = recv_buffer_vecs; 7786 ptr_idxs = recv_buffer_idxs_local; 7787 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7788 for (i = 0; i < n_recvs; i++) { 7789 PetscInt j; 7790 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 7791 ptr_idxs += olengths_idxs[i]; 7792 ptr_vals += olengths_idxs[i] - 2; 7793 } 7794 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7795 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 7796 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 7797 } 7798 7799 PetscCall(PetscFree(recv_buffer_vecs)); 7800 PetscCall(PetscFree(recv_buffer_idxs_local)); 7801 PetscCall(PetscFree(recv_req_idxs)); 7802 PetscCall(PetscFree(recv_req_vals)); 7803 PetscCall(PetscFree(recv_req_vecs)); 7804 PetscCall(PetscFree(recv_req_idxs_is)); 7805 PetscCall(PetscFree(send_req_idxs)); 7806 PetscCall(PetscFree(send_req_vals)); 7807 PetscCall(PetscFree(send_req_vecs)); 7808 PetscCall(PetscFree(send_req_idxs_is)); 7809 PetscCall(PetscFree(ilengths_vals)); 7810 PetscCall(PetscFree(ilengths_idxs)); 7811 PetscCall(PetscFree(olengths_vals)); 7812 PetscCall(PetscFree(olengths_idxs)); 7813 PetscCall(PetscFree(onodes)); 7814 if (nis) { 7815 PetscCall(PetscFree(ilengths_idxs_is)); 7816 PetscCall(PetscFree(olengths_idxs_is)); 7817 PetscCall(PetscFree(onodes_is)); 7818 } 7819 PetscCall(PetscSubcommDestroy(&subcomm)); 7820 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 7821 PetscCall(MatDestroy(mat_n)); 7822 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 7823 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7824 PetscCall(VecDestroy(&nnsp_vec[0])); 7825 } 7826 *mat_n = NULL; 7827 } 7828 PetscFunctionReturn(PETSC_SUCCESS); 7829 } 7830 7831 /* temporary hack into ksp private data structure */ 7832 #include <petsc/private/kspimpl.h> 7833 7834 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals) 7835 { 7836 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7837 PC_IS *pcis = (PC_IS *)pc->data; 7838 Mat coarse_mat, coarse_mat_is, coarse_submat_dense; 7839 Mat coarsedivudotp = NULL; 7840 Mat coarseG, t_coarse_mat_is; 7841 MatNullSpace CoarseNullSpace = NULL; 7842 ISLocalToGlobalMapping coarse_islg; 7843 IS coarse_is, *isarray, corners; 7844 PetscInt i, im_active = -1, active_procs = -1; 7845 PetscInt nis, nisdofs, nisneu, nisvert; 7846 PetscInt coarse_eqs_per_proc; 7847 PC pc_temp; 7848 PCType coarse_pc_type; 7849 KSPType coarse_ksp_type; 7850 PetscBool multilevel_requested, multilevel_allowed; 7851 PetscBool coarse_reuse; 7852 PetscInt ncoarse, nedcfield; 7853 PetscBool compute_vecs = PETSC_FALSE; 7854 PetscScalar *array; 7855 MatReuse coarse_mat_reuse; 7856 PetscBool restr, full_restr, have_void; 7857 PetscMPIInt size; 7858 7859 PetscFunctionBegin; 7860 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 7861 /* Assign global numbering to coarse dofs */ 7862 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7863 PetscInt ocoarse_size; 7864 compute_vecs = PETSC_TRUE; 7865 7866 pcbddc->new_primal_space = PETSC_TRUE; 7867 ocoarse_size = pcbddc->coarse_size; 7868 PetscCall(PetscFree(pcbddc->global_primal_indices)); 7869 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 7870 /* see if we can avoid some work */ 7871 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7872 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7873 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7874 PetscCall(KSPReset(pcbddc->coarse_ksp)); 7875 coarse_reuse = PETSC_FALSE; 7876 } else { /* we can safely reuse already computed coarse matrix */ 7877 coarse_reuse = PETSC_TRUE; 7878 } 7879 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7880 coarse_reuse = PETSC_FALSE; 7881 } 7882 /* reset any subassembling information */ 7883 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 7884 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7885 coarse_reuse = PETSC_TRUE; 7886 } 7887 if (coarse_reuse && pcbddc->coarse_ksp) { 7888 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 7889 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 7890 coarse_mat_reuse = MAT_REUSE_MATRIX; 7891 } else { 7892 coarse_mat = NULL; 7893 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7894 } 7895 7896 /* creates temporary l2gmap and IS for coarse indexes */ 7897 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 7898 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 7899 7900 /* creates temporary MATIS object for coarse matrix */ 7901 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense)); 7902 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is)); 7903 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense)); 7904 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7905 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7906 PetscCall(MatDestroy(&coarse_submat_dense)); 7907 7908 /* count "active" (i.e. with positive local size) and "void" processes */ 7909 im_active = !!(pcis->n); 7910 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7911 7912 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7913 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 7914 /* full_restr : just use the receivers from the subassembling pattern */ 7915 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 7916 coarse_mat_is = NULL; 7917 multilevel_allowed = PETSC_FALSE; 7918 multilevel_requested = PETSC_FALSE; 7919 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 7920 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 7921 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7922 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7923 if (multilevel_requested) { 7924 ncoarse = active_procs / pcbddc->coarsening_ratio; 7925 restr = PETSC_FALSE; 7926 full_restr = PETSC_FALSE; 7927 } else { 7928 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 7929 restr = PETSC_TRUE; 7930 full_restr = PETSC_TRUE; 7931 } 7932 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7933 ncoarse = PetscMax(1, ncoarse); 7934 if (!pcbddc->coarse_subassembling) { 7935 if (pcbddc->coarsening_ratio > 1) { 7936 if (multilevel_requested) { 7937 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7938 } else { 7939 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7940 } 7941 } else { 7942 PetscMPIInt rank; 7943 7944 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 7945 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7946 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 7947 } 7948 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7949 PetscInt psum; 7950 if (pcbddc->coarse_ksp) psum = 1; 7951 else psum = 0; 7952 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7953 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7954 } 7955 /* determine if we can go multilevel */ 7956 if (multilevel_requested) { 7957 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7958 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7959 } 7960 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7961 7962 /* dump subassembling pattern */ 7963 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 7964 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7965 nedcfield = -1; 7966 corners = NULL; 7967 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 7968 PetscInt *tidxs, *tidxs2, nout, tsize, i; 7969 const PetscInt *idxs; 7970 ISLocalToGlobalMapping tmap; 7971 7972 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7973 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 7974 /* allocate space for temporary storage */ 7975 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 7976 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 7977 /* allocate for IS array */ 7978 nisdofs = pcbddc->n_ISForDofsLocal; 7979 if (pcbddc->nedclocal) { 7980 if (pcbddc->nedfield > -1) { 7981 nedcfield = pcbddc->nedfield; 7982 } else { 7983 nedcfield = 0; 7984 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 7985 nisdofs = 1; 7986 } 7987 } 7988 nisneu = !!pcbddc->NeumannBoundariesLocal; 7989 nisvert = 0; /* nisvert is not used */ 7990 nis = nisdofs + nisneu + nisvert; 7991 PetscCall(PetscMalloc1(nis, &isarray)); 7992 /* dofs splitting */ 7993 for (i = 0; i < nisdofs; i++) { 7994 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 7995 if (nedcfield != i) { 7996 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 7997 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7998 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7999 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8000 } else { 8001 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8002 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8003 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8004 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8005 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8006 } 8007 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8008 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8009 /* PetscCall(ISView(isarray[i],0)); */ 8010 } 8011 /* neumann boundaries */ 8012 if (pcbddc->NeumannBoundariesLocal) { 8013 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8014 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8015 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8016 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8017 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8018 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8019 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8020 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8021 } 8022 /* coordinates */ 8023 if (pcbddc->corner_selected) { 8024 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8025 PetscCall(ISGetLocalSize(corners, &tsize)); 8026 PetscCall(ISGetIndices(corners, &idxs)); 8027 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8028 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8029 PetscCall(ISRestoreIndices(corners, &idxs)); 8030 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8031 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8032 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8033 } 8034 PetscCall(PetscFree(tidxs)); 8035 PetscCall(PetscFree(tidxs2)); 8036 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8037 } else { 8038 nis = 0; 8039 nisdofs = 0; 8040 nisneu = 0; 8041 nisvert = 0; 8042 isarray = NULL; 8043 } 8044 /* destroy no longer needed map */ 8045 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8046 8047 /* subassemble */ 8048 if (multilevel_allowed) { 8049 Vec vp[1]; 8050 PetscInt nvecs = 0; 8051 PetscBool reuse, reuser; 8052 8053 if (coarse_mat) reuse = PETSC_TRUE; 8054 else reuse = PETSC_FALSE; 8055 PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8056 vp[0] = NULL; 8057 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8058 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8059 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8060 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8061 nvecs = 1; 8062 8063 if (pcbddc->divudotp) { 8064 Mat B, loc_divudotp; 8065 Vec v, p; 8066 IS dummy; 8067 PetscInt np; 8068 8069 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8070 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8071 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8072 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8073 PetscCall(MatCreateVecs(B, &v, &p)); 8074 PetscCall(VecSet(p, 1.)); 8075 PetscCall(MatMultTranspose(B, p, v)); 8076 PetscCall(VecDestroy(&p)); 8077 PetscCall(MatDestroy(&B)); 8078 PetscCall(VecGetArray(vp[0], &array)); 8079 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8080 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8081 PetscCall(VecResetArray(pcbddc->vec1_P)); 8082 PetscCall(VecRestoreArray(vp[0], &array)); 8083 PetscCall(ISDestroy(&dummy)); 8084 PetscCall(VecDestroy(&v)); 8085 } 8086 } 8087 if (reuser) { 8088 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8089 } else { 8090 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8091 } 8092 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8093 PetscScalar *arraym; 8094 const PetscScalar *arrayv; 8095 PetscInt nl; 8096 PetscCall(VecGetLocalSize(vp[0], &nl)); 8097 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8098 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8099 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8100 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8101 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8102 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8103 PetscCall(VecDestroy(&vp[0])); 8104 } else { 8105 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8106 } 8107 } else { 8108 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8109 } 8110 if (coarse_mat_is || coarse_mat) { 8111 if (!multilevel_allowed) { 8112 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8113 } else { 8114 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8115 if (coarse_mat_is) { 8116 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8117 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8118 coarse_mat = coarse_mat_is; 8119 } 8120 } 8121 } 8122 PetscCall(MatDestroy(&t_coarse_mat_is)); 8123 PetscCall(MatDestroy(&coarse_mat_is)); 8124 8125 /* create local to global scatters for coarse problem */ 8126 if (compute_vecs) { 8127 PetscInt lrows; 8128 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8129 if (coarse_mat) { 8130 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8131 } else { 8132 lrows = 0; 8133 } 8134 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8135 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8136 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8137 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8138 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8139 } 8140 PetscCall(ISDestroy(&coarse_is)); 8141 8142 /* set defaults for coarse KSP and PC */ 8143 if (multilevel_allowed) { 8144 coarse_ksp_type = KSPRICHARDSON; 8145 coarse_pc_type = PCBDDC; 8146 } else { 8147 coarse_ksp_type = KSPPREONLY; 8148 coarse_pc_type = PCREDUNDANT; 8149 } 8150 8151 /* print some info if requested */ 8152 if (pcbddc->dbg_flag) { 8153 if (!multilevel_allowed) { 8154 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8155 if (multilevel_requested) { 8156 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio)); 8157 } else if (pcbddc->max_levels) { 8158 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8159 } 8160 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8161 } 8162 } 8163 8164 /* communicate coarse discrete gradient */ 8165 coarseG = NULL; 8166 if (pcbddc->nedcG && multilevel_allowed) { 8167 MPI_Comm ccomm; 8168 if (coarse_mat) { 8169 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8170 } else { 8171 ccomm = MPI_COMM_NULL; 8172 } 8173 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8174 } 8175 8176 /* create the coarse KSP object only once with defaults */ 8177 if (coarse_mat) { 8178 PetscBool isredundant, isbddc, force, valid; 8179 PetscViewer dbg_viewer = NULL; 8180 PetscBool isset, issym, isher, isspd; 8181 8182 if (pcbddc->dbg_flag) { 8183 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8184 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8185 } 8186 if (!pcbddc->coarse_ksp) { 8187 char prefix[256], str_level[16]; 8188 size_t len; 8189 8190 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8191 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8192 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8193 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8194 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8195 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8196 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8197 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8198 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8199 /* TODO is this logic correct? should check for coarse_mat type */ 8200 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8201 /* prefix */ 8202 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8203 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8204 if (!pcbddc->current_level) { 8205 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8206 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8207 } else { 8208 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8209 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8210 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8211 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8212 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8213 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level))); 8214 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8215 } 8216 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8217 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8218 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8219 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8220 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8221 /* allow user customization */ 8222 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8223 /* get some info after set from options */ 8224 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8225 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8226 force = PETSC_FALSE; 8227 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8228 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8229 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8230 if (multilevel_allowed && !force && !valid) { 8231 isbddc = PETSC_TRUE; 8232 PetscCall(PCSetType(pc_temp, PCBDDC)); 8233 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8234 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8235 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8236 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8237 PetscObjectOptionsBegin((PetscObject)pc_temp); 8238 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8239 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8240 PetscOptionsEnd(); 8241 pc_temp->setfromoptionscalled++; 8242 } 8243 } 8244 } 8245 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8246 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8247 if (nisdofs) { 8248 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8249 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8250 } 8251 if (nisneu) { 8252 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8253 PetscCall(ISDestroy(&isarray[nisdofs])); 8254 } 8255 if (nisvert) { 8256 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8257 PetscCall(ISDestroy(&isarray[nis - 1])); 8258 } 8259 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8260 8261 /* get some info after set from options */ 8262 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8263 8264 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8265 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8266 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8267 force = PETSC_FALSE; 8268 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8269 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8270 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8271 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8272 if (isredundant) { 8273 KSP inner_ksp; 8274 PC inner_pc; 8275 8276 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8277 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8278 } 8279 8280 /* parameters which miss an API */ 8281 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8282 if (isbddc) { 8283 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8284 8285 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8286 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8287 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8288 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8289 if (pcbddc_coarse->benign_saddle_point) { 8290 Mat coarsedivudotp_is; 8291 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8292 IS row, col; 8293 const PetscInt *gidxs; 8294 PetscInt n, st, M, N; 8295 8296 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8297 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8298 st = st - n; 8299 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8300 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8301 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8302 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8303 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8304 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8305 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8306 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8307 PetscCall(ISGetSize(row, &M)); 8308 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8309 PetscCall(ISDestroy(&row)); 8310 PetscCall(ISDestroy(&col)); 8311 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8312 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8313 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8314 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8315 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8316 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8317 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8318 PetscCall(MatDestroy(&coarsedivudotp)); 8319 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8320 PetscCall(MatDestroy(&coarsedivudotp_is)); 8321 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8322 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8323 } 8324 } 8325 8326 /* propagate symmetry info of coarse matrix */ 8327 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8328 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8329 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8330 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8331 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8332 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8333 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8334 8335 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8336 /* set operators */ 8337 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8338 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8339 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8340 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8341 } 8342 PetscCall(MatDestroy(&coarseG)); 8343 PetscCall(PetscFree(isarray)); 8344 #if 0 8345 { 8346 PetscViewer viewer; 8347 char filename[256]; 8348 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8349 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8350 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8351 PetscCall(MatView(coarse_mat,viewer)); 8352 PetscCall(PetscViewerPopFormat(viewer)); 8353 PetscCall(PetscViewerDestroy(&viewer)); 8354 } 8355 #endif 8356 8357 if (corners) { 8358 Vec gv; 8359 IS is; 8360 const PetscInt *idxs; 8361 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8362 PetscScalar *coords; 8363 8364 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8365 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8366 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8367 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8368 PetscCall(VecSetBlockSize(gv, cdim)); 8369 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8370 PetscCall(VecSetType(gv, VECSTANDARD)); 8371 PetscCall(VecSetFromOptions(gv)); 8372 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8373 8374 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8375 PetscCall(ISGetLocalSize(is, &n)); 8376 PetscCall(ISGetIndices(is, &idxs)); 8377 PetscCall(PetscMalloc1(n * cdim, &coords)); 8378 for (i = 0; i < n; i++) { 8379 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8380 } 8381 PetscCall(ISRestoreIndices(is, &idxs)); 8382 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8383 8384 PetscCall(ISGetLocalSize(corners, &n)); 8385 PetscCall(ISGetIndices(corners, &idxs)); 8386 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8387 PetscCall(ISRestoreIndices(corners, &idxs)); 8388 PetscCall(PetscFree(coords)); 8389 PetscCall(VecAssemblyBegin(gv)); 8390 PetscCall(VecAssemblyEnd(gv)); 8391 PetscCall(VecGetArray(gv, &coords)); 8392 if (pcbddc->coarse_ksp) { 8393 PC coarse_pc; 8394 PetscBool isbddc; 8395 8396 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8397 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8398 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8399 PetscReal *realcoords; 8400 8401 PetscCall(VecGetLocalSize(gv, &n)); 8402 #if defined(PETSC_USE_COMPLEX) 8403 PetscCall(PetscMalloc1(n, &realcoords)); 8404 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8405 #else 8406 realcoords = coords; 8407 #endif 8408 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8409 #if defined(PETSC_USE_COMPLEX) 8410 PetscCall(PetscFree(realcoords)); 8411 #endif 8412 } 8413 } 8414 PetscCall(VecRestoreArray(gv, &coords)); 8415 PetscCall(VecDestroy(&gv)); 8416 } 8417 PetscCall(ISDestroy(&corners)); 8418 8419 if (pcbddc->coarse_ksp) { 8420 Vec crhs, csol; 8421 8422 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8423 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8424 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL)); 8425 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs))); 8426 } 8427 PetscCall(MatDestroy(&coarsedivudotp)); 8428 8429 /* compute null space for coarse solver if the benign trick has been requested */ 8430 if (pcbddc->benign_null) { 8431 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8432 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 8433 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8434 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8435 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8436 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8437 if (coarse_mat) { 8438 Vec nullv; 8439 PetscScalar *array, *array2; 8440 PetscInt nl; 8441 8442 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8443 PetscCall(VecGetLocalSize(nullv, &nl)); 8444 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8445 PetscCall(VecGetArray(nullv, &array2)); 8446 PetscCall(PetscArraycpy(array2, array, nl)); 8447 PetscCall(VecRestoreArray(nullv, &array2)); 8448 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8449 PetscCall(VecNormalize(nullv, NULL)); 8450 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8451 PetscCall(VecDestroy(&nullv)); 8452 } 8453 } 8454 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8455 8456 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8457 if (pcbddc->coarse_ksp) { 8458 PetscBool ispreonly; 8459 8460 if (CoarseNullSpace) { 8461 PetscBool isnull; 8462 8463 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8464 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8465 /* TODO: add local nullspaces (if any) */ 8466 } 8467 /* setup coarse ksp */ 8468 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8469 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8470 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8471 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8472 KSP check_ksp; 8473 KSPType check_ksp_type; 8474 PC check_pc; 8475 Vec check_vec, coarse_vec; 8476 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8477 PetscInt its; 8478 PetscBool compute_eigs; 8479 PetscReal *eigs_r, *eigs_c; 8480 PetscInt neigs; 8481 const char *prefix; 8482 8483 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8484 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 8485 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 8486 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 8487 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 8488 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 8489 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 8490 /* prevent from setup unneeded object */ 8491 PetscCall(KSPGetPC(check_ksp, &check_pc)); 8492 PetscCall(PCSetType(check_pc, PCNONE)); 8493 if (ispreonly) { 8494 check_ksp_type = KSPPREONLY; 8495 compute_eigs = PETSC_FALSE; 8496 } else { 8497 check_ksp_type = KSPGMRES; 8498 compute_eigs = PETSC_TRUE; 8499 } 8500 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 8501 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 8502 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 8503 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 8504 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 8505 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 8506 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 8507 PetscCall(KSPSetFromOptions(check_ksp)); 8508 PetscCall(KSPSetUp(check_ksp)); 8509 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 8510 PetscCall(KSPSetPC(check_ksp, check_pc)); 8511 /* create random vec */ 8512 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 8513 PetscCall(VecSetRandom(check_vec, NULL)); 8514 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8515 /* solve coarse problem */ 8516 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 8517 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 8518 /* set eigenvalue estimation if preonly has not been requested */ 8519 if (compute_eigs) { 8520 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 8521 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 8522 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 8523 if (neigs) { 8524 lambda_max = eigs_r[neigs - 1]; 8525 lambda_min = eigs_r[0]; 8526 if (pcbddc->use_coarse_estimates) { 8527 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8528 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 8529 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 8530 } 8531 } 8532 } 8533 } 8534 8535 /* check coarse problem residual error */ 8536 if (pcbddc->dbg_flag) { 8537 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8538 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8539 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 8540 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 8541 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8542 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 8543 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 8544 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer)); 8545 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 8546 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 8547 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 8548 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 8549 if (compute_eigs) { 8550 PetscReal lambda_max_s, lambda_min_s; 8551 KSPConvergedReason reason; 8552 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 8553 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 8554 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 8555 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 8556 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 8557 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 8558 } 8559 PetscCall(PetscViewerFlush(dbg_viewer)); 8560 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8561 } 8562 PetscCall(VecDestroy(&check_vec)); 8563 PetscCall(VecDestroy(&coarse_vec)); 8564 PetscCall(KSPDestroy(&check_ksp)); 8565 if (compute_eigs) { 8566 PetscCall(PetscFree(eigs_r)); 8567 PetscCall(PetscFree(eigs_c)); 8568 } 8569 } 8570 } 8571 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8572 /* print additional info */ 8573 if (pcbddc->dbg_flag) { 8574 /* waits until all processes reaches this point */ 8575 PetscCall(PetscBarrier((PetscObject)pc)); 8576 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 8577 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8578 } 8579 8580 /* free memory */ 8581 PetscCall(MatDestroy(&coarse_mat)); 8582 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8583 PetscFunctionReturn(PETSC_SUCCESS); 8584 } 8585 8586 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 8587 { 8588 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8589 PC_IS *pcis = (PC_IS *)pc->data; 8590 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 8591 IS subset, subset_mult, subset_n; 8592 PetscInt local_size, coarse_size = 0; 8593 PetscInt *local_primal_indices = NULL; 8594 const PetscInt *t_local_primal_indices; 8595 8596 PetscFunctionBegin; 8597 /* Compute global number of coarse dofs */ 8598 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 8599 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 8600 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 8601 PetscCall(ISDestroy(&subset_n)); 8602 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 8603 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 8604 PetscCall(ISDestroy(&subset)); 8605 PetscCall(ISDestroy(&subset_mult)); 8606 PetscCall(ISGetLocalSize(subset_n, &local_size)); 8607 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 8608 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 8609 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 8610 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 8611 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 8612 PetscCall(ISDestroy(&subset_n)); 8613 8614 /* check numbering */ 8615 if (pcbddc->dbg_flag) { 8616 PetscScalar coarsesum, *array, *array2; 8617 PetscInt i; 8618 PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE; 8619 8620 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8621 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8622 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n")); 8623 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8624 /* counter */ 8625 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8626 PetscCall(VecSet(pcis->vec1_N, 1.0)); 8627 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8628 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8629 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8630 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8631 PetscCall(VecSet(pcis->vec1_N, 0.0)); 8632 for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES)); 8633 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8634 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8635 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8636 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8637 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8638 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8639 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8640 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8641 PetscCall(VecGetArray(pcis->vec2_N, &array2)); 8642 for (i = 0; i < pcis->n; i++) { 8643 if (array[i] != 0.0 && array[i] != array2[i]) { 8644 PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi; 8645 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8646 set_error = PETSC_TRUE; 8647 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi)); 8648 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh)); 8649 } 8650 } 8651 PetscCall(VecRestoreArray(pcis->vec2_N, &array2)); 8652 PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8653 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8654 for (i = 0; i < pcis->n; i++) { 8655 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]); 8656 } 8657 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8658 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8659 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8660 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8661 PetscCall(VecSum(pcis->vec1_global, &coarsesum)); 8662 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum))); 8663 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8664 PetscInt *gidxs; 8665 8666 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs)); 8667 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs)); 8668 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n")); 8669 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8670 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank)); 8671 for (i = 0; i < pcbddc->local_primal_size; i++) { 8672 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i])); 8673 } 8674 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8675 PetscCall(PetscFree(gidxs)); 8676 } 8677 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8678 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8679 PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed"); 8680 } 8681 8682 /* get back data */ 8683 *coarse_size_n = coarse_size; 8684 *local_primal_indices_n = local_primal_indices; 8685 PetscFunctionReturn(PETSC_SUCCESS); 8686 } 8687 8688 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 8689 { 8690 IS localis_t; 8691 PetscInt i, lsize, *idxs, n; 8692 PetscScalar *vals; 8693 8694 PetscFunctionBegin; 8695 /* get indices in local ordering exploiting local to global map */ 8696 PetscCall(ISGetLocalSize(globalis, &lsize)); 8697 PetscCall(PetscMalloc1(lsize, &vals)); 8698 for (i = 0; i < lsize; i++) vals[i] = 1.0; 8699 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 8700 PetscCall(VecSet(gwork, 0.0)); 8701 PetscCall(VecSet(lwork, 0.0)); 8702 if (idxs) { /* multilevel guard */ 8703 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 8704 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 8705 } 8706 PetscCall(VecAssemblyBegin(gwork)); 8707 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 8708 PetscCall(PetscFree(vals)); 8709 PetscCall(VecAssemblyEnd(gwork)); 8710 /* now compute set in local ordering */ 8711 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8712 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8713 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 8714 PetscCall(VecGetSize(lwork, &n)); 8715 for (i = 0, lsize = 0; i < n; i++) { 8716 if (PetscRealPart(vals[i]) > 0.5) lsize++; 8717 } 8718 PetscCall(PetscMalloc1(lsize, &idxs)); 8719 for (i = 0, lsize = 0; i < n; i++) { 8720 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 8721 } 8722 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 8723 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 8724 *localis = localis_t; 8725 PetscFunctionReturn(PETSC_SUCCESS); 8726 } 8727 8728 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8729 { 8730 PC_IS *pcis = (PC_IS *)pc->data; 8731 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8732 PC_IS *pcisf; 8733 PC_BDDC *pcbddcf; 8734 PC pcf; 8735 8736 PetscFunctionBegin; 8737 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 8738 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 8739 PetscCall(PCSetType(pcf, PCBDDC)); 8740 8741 pcisf = (PC_IS *)pcf->data; 8742 pcbddcf = (PC_BDDC *)pcf->data; 8743 8744 pcisf->is_B_local = pcis->is_B_local; 8745 pcisf->vec1_N = pcis->vec1_N; 8746 pcisf->BtoNmap = pcis->BtoNmap; 8747 pcisf->n = pcis->n; 8748 pcisf->n_B = pcis->n_B; 8749 8750 PetscCall(PetscFree(pcbddcf->mat_graph)); 8751 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8752 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8753 pcbddcf->sub_schurs = schurs; 8754 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8755 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8756 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8757 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8758 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 8759 pcbddcf->use_faces = PETSC_TRUE; 8760 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 8761 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 8762 pcbddcf->use_qr_single = (PetscBool)!constraints; 8763 pcbddcf->fake_change = PETSC_TRUE; 8764 pcbddcf->dbg_flag = pcbddc->dbg_flag; 8765 8766 PetscCall(PCBDDCAdaptiveSelection(pcf)); 8767 PetscCall(PCBDDCConstraintsSetUp(pcf)); 8768 8769 *change = pcbddcf->ConstraintMatrix; 8770 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 8771 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 8772 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 8773 8774 if (schurs) pcbddcf->sub_schurs = NULL; 8775 pcbddcf->ConstraintMatrix = NULL; 8776 pcbddcf->mat_graph = NULL; 8777 pcisf->is_B_local = NULL; 8778 pcisf->vec1_N = NULL; 8779 pcisf->BtoNmap = NULL; 8780 PetscCall(PCDestroy(&pcf)); 8781 PetscFunctionReturn(PETSC_SUCCESS); 8782 } 8783 8784 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8785 { 8786 PC_IS *pcis = (PC_IS *)pc->data; 8787 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8788 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 8789 Mat S_j; 8790 PetscInt *used_xadj, *used_adjncy; 8791 PetscBool free_used_adj; 8792 8793 PetscFunctionBegin; 8794 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8795 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8796 free_used_adj = PETSC_FALSE; 8797 if (pcbddc->sub_schurs_layers == -1) { 8798 used_xadj = NULL; 8799 used_adjncy = NULL; 8800 } else { 8801 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8802 used_xadj = pcbddc->mat_graph->xadj; 8803 used_adjncy = pcbddc->mat_graph->adjncy; 8804 } else if (pcbddc->computed_rowadj) { 8805 used_xadj = pcbddc->mat_graph->xadj; 8806 used_adjncy = pcbddc->mat_graph->adjncy; 8807 } else { 8808 PetscBool flg_row = PETSC_FALSE; 8809 const PetscInt *xadj, *adjncy; 8810 PetscInt nvtxs; 8811 8812 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8813 if (flg_row) { 8814 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 8815 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 8816 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 8817 free_used_adj = PETSC_TRUE; 8818 } else { 8819 pcbddc->sub_schurs_layers = -1; 8820 used_xadj = NULL; 8821 used_adjncy = NULL; 8822 } 8823 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8824 } 8825 } 8826 8827 /* setup sub_schurs data */ 8828 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8829 if (!sub_schurs->schur_explicit) { 8830 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8831 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8832 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 8833 } else { 8834 Mat change = NULL; 8835 Vec scaling = NULL; 8836 IS change_primal = NULL, iP; 8837 PetscInt benign_n; 8838 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8839 PetscBool need_change = PETSC_FALSE; 8840 PetscBool discrete_harmonic = PETSC_FALSE; 8841 8842 if (!pcbddc->use_vertices && reuse_solvers) { 8843 PetscInt n_vertices; 8844 8845 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 8846 reuse_solvers = (PetscBool)!n_vertices; 8847 } 8848 if (!pcbddc->benign_change_explicit) { 8849 benign_n = pcbddc->benign_n; 8850 } else { 8851 benign_n = 0; 8852 } 8853 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8854 We need a global reduction to avoid possible deadlocks. 8855 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8856 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8857 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8858 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8859 need_change = (PetscBool)(!need_change); 8860 } 8861 /* If the user defines additional constraints, we import them here */ 8862 if (need_change) { 8863 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 8864 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 8865 } 8866 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8867 8868 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 8869 if (iP) { 8870 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 8871 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 8872 PetscOptionsEnd(); 8873 } 8874 if (discrete_harmonic) { 8875 Mat A; 8876 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 8877 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 8878 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 8879 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 8880 pcbddc->benign_zerodiag_subs, change, change_primal)); 8881 PetscCall(MatDestroy(&A)); 8882 } else { 8883 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 8884 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 8885 } 8886 PetscCall(MatDestroy(&change)); 8887 PetscCall(ISDestroy(&change_primal)); 8888 } 8889 PetscCall(MatDestroy(&S_j)); 8890 8891 /* free adjacency */ 8892 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 8893 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8894 PetscFunctionReturn(PETSC_SUCCESS); 8895 } 8896 8897 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8898 { 8899 PC_IS *pcis = (PC_IS *)pc->data; 8900 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8901 PCBDDCGraph graph; 8902 8903 PetscFunctionBegin; 8904 /* attach interface graph for determining subsets */ 8905 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8906 IS verticesIS, verticescomm; 8907 PetscInt vsize, *idxs; 8908 8909 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8910 PetscCall(ISGetSize(verticesIS, &vsize)); 8911 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 8912 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 8913 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 8914 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8915 PetscCall(PCBDDCGraphCreate(&graph)); 8916 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 8917 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 8918 PetscCall(ISDestroy(&verticescomm)); 8919 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 8920 } else { 8921 graph = pcbddc->mat_graph; 8922 } 8923 /* print some info */ 8924 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8925 IS vertices; 8926 PetscInt nv, nedges, nfaces; 8927 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 8928 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8929 PetscCall(ISGetSize(vertices, &nv)); 8930 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8931 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 8932 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 8933 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 8934 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 8935 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8936 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 8937 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8938 } 8939 8940 /* sub_schurs init */ 8941 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 8942 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 8943 8944 /* free graph struct */ 8945 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 8946 PetscFunctionReturn(PETSC_SUCCESS); 8947 } 8948 8949 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8950 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8951 { 8952 Mat At; 8953 IS rows; 8954 PetscInt rst, ren; 8955 PetscLayout rmap; 8956 8957 PetscFunctionBegin; 8958 rst = ren = 0; 8959 if (ccomm != MPI_COMM_NULL) { 8960 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 8961 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 8962 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 8963 PetscCall(PetscLayoutSetUp(rmap)); 8964 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 8965 } 8966 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 8967 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 8968 PetscCall(ISDestroy(&rows)); 8969 8970 if (ccomm != MPI_COMM_NULL) { 8971 Mat_MPIAIJ *a, *b; 8972 IS from, to; 8973 Vec gvec; 8974 PetscInt lsize; 8975 8976 PetscCall(MatCreate(ccomm, B)); 8977 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 8978 PetscCall(MatSetType(*B, MATAIJ)); 8979 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 8980 PetscCall(PetscLayoutSetUp((*B)->cmap)); 8981 a = (Mat_MPIAIJ *)At->data; 8982 b = (Mat_MPIAIJ *)(*B)->data; 8983 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 8984 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 8985 PetscCall(PetscObjectReference((PetscObject)a->A)); 8986 PetscCall(PetscObjectReference((PetscObject)a->B)); 8987 b->A = a->A; 8988 b->B = a->B; 8989 8990 b->donotstash = a->donotstash; 8991 b->roworiented = a->roworiented; 8992 b->rowindices = NULL; 8993 b->rowvalues = NULL; 8994 b->getrowactive = PETSC_FALSE; 8995 8996 (*B)->rmap = rmap; 8997 (*B)->factortype = A->factortype; 8998 (*B)->assembled = PETSC_TRUE; 8999 (*B)->insertmode = NOT_SET_VALUES; 9000 (*B)->preallocated = PETSC_TRUE; 9001 9002 if (a->colmap) { 9003 #if defined(PETSC_USE_CTABLE) 9004 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9005 #else 9006 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9007 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9008 #endif 9009 } else b->colmap = NULL; 9010 if (a->garray) { 9011 PetscInt len; 9012 len = a->B->cmap->n; 9013 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9014 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9015 } else b->garray = NULL; 9016 9017 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9018 b->lvec = a->lvec; 9019 9020 /* cannot use VecScatterCopy */ 9021 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9022 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9023 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9024 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9025 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9026 PetscCall(ISDestroy(&from)); 9027 PetscCall(ISDestroy(&to)); 9028 PetscCall(VecDestroy(&gvec)); 9029 } 9030 PetscCall(MatDestroy(&At)); 9031 PetscFunctionReturn(PETSC_SUCCESS); 9032 } 9033