1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 PetscScalar *uwork, *data, *U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM, bN, lwork, lierr, di = 1; 20 PetscInt ulw, i, nr, nc, n; 21 #if defined(PETSC_USE_COMPLEX) 22 PetscReal *rwork2; 23 #endif 24 25 PetscFunctionBegin; 26 PetscCall(MatGetSize(A, &nr, &nc)); 27 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 32 PetscCall(PetscMalloc1(ulw, &uwork)); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr, nc); 38 if (!rwork) { 39 PetscCall(PetscMalloc1(n, &sing)); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 PetscCall(PetscMalloc1(nr * nr, &U)); 46 PetscCall(PetscBLASIntCast(nr, &bM)); 47 PetscCall(PetscBLASIntCast(nc, &bN)); 48 PetscCall(PetscBLASIntCast(ulw, &lwork)); 49 PetscCall(MatDenseGetArray(A, &data)); 50 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 51 #if !defined(PETSC_USE_COMPLEX) 52 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 53 #else 54 PetscCall(PetscMalloc1(5 * n, &rwork2)); 55 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 56 PetscCall(PetscFree(rwork2)); 57 #endif 58 PetscCall(PetscFPTrapPop()); 59 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 60 PetscCall(MatDenseRestoreArray(A, &data)); 61 for (i = 0; i < n; i++) 62 if (sing[i] < PETSC_SMALL) break; 63 if (!rwork) PetscCall(PetscFree(sing)); 64 if (!work) PetscCall(PetscFree(uwork)); 65 /* create B */ 66 if (!range) { 67 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 68 PetscCall(MatDenseGetArray(*B, &data)); 69 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 70 } else { 71 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 72 PetscCall(MatDenseGetArray(*B, &data)); 73 PetscCall(PetscArraycpy(data, U, i * nr)); 74 } 75 PetscCall(MatDenseRestoreArray(*B, &data)); 76 PetscCall(PetscFree(U)); 77 PetscFunctionReturn(PETSC_SUCCESS); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 Mat GE, GEd; 89 PetscInt rsize, csize, esize; 90 PetscScalar *ptr; 91 92 PetscFunctionBegin; 93 PetscCall(ISGetSize(edge, &esize)); 94 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 95 PetscCall(ISGetSize(extrow, &rsize)); 96 PetscCall(ISGetSize(extcol, &csize)); 97 98 /* gradients */ 99 ptr = work + 5 * esize; 100 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 101 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 102 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 103 PetscCall(MatDestroy(&GE)); 104 105 /* constants */ 106 ptr += rsize * csize; 107 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 108 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 109 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 110 PetscCall(MatDestroy(&GE)); 111 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 112 PetscCall(MatDestroy(&GEd)); 113 114 if (corners) { 115 Mat GEc; 116 const PetscScalar *vals; 117 PetscScalar v; 118 119 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 120 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 121 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 122 /* v = PetscAbsScalar(vals[0]); */ 123 v = 1.; 124 cvals[0] = vals[0] / v; 125 cvals[1] = vals[1] / v; 126 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 127 PetscCall(MatScale(*GKins, 1. / v)); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 133 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 134 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 135 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 136 PetscCall(MatView(GEc, viewer)); 137 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 138 PetscCall(MatView(*GKins, viewer)); 139 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 140 PetscCall(MatView(GEd, viewer)); 141 PetscCall(PetscViewerDestroy(&viewer)); 142 } 143 #endif 144 PetscCall(MatDestroy(&GEd)); 145 PetscCall(MatDestroy(&GEc)); 146 } 147 PetscFunctionReturn(PETSC_SUCCESS); 148 } 149 150 PetscErrorCode PCBDDCNedelecSupport(PC pc) 151 { 152 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 153 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 154 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 155 Vec tvec; 156 PetscSF sfv; 157 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 158 MPI_Comm comm; 159 IS lned, primals, allprimals, nedfieldlocal; 160 IS *eedges, *extrows, *extcols, *alleedges; 161 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 162 PetscScalar *vals, *work; 163 PetscReal *rwork; 164 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 165 PetscInt ne, nv, Lv, order, n, field; 166 PetscInt n_neigh, *neigh, *n_shared, **shared; 167 PetscInt i, j, extmem, cum, maxsize, nee; 168 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 169 PetscInt *sfvleaves, *sfvroots; 170 PetscInt *corners, *cedges; 171 PetscInt *ecount, **eneighs, *vcount, **vneighs; 172 PetscInt *emarks; 173 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 174 175 PetscFunctionBegin; 176 /* If the discrete gradient is defined for a subset of dofs and global is true, 177 it assumes G is given in global ordering for all the dofs. 178 Otherwise, the ordering is global for the Nedelec field */ 179 order = pcbddc->nedorder; 180 conforming = pcbddc->conforming; 181 field = pcbddc->nedfield; 182 global = pcbddc->nedglobal; 183 setprimal = PETSC_FALSE; 184 print = PETSC_FALSE; 185 singular = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 /* print debug info TODO: to be removed */ 193 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 194 PetscOptionsEnd(); 195 196 /* Return if there are no edges in the decomposition and the problem is not singular */ 197 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 198 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 199 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 200 if (!singular) { 201 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 202 lrc[0] = PETSC_FALSE; 203 for (i = 0; i < n; i++) { 204 if (PetscRealPart(vals[i]) > 2.) { 205 lrc[0] = PETSC_TRUE; 206 break; 207 } 208 } 209 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 210 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 211 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 212 } 213 214 /* Get Nedelec field */ 215 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 216 if (pcbddc->n_ISForDofsLocal && field >= 0) { 217 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 218 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 219 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 220 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 221 ne = n; 222 nedfieldlocal = NULL; 223 global = PETSC_TRUE; 224 } else if (field == PETSC_DECIDE) { 225 PetscInt rst, ren, *idx; 226 227 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 228 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 229 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 230 for (i = rst; i < ren; i++) { 231 PetscInt nc; 232 233 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 234 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 235 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 236 } 237 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 238 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 239 PetscCall(PetscMalloc1(n, &idx)); 240 for (i = 0, ne = 0; i < n; i++) 241 if (matis->sf_leafdata[i]) idx[ne++] = i; 242 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 243 } else { 244 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 245 } 246 247 /* Sanity checks */ 248 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 249 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 250 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 251 252 /* Just set primal dofs and return */ 253 if (setprimal) { 254 IS enedfieldlocal; 255 PetscInt *eidxs; 256 257 PetscCall(PetscMalloc1(ne, &eidxs)); 258 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 259 if (nedfieldlocal) { 260 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 261 for (i = 0, cum = 0; i < ne; i++) { 262 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 263 } 264 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 265 } else { 266 for (i = 0, cum = 0; i < ne; i++) { 267 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 268 } 269 } 270 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 271 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 272 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 273 PetscCall(PetscFree(eidxs)); 274 PetscCall(ISDestroy(&nedfieldlocal)); 275 PetscCall(ISDestroy(&enedfieldlocal)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 /* Compute some l2g maps */ 280 if (nedfieldlocal) { 281 IS is; 282 283 /* need to map from the local Nedelec field to local numbering */ 284 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 285 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 286 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 287 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 288 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 289 if (global) { 290 PetscCall(PetscObjectReference((PetscObject)al2g)); 291 el2g = al2g; 292 } else { 293 IS gis; 294 295 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 296 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 297 PetscCall(ISDestroy(&gis)); 298 } 299 PetscCall(ISDestroy(&is)); 300 } else { 301 /* restore default */ 302 pcbddc->nedfield = -1; 303 /* one ref for the destruction of al2g, one for el2g */ 304 PetscCall(PetscObjectReference((PetscObject)al2g)); 305 PetscCall(PetscObjectReference((PetscObject)al2g)); 306 el2g = al2g; 307 fl2g = NULL; 308 } 309 310 /* Start communication to drop connections for interior edges (for cc analysis only) */ 311 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 312 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 313 if (nedfieldlocal) { 314 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 315 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 316 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 317 } else { 318 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 319 } 320 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 321 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 322 323 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 } else { /* we need the entire G to infer the nullspace */ 350 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 351 G = pcbddc->discretegradient; 352 } 353 354 /* Extract subdomain relevant rows of G */ 355 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 356 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 357 PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); 358 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 359 PetscCall(ISDestroy(&lned)); 360 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 361 PetscCall(MatDestroy(&lGall)); 362 PetscCall(MatISGetLocalMat(lGis, &lG)); 363 364 /* SF for nodal dofs communications */ 365 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 366 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 367 PetscCall(PetscObjectReference((PetscObject)vl2g)); 368 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 369 PetscCall(PetscSFCreate(comm, &sfv)); 370 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 371 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 372 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 373 i = singular ? 2 : 1; 374 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 375 376 /* Destroy temporary G created in MATIS format and modified G */ 377 PetscCall(PetscObjectReference((PetscObject)lG)); 378 PetscCall(MatDestroy(&lGis)); 379 PetscCall(MatDestroy(&G)); 380 381 if (print) { 382 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 383 PetscCall(MatView(lG, NULL)); 384 } 385 386 /* Save lG for values insertion in change of basis */ 387 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 388 389 /* Analyze the edge-nodes connections (duplicate lG) */ 390 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 391 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 392 PetscCall(PetscBTCreate(nv, &btv)); 393 PetscCall(PetscBTCreate(ne, &bte)); 394 PetscCall(PetscBTCreate(ne, &btb)); 395 PetscCall(PetscBTCreate(ne, &btbd)); 396 PetscCall(PetscBTCreate(nv, &btvcand)); 397 /* need to import the boundary specification to ensure the 398 proper detection of coarse edges' endpoints */ 399 if (pcbddc->DirichletBoundariesLocal) { 400 IS is; 401 402 if (fl2g) { 403 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 404 } else { 405 is = pcbddc->DirichletBoundariesLocal; 406 } 407 PetscCall(ISGetLocalSize(is, &cum)); 408 PetscCall(ISGetIndices(is, &idxs)); 409 for (i = 0; i < cum; i++) { 410 if (idxs[i] >= 0) { 411 PetscCall(PetscBTSet(btb, idxs[i])); 412 PetscCall(PetscBTSet(btbd, idxs[i])); 413 } 414 } 415 PetscCall(ISRestoreIndices(is, &idxs)); 416 if (fl2g) PetscCall(ISDestroy(&is)); 417 } 418 if (pcbddc->NeumannBoundariesLocal) { 419 IS is; 420 421 if (fl2g) { 422 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 423 } else { 424 is = pcbddc->NeumannBoundariesLocal; 425 } 426 PetscCall(ISGetLocalSize(is, &cum)); 427 PetscCall(ISGetIndices(is, &idxs)); 428 for (i = 0; i < cum; i++) { 429 if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i])); 430 } 431 PetscCall(ISRestoreIndices(is, &idxs)); 432 if (fl2g) PetscCall(ISDestroy(&is)); 433 } 434 435 /* Count neighs per dof */ 436 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs)); 437 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs)); 438 439 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 440 for proper detection of coarse edges' endpoints */ 441 PetscCall(PetscBTCreate(ne, &btee)); 442 for (i = 0; i < ne; i++) { 443 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 444 } 445 PetscCall(PetscMalloc1(ne, &marks)); 446 if (!conforming) { 447 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 448 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 449 } 450 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 451 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 452 cum = 0; 453 for (i = 0; i < ne; i++) { 454 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 455 if (!PetscBTLookup(btee, i)) { 456 marks[cum++] = i; 457 continue; 458 } 459 /* set badly connected edge dofs as primal */ 460 if (!conforming) { 461 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 462 marks[cum++] = i; 463 PetscCall(PetscBTSet(bte, i)); 464 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 465 } else { 466 /* every edge dofs should be connected through a certain number of nodal dofs 467 to other edge dofs belonging to coarse edges 468 - at most 2 endpoints 469 - order-1 interior nodal dofs 470 - no undefined nodal dofs (nconn < order) 471 */ 472 PetscInt ends = 0, ints = 0, undef = 0; 473 for (j = ii[i]; j < ii[i + 1]; j++) { 474 PetscInt v = jj[j], k; 475 PetscInt nconn = iit[v + 1] - iit[v]; 476 for (k = iit[v]; k < iit[v + 1]; k++) 477 if (!PetscBTLookup(btee, jjt[k])) nconn--; 478 if (nconn > order) ends++; 479 else if (nconn == order) ints++; 480 else undef++; 481 } 482 if (undef || ends > 2 || ints != order - 1) { 483 marks[cum++] = i; 484 PetscCall(PetscBTSet(bte, i)); 485 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 486 } 487 } 488 } 489 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 490 if (!order && ii[i + 1] != ii[i]) { 491 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 492 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 493 } 494 } 495 PetscCall(PetscBTDestroy(&btee)); 496 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 497 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 498 if (!conforming) { 499 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 500 PetscCall(MatDestroy(&lGt)); 501 } 502 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 503 504 /* identify splitpoints and corner candidates */ 505 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 506 if (print) { 507 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 508 PetscCall(MatView(lGe, NULL)); 509 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 510 PetscCall(MatView(lGt, NULL)); 511 } 512 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 513 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 514 for (i = 0; i < nv; i++) { 515 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 516 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 517 if (!order) { /* variable order */ 518 PetscReal vorder = 0.; 519 520 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 521 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 522 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 523 ord = 1; 524 } 525 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 526 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 527 if (PetscBTLookup(btbd, jj[j])) { 528 bdir = PETSC_TRUE; 529 break; 530 } 531 if (vc != ecount[jj[j]]) { 532 sneighs = PETSC_FALSE; 533 } else { 534 PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]]; 535 for (k = 0; k < vc; k++) { 536 if (vn[k] != en[k]) { 537 sneighs = PETSC_FALSE; 538 break; 539 } 540 } 541 } 542 } 543 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 544 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 545 PetscCall(PetscBTSet(btv, i)); 546 } else if (test == ord) { 547 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 548 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 549 PetscCall(PetscBTSet(btv, i)); 550 } else { 551 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 552 PetscCall(PetscBTSet(btvcand, i)); 553 } 554 } 555 } 556 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 557 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 558 PetscCall(PetscBTDestroy(&btbd)); 559 560 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 561 if (order != 1) { 562 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 563 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 564 for (i = 0; i < nv; i++) { 565 if (PetscBTLookup(btvcand, i)) { 566 PetscBool found = PETSC_FALSE; 567 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 568 PetscInt k, e = jj[j]; 569 if (PetscBTLookup(bte, e)) continue; 570 for (k = iit[e]; k < iit[e + 1]; k++) { 571 PetscInt v = jjt[k]; 572 if (v != i && PetscBTLookup(btvcand, v)) { 573 found = PETSC_TRUE; 574 break; 575 } 576 } 577 } 578 if (!found) { 579 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 580 PetscCall(PetscBTClear(btvcand, i)); 581 } else { 582 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 583 } 584 } 585 } 586 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 587 } 588 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 589 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 590 PetscCall(MatDestroy(&lGe)); 591 592 /* Get the local G^T explicitly */ 593 PetscCall(MatDestroy(&lGt)); 594 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 595 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 596 597 /* Mark interior nodal dofs */ 598 PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 599 PetscCall(PetscBTCreate(nv, &btvi)); 600 for (i = 1; i < n_neigh; i++) { 601 for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j])); 602 } 603 PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared)); 604 605 /* communicate corners and splitpoints */ 606 PetscCall(PetscMalloc1(nv, &vmarks)); 607 PetscCall(PetscArrayzero(sfvleaves, nv)); 608 PetscCall(PetscArrayzero(sfvroots, Lv)); 609 for (i = 0; i < nv; i++) 610 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 611 612 if (print) { 613 IS tbz; 614 615 cum = 0; 616 for (i = 0; i < nv; i++) 617 if (sfvleaves[i]) vmarks[cum++] = i; 618 619 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 620 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 621 PetscCall(ISView(tbz, NULL)); 622 PetscCall(ISDestroy(&tbz)); 623 } 624 625 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 626 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 627 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 628 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 629 630 /* Zero rows of lGt corresponding to identified corners 631 and interior nodal dofs */ 632 cum = 0; 633 for (i = 0; i < nv; i++) { 634 if (sfvleaves[i]) { 635 vmarks[cum++] = i; 636 PetscCall(PetscBTSet(btv, i)); 637 } 638 if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 639 } 640 PetscCall(PetscBTDestroy(&btvi)); 641 if (print) { 642 IS tbz; 643 644 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 645 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 646 PetscCall(ISView(tbz, NULL)); 647 PetscCall(ISDestroy(&tbz)); 648 } 649 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 650 PetscCall(PetscFree(vmarks)); 651 PetscCall(PetscSFDestroy(&sfv)); 652 PetscCall(PetscFree2(sfvleaves, sfvroots)); 653 654 /* Recompute G */ 655 PetscCall(MatDestroy(&lG)); 656 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 659 PetscCall(MatView(lG, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 664 /* Get primal dofs (if any) */ 665 cum = 0; 666 for (i = 0; i < ne; i++) { 667 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 668 } 669 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 670 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 671 if (print) { 672 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 673 PetscCall(ISView(primals, NULL)); 674 } 675 PetscCall(PetscBTDestroy(&bte)); 676 /* TODO: what if the user passed in some of them ? */ 677 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 678 PetscCall(ISDestroy(&primals)); 679 680 /* Compute edge connectivity */ 681 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 682 683 /* Symbolic conn = lG*lGt */ 684 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 685 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 686 PetscCall(MatProductSetAlgorithm(conn, "default")); 687 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 688 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 689 PetscCall(MatProductSetFromOptions(conn)); 690 PetscCall(MatProductSymbolic(conn)); 691 692 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 693 if (fl2g) { 694 PetscBT btf; 695 PetscInt *iia, *jja, *iiu, *jju; 696 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 697 698 /* create CSR for all local dofs */ 699 PetscCall(PetscMalloc1(n + 1, &iia)); 700 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 701 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 702 iiu = pcbddc->mat_graph->xadj; 703 jju = pcbddc->mat_graph->adjncy; 704 } else if (pcbddc->use_local_adj) { 705 rest = PETSC_TRUE; 706 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 707 } else { 708 free = PETSC_TRUE; 709 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 710 iiu[0] = 0; 711 for (i = 0; i < n; i++) { 712 iiu[i + 1] = i + 1; 713 jju[i] = -1; 714 } 715 } 716 717 /* import sizes of CSR */ 718 iia[0] = 0; 719 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 720 721 /* overwrite entries corresponding to the Nedelec field */ 722 PetscCall(PetscBTCreate(n, &btf)); 723 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 724 for (i = 0; i < ne; i++) { 725 PetscCall(PetscBTSet(btf, idxs[i])); 726 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 727 } 728 729 /* iia in CSR */ 730 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 731 732 /* jja in CSR */ 733 PetscCall(PetscMalloc1(iia[n], &jja)); 734 for (i = 0; i < n; i++) 735 if (!PetscBTLookup(btf, i)) 736 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 737 738 /* map edge dofs connectivity */ 739 if (jj) { 740 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 741 for (i = 0; i < ne; i++) { 742 PetscInt e = idxs[i]; 743 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 744 } 745 } 746 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 747 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 748 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 749 if (free) PetscCall(PetscFree2(iiu, jju)); 750 PetscCall(PetscBTDestroy(&btf)); 751 } else { 752 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 753 } 754 755 /* Analyze interface for edge dofs */ 756 PetscCall(PCBDDCAnalyzeInterface(pc)); 757 pcbddc->mat_graph->twodim = PETSC_FALSE; 758 759 /* Get coarse edges in the edge space */ 760 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 761 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 762 763 if (fl2g) { 764 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 765 PetscCall(PetscMalloc1(nee, &eedges)); 766 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 767 } else { 768 eedges = alleedges; 769 primals = allprimals; 770 } 771 772 /* Mark fine edge dofs with their coarse edge id */ 773 PetscCall(PetscArrayzero(marks, ne)); 774 PetscCall(ISGetLocalSize(primals, &cum)); 775 PetscCall(ISGetIndices(primals, &idxs)); 776 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 777 PetscCall(ISRestoreIndices(primals, &idxs)); 778 if (print) { 779 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 780 PetscCall(ISView(primals, NULL)); 781 } 782 783 maxsize = 0; 784 for (i = 0; i < nee; i++) { 785 PetscInt size, mark = i + 1; 786 787 PetscCall(ISGetLocalSize(eedges[i], &size)); 788 PetscCall(ISGetIndices(eedges[i], &idxs)); 789 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 790 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 791 maxsize = PetscMax(maxsize, size); 792 } 793 794 /* Find coarse edge endpoints */ 795 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 796 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 797 for (i = 0; i < nee; i++) { 798 PetscInt mark = i + 1, size; 799 800 PetscCall(ISGetLocalSize(eedges[i], &size)); 801 if (!size && nedfieldlocal) continue; 802 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 803 PetscCall(ISGetIndices(eedges[i], &idxs)); 804 if (print) { 805 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 806 PetscCall(ISView(eedges[i], NULL)); 807 } 808 for (j = 0; j < size; j++) { 809 PetscInt k, ee = idxs[j]; 810 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 811 for (k = ii[ee]; k < ii[ee + 1]; k++) { 812 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 813 if (PetscBTLookup(btv, jj[k])) { 814 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 815 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 816 PetscInt k2; 817 PetscBool corner = PETSC_FALSE; 818 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 819 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 820 /* it's a corner if either is connected with an edge dof belonging to a different cc or 821 if the edge dof lie on the natural part of the boundary */ 822 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 823 corner = PETSC_TRUE; 824 break; 825 } 826 } 827 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 828 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 829 PetscCall(PetscBTSet(btv, jj[k])); 830 } else { 831 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 832 } 833 } 834 } 835 } 836 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 837 } 838 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 839 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 840 PetscCall(PetscBTDestroy(&btb)); 841 842 /* Reset marked primal dofs */ 843 PetscCall(ISGetLocalSize(primals, &cum)); 844 PetscCall(ISGetIndices(primals, &idxs)); 845 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 846 PetscCall(ISRestoreIndices(primals, &idxs)); 847 848 /* Now use the initial lG */ 849 PetscCall(MatDestroy(&lG)); 850 PetscCall(MatDestroy(&lGt)); 851 lG = lGinit; 852 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 853 854 /* Compute extended cols indices */ 855 PetscCall(PetscBTCreate(nv, &btvc)); 856 PetscCall(PetscBTCreate(nee, &bter)); 857 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 858 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 859 i *= maxsize; 860 PetscCall(PetscCalloc1(nee, &extcols)); 861 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 862 eerr = PETSC_FALSE; 863 for (i = 0; i < nee; i++) { 864 PetscInt size, found = 0; 865 866 cum = 0; 867 PetscCall(ISGetLocalSize(eedges[i], &size)); 868 if (!size && nedfieldlocal) continue; 869 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 870 PetscCall(ISGetIndices(eedges[i], &idxs)); 871 PetscCall(PetscBTMemzero(nv, btvc)); 872 for (j = 0; j < size; j++) { 873 PetscInt k, ee = idxs[j]; 874 for (k = ii[ee]; k < ii[ee + 1]; k++) { 875 PetscInt vv = jj[k]; 876 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 877 else if (!PetscBTLookupSet(btvc, vv)) found++; 878 } 879 } 880 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 881 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 882 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 883 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 884 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 885 /* it may happen that endpoints are not defined at this point 886 if it is the case, mark this edge for a second pass */ 887 if (cum != size - 1 || found != 2) { 888 PetscCall(PetscBTSet(bter, i)); 889 if (print) { 890 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 891 PetscCall(ISView(eedges[i], NULL)); 892 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 893 PetscCall(ISView(extcols[i], NULL)); 894 } 895 eerr = PETSC_TRUE; 896 } 897 } 898 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 899 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 900 if (done) { 901 PetscInt *newprimals; 902 903 PetscCall(PetscMalloc1(ne, &newprimals)); 904 PetscCall(ISGetLocalSize(primals, &cum)); 905 PetscCall(ISGetIndices(primals, &idxs)); 906 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 907 PetscCall(ISRestoreIndices(primals, &idxs)); 908 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 909 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 910 for (i = 0; i < nee; i++) { 911 PetscBool has_candidates = PETSC_FALSE; 912 if (PetscBTLookup(bter, i)) { 913 PetscInt size, mark = i + 1; 914 915 PetscCall(ISGetLocalSize(eedges[i], &size)); 916 PetscCall(ISGetIndices(eedges[i], &idxs)); 917 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 918 for (j = 0; j < size; j++) { 919 PetscInt k, ee = idxs[j]; 920 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 921 for (k = ii[ee]; k < ii[ee + 1]; k++) { 922 /* set all candidates located on the edge as corners */ 923 if (PetscBTLookup(btvcand, jj[k])) { 924 PetscInt k2, vv = jj[k]; 925 has_candidates = PETSC_TRUE; 926 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 927 PetscCall(PetscBTSet(btv, vv)); 928 /* set all edge dofs connected to candidate as primals */ 929 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 930 if (marks[jjt[k2]] == mark) { 931 PetscInt k3, ee2 = jjt[k2]; 932 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 933 newprimals[cum++] = ee2; 934 /* finally set the new corners */ 935 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 936 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 937 PetscCall(PetscBTSet(btv, jj[k3])); 938 } 939 } 940 } 941 } else { 942 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 943 } 944 } 945 } 946 if (!has_candidates) { /* circular edge */ 947 PetscInt k, ee = idxs[0], *tmarks; 948 949 PetscCall(PetscCalloc1(ne, &tmarks)); 950 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 951 for (k = ii[ee]; k < ii[ee + 1]; k++) { 952 PetscInt k2; 953 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 954 PetscCall(PetscBTSet(btv, jj[k])); 955 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 956 } 957 for (j = 0; j < size; j++) { 958 if (tmarks[idxs[j]] > 1) { 959 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 960 newprimals[cum++] = idxs[j]; 961 } 962 } 963 PetscCall(PetscFree(tmarks)); 964 } 965 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 966 } 967 PetscCall(ISDestroy(&extcols[i])); 968 } 969 PetscCall(PetscFree(extcols)); 970 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 971 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 972 if (fl2g) { 973 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 974 PetscCall(ISDestroy(&primals)); 975 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 976 PetscCall(PetscFree(eedges)); 977 } 978 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 979 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 980 PetscCall(PetscFree(newprimals)); 981 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 982 PetscCall(ISDestroy(&primals)); 983 PetscCall(PCBDDCAnalyzeInterface(pc)); 984 pcbddc->mat_graph->twodim = PETSC_FALSE; 985 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 986 if (fl2g) { 987 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 988 PetscCall(PetscMalloc1(nee, &eedges)); 989 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 990 } else { 991 eedges = alleedges; 992 primals = allprimals; 993 } 994 PetscCall(PetscCalloc1(nee, &extcols)); 995 996 /* Mark again */ 997 PetscCall(PetscArrayzero(marks, ne)); 998 for (i = 0; i < nee; i++) { 999 PetscInt size, mark = i + 1; 1000 1001 PetscCall(ISGetLocalSize(eedges[i], &size)); 1002 PetscCall(ISGetIndices(eedges[i], &idxs)); 1003 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1004 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1005 } 1006 if (print) { 1007 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1008 PetscCall(ISView(primals, NULL)); 1009 } 1010 1011 /* Recompute extended cols */ 1012 eerr = PETSC_FALSE; 1013 for (i = 0; i < nee; i++) { 1014 PetscInt size; 1015 1016 cum = 0; 1017 PetscCall(ISGetLocalSize(eedges[i], &size)); 1018 if (!size && nedfieldlocal) continue; 1019 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1020 PetscCall(ISGetIndices(eedges[i], &idxs)); 1021 for (j = 0; j < size; j++) { 1022 PetscInt k, ee = idxs[j]; 1023 for (k = ii[ee]; k < ii[ee + 1]; k++) 1024 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1025 } 1026 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1027 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1028 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1029 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1030 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1031 if (cum != size - 1) { 1032 if (print) { 1033 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1034 PetscCall(ISView(eedges[i], NULL)); 1035 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1036 PetscCall(ISView(extcols[i], NULL)); 1037 } 1038 eerr = PETSC_TRUE; 1039 } 1040 } 1041 } 1042 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1043 PetscCall(PetscFree2(extrow, gidxs)); 1044 PetscCall(PetscBTDestroy(&bter)); 1045 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1046 /* an error should not occur at this point */ 1047 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1048 1049 /* Check the number of endpoints */ 1050 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1051 PetscCall(PetscMalloc1(2 * nee, &corners)); 1052 PetscCall(PetscMalloc1(nee, &cedges)); 1053 for (i = 0; i < nee; i++) { 1054 PetscInt size, found = 0, gc[2]; 1055 1056 /* init with defaults */ 1057 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1058 PetscCall(ISGetLocalSize(eedges[i], &size)); 1059 if (!size && nedfieldlocal) continue; 1060 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1061 PetscCall(ISGetIndices(eedges[i], &idxs)); 1062 PetscCall(PetscBTMemzero(nv, btvc)); 1063 for (j = 0; j < size; j++) { 1064 PetscInt k, ee = idxs[j]; 1065 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1066 PetscInt vv = jj[k]; 1067 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1068 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1069 corners[i * 2 + found++] = vv; 1070 } 1071 } 1072 } 1073 if (found != 2) { 1074 PetscInt e; 1075 if (fl2g) { 1076 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1077 } else { 1078 e = idxs[0]; 1079 } 1080 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1081 } 1082 1083 /* get primal dof index on this coarse edge */ 1084 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1085 if (gc[0] > gc[1]) { 1086 PetscInt swap = corners[2 * i]; 1087 corners[2 * i] = corners[2 * i + 1]; 1088 corners[2 * i + 1] = swap; 1089 } 1090 cedges[i] = idxs[size - 1]; 1091 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1093 } 1094 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1095 PetscCall(PetscBTDestroy(&btvc)); 1096 1097 if (PetscDefined(USE_DEBUG)) { 1098 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1099 not interfere with neighbouring coarse edges */ 1100 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1101 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1102 for (i = 0; i < nv; i++) { 1103 PetscInt emax = 0, eemax = 0; 1104 1105 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1106 PetscCall(PetscArrayzero(emarks, nee + 1)); 1107 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1108 for (j = 1; j < nee + 1; j++) { 1109 if (emax < emarks[j]) { 1110 emax = emarks[j]; 1111 eemax = j; 1112 } 1113 } 1114 /* not relevant for edges */ 1115 if (!eemax) continue; 1116 1117 for (j = ii[i]; j < ii[i + 1]; j++) { 1118 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1119 } 1120 } 1121 PetscCall(PetscFree(emarks)); 1122 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1123 } 1124 1125 /* Compute extended rows indices for edge blocks of the change of basis */ 1126 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1127 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1128 extmem *= maxsize; 1129 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1130 PetscCall(PetscMalloc1(nee, &extrows)); 1131 PetscCall(PetscCalloc1(nee, &extrowcum)); 1132 for (i = 0; i < nv; i++) { 1133 PetscInt mark = 0, size, start; 1134 1135 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1136 for (j = ii[i]; j < ii[i + 1]; j++) 1137 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1138 1139 /* not relevant */ 1140 if (!mark) continue; 1141 1142 /* import extended row */ 1143 mark--; 1144 start = mark * extmem + extrowcum[mark]; 1145 size = ii[i + 1] - ii[i]; 1146 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1147 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1148 extrowcum[mark] += size; 1149 } 1150 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1151 PetscCall(MatDestroy(&lGt)); 1152 PetscCall(PetscFree(marks)); 1153 1154 /* Compress extrows */ 1155 cum = 0; 1156 for (i = 0; i < nee; i++) { 1157 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1158 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1159 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1160 cum = PetscMax(cum, size); 1161 } 1162 PetscCall(PetscFree(extrowcum)); 1163 PetscCall(PetscBTDestroy(&btv)); 1164 PetscCall(PetscBTDestroy(&btvcand)); 1165 1166 /* Workspace for lapack inner calls and VecSetValues */ 1167 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1168 1169 /* Create change of basis matrix (preallocation can be improved) */ 1170 PetscCall(MatCreate(comm, &T)); 1171 PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N)); 1172 PetscCall(MatSetType(T, MATAIJ)); 1173 PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL)); 1174 PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL)); 1175 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1176 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1177 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1178 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1179 1180 /* Defaults to identity */ 1181 PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL)); 1182 PetscCall(VecSet(tvec, 1.0)); 1183 PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES)); 1184 PetscCall(VecDestroy(&tvec)); 1185 1186 /* Create discrete gradient for the coarser level if needed */ 1187 PetscCall(MatDestroy(&pcbddc->nedcG)); 1188 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1189 if (pcbddc->current_level < pcbddc->max_levels) { 1190 ISLocalToGlobalMapping cel2g, cvl2g; 1191 IS wis, gwis; 1192 PetscInt cnv, cne; 1193 1194 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1195 if (fl2g) { 1196 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1197 } else { 1198 PetscCall(PetscObjectReference((PetscObject)wis)); 1199 pcbddc->nedclocal = wis; 1200 } 1201 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1202 PetscCall(ISDestroy(&wis)); 1203 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1204 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1205 PetscCall(ISDestroy(&wis)); 1206 PetscCall(ISDestroy(&gwis)); 1207 1208 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1209 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1210 PetscCall(ISDestroy(&wis)); 1211 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1212 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1213 PetscCall(ISDestroy(&wis)); 1214 PetscCall(ISDestroy(&gwis)); 1215 1216 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1217 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1218 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1219 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1220 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1221 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1222 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1223 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1224 } 1225 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1226 1227 #if defined(PRINT_GDET) 1228 inc = 0; 1229 lev = pcbddc->current_level; 1230 #endif 1231 1232 /* Insert values in the change of basis matrix */ 1233 for (i = 0; i < nee; i++) { 1234 Mat Gins = NULL, GKins = NULL; 1235 IS cornersis = NULL; 1236 PetscScalar cvals[2]; 1237 1238 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1239 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1240 if (Gins && GKins) { 1241 const PetscScalar *data; 1242 const PetscInt *rows, *cols; 1243 PetscInt nrh, nch, nrc, ncc; 1244 1245 PetscCall(ISGetIndices(eedges[i], &cols)); 1246 /* H1 */ 1247 PetscCall(ISGetIndices(extrows[i], &rows)); 1248 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1249 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1250 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1251 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1252 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1253 /* complement */ 1254 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1255 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1256 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1257 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1258 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1259 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1260 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1261 1262 /* coarse discrete gradient */ 1263 if (pcbddc->nedcG) { 1264 PetscInt cols[2]; 1265 1266 cols[0] = 2 * i; 1267 cols[1] = 2 * i + 1; 1268 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1269 } 1270 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1271 } 1272 PetscCall(ISDestroy(&extrows[i])); 1273 PetscCall(ISDestroy(&extcols[i])); 1274 PetscCall(ISDestroy(&cornersis)); 1275 PetscCall(MatDestroy(&Gins)); 1276 PetscCall(MatDestroy(&GKins)); 1277 } 1278 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1279 1280 /* Start assembling */ 1281 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1282 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1283 1284 /* Free */ 1285 if (fl2g) { 1286 PetscCall(ISDestroy(&primals)); 1287 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1288 PetscCall(PetscFree(eedges)); 1289 } 1290 1291 /* hack mat_graph with primal dofs on the coarse edges */ 1292 { 1293 PCBDDCGraph graph = pcbddc->mat_graph; 1294 PetscInt *oqueue = graph->queue; 1295 PetscInt *ocptr = graph->cptr; 1296 PetscInt ncc, *idxs; 1297 1298 /* find first primal edge */ 1299 if (pcbddc->nedclocal) { 1300 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1301 } else { 1302 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1303 idxs = cedges; 1304 } 1305 cum = 0; 1306 while (cum < nee && cedges[cum] < 0) cum++; 1307 1308 /* adapt connected components */ 1309 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1310 graph->cptr[0] = 0; 1311 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1312 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1313 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1314 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1315 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1316 ncc++; 1317 lc--; 1318 cum++; 1319 while (cum < nee && cedges[cum] < 0) cum++; 1320 } 1321 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1322 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1323 ncc++; 1324 } 1325 graph->ncc = ncc; 1326 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1327 PetscCall(PetscFree2(ocptr, oqueue)); 1328 } 1329 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1330 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1331 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1332 PetscCall(MatDestroy(&conn)); 1333 1334 PetscCall(ISDestroy(&nedfieldlocal)); 1335 PetscCall(PetscFree(extrow)); 1336 PetscCall(PetscFree2(work, rwork)); 1337 PetscCall(PetscFree(corners)); 1338 PetscCall(PetscFree(cedges)); 1339 PetscCall(PetscFree(extrows)); 1340 PetscCall(PetscFree(extcols)); 1341 PetscCall(MatDestroy(&lG)); 1342 1343 /* Complete assembling */ 1344 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1345 if (pcbddc->nedcG) { 1346 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1347 #if 0 1348 PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G")); 1349 PetscCall(MatView(pcbddc->nedcG,NULL)); 1350 #endif 1351 } 1352 1353 /* set change of basis */ 1354 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1355 PetscCall(MatDestroy(&T)); 1356 PetscFunctionReturn(PETSC_SUCCESS); 1357 } 1358 1359 /* the near-null space of BDDC carries information on quadrature weights, 1360 and these can be collinear -> so cheat with MatNullSpaceCreate 1361 and create a suitable set of basis vectors first */ 1362 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1363 { 1364 PetscInt i; 1365 1366 PetscFunctionBegin; 1367 for (i = 0; i < nvecs; i++) { 1368 PetscInt first, last; 1369 1370 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1371 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1372 if (i >= first && i < last) { 1373 PetscScalar *data; 1374 PetscCall(VecGetArray(quad_vecs[i], &data)); 1375 if (!has_const) { 1376 data[i - first] = 1.; 1377 } else { 1378 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1379 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1380 } 1381 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1382 } 1383 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1384 } 1385 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1386 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1387 PetscInt first, last; 1388 PetscCall(VecLockReadPop(quad_vecs[i])); 1389 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1390 if (i >= first && i < last) { 1391 PetscScalar *data; 1392 PetscCall(VecGetArray(quad_vecs[i], &data)); 1393 if (!has_const) { 1394 data[i - first] = 0.; 1395 } else { 1396 data[2 * i - first] = 0.; 1397 data[2 * i - first + 1] = 0.; 1398 } 1399 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1400 } 1401 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1402 PetscCall(VecLockReadPush(quad_vecs[i])); 1403 } 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1408 { 1409 Mat loc_divudotp; 1410 Vec p, v, vins, quad_vec, *quad_vecs; 1411 ISLocalToGlobalMapping map; 1412 PetscScalar *vals; 1413 const PetscScalar *array; 1414 PetscInt i, maxneighs = 0, maxsize, *gidxs; 1415 PetscInt n_neigh, *neigh, *n_shared, **shared; 1416 PetscMPIInt rank; 1417 1418 PetscFunctionBegin; 1419 PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1420 for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs); 1421 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 1422 if (!maxneighs) { 1423 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1424 *nnsp = NULL; 1425 PetscFunctionReturn(PETSC_SUCCESS); 1426 } 1427 maxsize = 0; 1428 for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize); 1429 PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals)); 1430 /* create vectors to hold quadrature weights */ 1431 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1432 if (!transpose) { 1433 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1434 } else { 1435 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1436 } 1437 PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs)); 1438 PetscCall(VecDestroy(&quad_vec)); 1439 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp)); 1440 for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i])); 1441 1442 /* compute local quad vec */ 1443 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1444 if (!transpose) { 1445 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1446 } else { 1447 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1448 } 1449 PetscCall(VecSet(p, 1.)); 1450 if (!transpose) { 1451 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1452 } else { 1453 PetscCall(MatMult(loc_divudotp, p, v)); 1454 } 1455 if (vl2l) { 1456 Mat lA; 1457 VecScatter sc; 1458 1459 PetscCall(MatISGetLocalMat(A, &lA)); 1460 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1461 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1462 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1463 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1464 PetscCall(VecScatterDestroy(&sc)); 1465 } else { 1466 vins = v; 1467 } 1468 PetscCall(VecGetArrayRead(vins, &array)); 1469 PetscCall(VecDestroy(&p)); 1470 1471 /* insert in global quadrature vecs */ 1472 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1473 for (i = 1; i < n_neigh; i++) { 1474 const PetscInt *idxs; 1475 PetscInt idx, nn, j; 1476 1477 idxs = shared[i]; 1478 nn = n_shared[i]; 1479 for (j = 0; j < nn; j++) vals[j] = array[idxs[j]]; 1480 PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx)); 1481 idx = -(idx + 1); 1482 PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs); 1483 PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs)); 1484 PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES)); 1485 } 1486 PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared)); 1487 PetscCall(VecRestoreArrayRead(vins, &array)); 1488 if (vl2l) PetscCall(VecDestroy(&vins)); 1489 PetscCall(VecDestroy(&v)); 1490 PetscCall(PetscFree2(gidxs, vals)); 1491 1492 /* assemble near null space */ 1493 for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i])); 1494 for (i = 0; i < maxneighs; i++) { 1495 PetscCall(VecAssemblyEnd(quad_vecs[i])); 1496 PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view")); 1497 PetscCall(VecLockReadPush(quad_vecs[i])); 1498 } 1499 PetscCall(VecDestroyVecs(maxneighs, &quad_vecs)); 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1504 { 1505 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1506 1507 PetscFunctionBegin; 1508 if (primalv) { 1509 if (pcbddc->user_primal_vertices_local) { 1510 IS list[2], newp; 1511 1512 list[0] = primalv; 1513 list[1] = pcbddc->user_primal_vertices_local; 1514 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1515 PetscCall(ISSortRemoveDups(newp)); 1516 PetscCall(ISDestroy(&list[1])); 1517 pcbddc->user_primal_vertices_local = newp; 1518 } else { 1519 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1520 } 1521 } 1522 PetscFunctionReturn(PETSC_SUCCESS); 1523 } 1524 1525 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1526 { 1527 PetscInt f, *comp = (PetscInt *)ctx; 1528 1529 PetscFunctionBegin; 1530 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1531 PetscFunctionReturn(PETSC_SUCCESS); 1532 } 1533 1534 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1535 { 1536 Vec local, global; 1537 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1538 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1539 PetscBool monolithic = PETSC_FALSE; 1540 1541 PetscFunctionBegin; 1542 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1543 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1544 PetscOptionsEnd(); 1545 /* need to convert from global to local topology information and remove references to information in global ordering */ 1546 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1547 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1548 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1549 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1550 if (monolithic) { /* just get block size to properly compute vertices */ 1551 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1552 goto boundary; 1553 } 1554 1555 if (pcbddc->user_provided_isfordofs) { 1556 if (pcbddc->n_ISForDofs) { 1557 PetscInt i; 1558 1559 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1560 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1561 PetscInt bs; 1562 1563 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1564 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1565 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1566 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1567 } 1568 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1569 pcbddc->n_ISForDofs = 0; 1570 PetscCall(PetscFree(pcbddc->ISForDofs)); 1571 } 1572 } else { 1573 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1574 DM dm; 1575 1576 PetscCall(MatGetDM(pc->pmat, &dm)); 1577 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1578 if (dm) { 1579 IS *fields; 1580 PetscInt nf, i; 1581 1582 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1583 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1584 for (i = 0; i < nf; i++) { 1585 PetscInt bs; 1586 1587 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1588 PetscCall(ISGetBlockSize(fields[i], &bs)); 1589 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1590 PetscCall(ISDestroy(&fields[i])); 1591 } 1592 PetscCall(PetscFree(fields)); 1593 pcbddc->n_ISForDofsLocal = nf; 1594 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1595 PetscContainer c; 1596 1597 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1598 if (c) { 1599 MatISLocalFields lf; 1600 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1601 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1602 } else { /* fallback, create the default fields if bs > 1 */ 1603 PetscInt i, n = matis->A->rmap->n; 1604 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1605 if (i > 1) { 1606 pcbddc->n_ISForDofsLocal = i; 1607 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1608 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1609 } 1610 } 1611 } 1612 } else { 1613 PetscInt i; 1614 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1615 } 1616 } 1617 1618 boundary: 1619 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1620 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1621 } else if (pcbddc->DirichletBoundariesLocal) { 1622 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1623 } 1624 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1625 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1626 } else if (pcbddc->NeumannBoundariesLocal) { 1627 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1628 } 1629 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1630 PetscCall(VecDestroy(&global)); 1631 PetscCall(VecDestroy(&local)); 1632 /* detect local disconnected subdomains if requested (use matis->A) */ 1633 if (pcbddc->detect_disconnected) { 1634 IS primalv = NULL; 1635 PetscInt i; 1636 PetscBool filter = pcbddc->detect_disconnected_filter; 1637 1638 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1639 PetscCall(PetscFree(pcbddc->local_subs)); 1640 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1641 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1642 PetscCall(ISDestroy(&primalv)); 1643 } 1644 /* early stage corner detection */ 1645 { 1646 DM dm; 1647 1648 PetscCall(MatGetDM(pc->pmat, &dm)); 1649 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1650 if (dm) { 1651 PetscBool isda; 1652 1653 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1654 if (isda) { 1655 ISLocalToGlobalMapping l2l; 1656 IS corners; 1657 Mat lA; 1658 PetscBool gl, lo; 1659 1660 { 1661 Vec cvec; 1662 const PetscScalar *coords; 1663 PetscInt dof, n, cdim; 1664 PetscBool memc = PETSC_TRUE; 1665 1666 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1667 PetscCall(DMGetCoordinates(dm, &cvec)); 1668 PetscCall(VecGetLocalSize(cvec, &n)); 1669 PetscCall(VecGetBlockSize(cvec, &cdim)); 1670 n /= cdim; 1671 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1672 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1673 PetscCall(VecGetArrayRead(cvec, &coords)); 1674 #if defined(PETSC_USE_COMPLEX) 1675 memc = PETSC_FALSE; 1676 #endif 1677 if (dof != 1) memc = PETSC_FALSE; 1678 if (memc) { 1679 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1680 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1681 PetscReal *bcoords = pcbddc->mat_graph->coords; 1682 PetscInt i, b, d; 1683 1684 for (i = 0; i < n; i++) { 1685 for (b = 0; b < dof; b++) { 1686 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1687 } 1688 } 1689 } 1690 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1691 pcbddc->mat_graph->cdim = cdim; 1692 pcbddc->mat_graph->cnloc = dof * n; 1693 pcbddc->mat_graph->cloc = PETSC_FALSE; 1694 } 1695 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1696 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1697 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1698 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1699 lo = (PetscBool)(l2l && corners); 1700 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1701 if (gl) { /* From PETSc's DMDA */ 1702 const PetscInt *idx; 1703 PetscInt dof, bs, *idxout, n; 1704 1705 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1706 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1707 PetscCall(ISGetLocalSize(corners, &n)); 1708 PetscCall(ISGetIndices(corners, &idx)); 1709 if (bs == dof) { 1710 PetscCall(PetscMalloc1(n, &idxout)); 1711 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1712 } else { /* the original DMDA local-to-local map have been modified */ 1713 PetscInt i, d; 1714 1715 PetscCall(PetscMalloc1(dof * n, &idxout)); 1716 for (i = 0; i < n; i++) 1717 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1718 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1719 1720 bs = 1; 1721 n *= dof; 1722 } 1723 PetscCall(ISRestoreIndices(corners, &idx)); 1724 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1725 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1726 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1727 PetscCall(ISDestroy(&corners)); 1728 pcbddc->corner_selected = PETSC_TRUE; 1729 pcbddc->corner_selection = PETSC_TRUE; 1730 } 1731 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1732 } 1733 } 1734 } 1735 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1736 DM dm; 1737 1738 PetscCall(MatGetDM(pc->pmat, &dm)); 1739 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1740 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1741 Vec vcoords; 1742 PetscSection section; 1743 PetscReal *coords; 1744 PetscInt d, cdim, nl, nf, **ctxs; 1745 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1746 /* debug coordinates */ 1747 PetscViewer viewer; 1748 PetscBool flg; 1749 PetscViewerFormat format; 1750 const char *prefix; 1751 1752 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1753 PetscCall(DMGetLocalSection(dm, §ion)); 1754 PetscCall(PetscSectionGetNumFields(section, &nf)); 1755 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1756 PetscCall(VecGetLocalSize(vcoords, &nl)); 1757 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1758 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1759 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1760 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1761 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1762 1763 /* debug coordinates */ 1764 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1765 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1766 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1767 for (d = 0; d < cdim; d++) { 1768 PetscInt i; 1769 const PetscScalar *v; 1770 char name[16]; 1771 1772 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1773 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1774 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1775 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1776 if (flg) PetscCall(VecView(vcoords, viewer)); 1777 PetscCall(VecGetArrayRead(vcoords, &v)); 1778 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1779 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1780 } 1781 PetscCall(VecDestroy(&vcoords)); 1782 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1783 PetscCall(PetscFree(coords)); 1784 PetscCall(PetscFree(ctxs[0])); 1785 PetscCall(PetscFree2(funcs, ctxs)); 1786 if (flg) { 1787 PetscCall(PetscViewerPopFormat(viewer)); 1788 PetscCall(PetscOptionsRestoreViewer(&viewer)); 1789 } 1790 } 1791 } 1792 PetscFunctionReturn(PETSC_SUCCESS); 1793 } 1794 1795 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1796 { 1797 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1798 IS nis; 1799 const PetscInt *idxs; 1800 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1801 1802 PetscFunctionBegin; 1803 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1804 if (mop == MPI_LAND) { 1805 /* init rootdata with true */ 1806 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1807 } else { 1808 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1809 } 1810 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1811 PetscCall(ISGetLocalSize(*is, &nd)); 1812 PetscCall(ISGetIndices(*is, &idxs)); 1813 for (i = 0; i < nd; i++) 1814 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1815 PetscCall(ISRestoreIndices(*is, &idxs)); 1816 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1817 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1818 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1819 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1820 if (mop == MPI_LAND) { 1821 PetscCall(PetscMalloc1(nd, &nidxs)); 1822 } else { 1823 PetscCall(PetscMalloc1(n, &nidxs)); 1824 } 1825 for (i = 0, nnd = 0; i < n; i++) 1826 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1827 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1828 PetscCall(ISDestroy(is)); 1829 *is = nis; 1830 PetscFunctionReturn(PETSC_SUCCESS); 1831 } 1832 1833 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 1834 { 1835 PC_IS *pcis = (PC_IS *)pc->data; 1836 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1837 1838 PetscFunctionBegin; 1839 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 1840 if (pcbddc->ChangeOfBasisMatrix) { 1841 Vec swap; 1842 1843 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1844 swap = pcbddc->work_change; 1845 pcbddc->work_change = r; 1846 r = swap; 1847 } 1848 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1849 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1850 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1851 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1852 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1853 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1854 PetscCall(VecSet(z, 0.)); 1855 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1856 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1857 if (pcbddc->ChangeOfBasisMatrix) { 1858 pcbddc->work_change = r; 1859 PetscCall(VecCopy(z, pcbddc->work_change)); 1860 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1861 } 1862 PetscFunctionReturn(PETSC_SUCCESS); 1863 } 1864 1865 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1866 { 1867 PCBDDCBenignMatMult_ctx ctx; 1868 PetscBool apply_right, apply_left, reset_x; 1869 1870 PetscFunctionBegin; 1871 PetscCall(MatShellGetContext(A, &ctx)); 1872 if (transpose) { 1873 apply_right = ctx->apply_left; 1874 apply_left = ctx->apply_right; 1875 } else { 1876 apply_right = ctx->apply_right; 1877 apply_left = ctx->apply_left; 1878 } 1879 reset_x = PETSC_FALSE; 1880 if (apply_right) { 1881 const PetscScalar *ax; 1882 PetscInt nl, i; 1883 1884 PetscCall(VecGetLocalSize(x, &nl)); 1885 PetscCall(VecGetArrayRead(x, &ax)); 1886 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 1887 PetscCall(VecRestoreArrayRead(x, &ax)); 1888 for (i = 0; i < ctx->benign_n; i++) { 1889 PetscScalar sum, val; 1890 const PetscInt *idxs; 1891 PetscInt nz, j; 1892 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1893 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1894 sum = 0.; 1895 if (ctx->apply_p0) { 1896 val = ctx->work[idxs[nz - 1]]; 1897 for (j = 0; j < nz - 1; j++) { 1898 sum += ctx->work[idxs[j]]; 1899 ctx->work[idxs[j]] += val; 1900 } 1901 } else { 1902 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 1903 } 1904 ctx->work[idxs[nz - 1]] -= sum; 1905 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1906 } 1907 PetscCall(VecPlaceArray(x, ctx->work)); 1908 reset_x = PETSC_TRUE; 1909 } 1910 if (transpose) { 1911 PetscCall(MatMultTranspose(ctx->A, x, y)); 1912 } else { 1913 PetscCall(MatMult(ctx->A, x, y)); 1914 } 1915 if (reset_x) PetscCall(VecResetArray(x)); 1916 if (apply_left) { 1917 PetscScalar *ay; 1918 PetscInt i; 1919 1920 PetscCall(VecGetArray(y, &ay)); 1921 for (i = 0; i < ctx->benign_n; i++) { 1922 PetscScalar sum, val; 1923 const PetscInt *idxs; 1924 PetscInt nz, j; 1925 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 1926 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1927 val = -ay[idxs[nz - 1]]; 1928 if (ctx->apply_p0) { 1929 sum = 0.; 1930 for (j = 0; j < nz - 1; j++) { 1931 sum += ay[idxs[j]]; 1932 ay[idxs[j]] += val; 1933 } 1934 ay[idxs[nz - 1]] += sum; 1935 } else { 1936 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 1937 ay[idxs[nz - 1]] = 0.; 1938 } 1939 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 1940 } 1941 PetscCall(VecRestoreArray(y, &ay)); 1942 } 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1947 { 1948 PetscFunctionBegin; 1949 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 1950 PetscFunctionReturn(PETSC_SUCCESS); 1951 } 1952 1953 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1954 { 1955 PetscFunctionBegin; 1956 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 1957 PetscFunctionReturn(PETSC_SUCCESS); 1958 } 1959 1960 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1961 { 1962 PC_IS *pcis = (PC_IS *)pc->data; 1963 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1964 PCBDDCBenignMatMult_ctx ctx; 1965 1966 PetscFunctionBegin; 1967 if (!restore) { 1968 Mat A_IB, A_BI; 1969 PetscScalar *work; 1970 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1971 1972 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 1973 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 1974 PetscCall(PetscMalloc1(pcis->n, &work)); 1975 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 1976 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 1977 PetscCall(MatSetType(A_IB, MATSHELL)); 1978 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 1979 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 1980 PetscCall(PetscNew(&ctx)); 1981 PetscCall(MatShellSetContext(A_IB, ctx)); 1982 ctx->apply_left = PETSC_TRUE; 1983 ctx->apply_right = PETSC_FALSE; 1984 ctx->apply_p0 = PETSC_FALSE; 1985 ctx->benign_n = pcbddc->benign_n; 1986 if (reuse) { 1987 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1988 ctx->free = PETSC_FALSE; 1989 } else { /* TODO: could be optimized for successive solves */ 1990 ISLocalToGlobalMapping N_to_D; 1991 PetscInt i; 1992 1993 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 1994 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 1995 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 1996 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 1997 ctx->free = PETSC_TRUE; 1998 } 1999 ctx->A = pcis->A_IB; 2000 ctx->work = work; 2001 PetscCall(MatSetUp(A_IB)); 2002 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2003 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2004 pcis->A_IB = A_IB; 2005 2006 /* A_BI as A_IB^T */ 2007 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2008 pcbddc->benign_original_mat = pcis->A_BI; 2009 pcis->A_BI = A_BI; 2010 } else { 2011 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2012 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2013 PetscCall(MatDestroy(&pcis->A_IB)); 2014 pcis->A_IB = ctx->A; 2015 ctx->A = NULL; 2016 PetscCall(MatDestroy(&pcis->A_BI)); 2017 pcis->A_BI = pcbddc->benign_original_mat; 2018 pcbddc->benign_original_mat = NULL; 2019 if (ctx->free) { 2020 PetscInt i; 2021 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2022 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2023 } 2024 PetscCall(PetscFree(ctx->work)); 2025 PetscCall(PetscFree(ctx)); 2026 } 2027 PetscFunctionReturn(PETSC_SUCCESS); 2028 } 2029 2030 /* used just in bddc debug mode */ 2031 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2032 { 2033 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2034 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2035 Mat An; 2036 2037 PetscFunctionBegin; 2038 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2039 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2040 if (is1) { 2041 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2042 PetscCall(MatDestroy(&An)); 2043 } else { 2044 *B = An; 2045 } 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 /* TODO: add reuse flag */ 2050 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2051 { 2052 Mat Bt; 2053 PetscScalar *a, *bdata; 2054 const PetscInt *ii, *ij; 2055 PetscInt m, n, i, nnz, *bii, *bij; 2056 PetscBool flg_row; 2057 2058 PetscFunctionBegin; 2059 PetscCall(MatGetSize(A, &n, &m)); 2060 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2061 PetscCall(MatSeqAIJGetArray(A, &a)); 2062 nnz = n; 2063 for (i = 0; i < ii[n]; i++) { 2064 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2065 } 2066 PetscCall(PetscMalloc1(n + 1, &bii)); 2067 PetscCall(PetscMalloc1(nnz, &bij)); 2068 PetscCall(PetscMalloc1(nnz, &bdata)); 2069 nnz = 0; 2070 bii[0] = 0; 2071 for (i = 0; i < n; i++) { 2072 PetscInt j; 2073 for (j = ii[i]; j < ii[i + 1]; j++) { 2074 PetscScalar entry = a[j]; 2075 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2076 bij[nnz] = ij[j]; 2077 bdata[nnz] = entry; 2078 nnz++; 2079 } 2080 } 2081 bii[i + 1] = nnz; 2082 } 2083 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2084 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2085 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2086 { 2087 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2088 b->free_a = PETSC_TRUE; 2089 b->free_ij = PETSC_TRUE; 2090 } 2091 if (*B == A) PetscCall(MatDestroy(&A)); 2092 *B = Bt; 2093 PetscFunctionReturn(PETSC_SUCCESS); 2094 } 2095 2096 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2097 { 2098 Mat B = NULL; 2099 DM dm; 2100 IS is_dummy, *cc_n; 2101 ISLocalToGlobalMapping l2gmap_dummy; 2102 PCBDDCGraph graph; 2103 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2104 PetscInt i, n; 2105 PetscInt *xadj, *adjncy; 2106 PetscBool isplex = PETSC_FALSE; 2107 2108 PetscFunctionBegin; 2109 if (ncc) *ncc = 0; 2110 if (cc) *cc = NULL; 2111 if (primalv) *primalv = NULL; 2112 PetscCall(PCBDDCGraphCreate(&graph)); 2113 PetscCall(MatGetDM(pc->pmat, &dm)); 2114 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2115 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2116 if (filter) isplex = PETSC_FALSE; 2117 2118 if (isplex) { /* this code has been modified from plexpartition.c */ 2119 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2120 PetscInt *adj = NULL; 2121 IS cellNumbering; 2122 const PetscInt *cellNum; 2123 PetscBool useCone, useClosure; 2124 PetscSection section; 2125 PetscSegBuffer adjBuffer; 2126 PetscSF sfPoint; 2127 2128 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2129 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2130 PetscCall(DMGetPointSF(dm, &sfPoint)); 2131 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2132 /* Build adjacency graph via a section/segbuffer */ 2133 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2134 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2135 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2136 /* Always use FVM adjacency to create partitioner graph */ 2137 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2138 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2139 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2140 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2141 for (n = 0, p = pStart; p < pEnd; p++) { 2142 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2143 if (nroots > 0) { 2144 if (cellNum[p] < 0) continue; 2145 } 2146 adjSize = PETSC_DETERMINE; 2147 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2148 for (a = 0; a < adjSize; ++a) { 2149 const PetscInt point = adj[a]; 2150 if (pStart <= point && point < pEnd) { 2151 PetscInt *PETSC_RESTRICT pBuf; 2152 PetscCall(PetscSectionAddDof(section, p, 1)); 2153 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2154 *pBuf = point; 2155 } 2156 } 2157 n++; 2158 } 2159 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2160 /* Derive CSR graph from section/segbuffer */ 2161 PetscCall(PetscSectionSetUp(section)); 2162 PetscCall(PetscSectionGetStorageSize(section, &size)); 2163 PetscCall(PetscMalloc1(n + 1, &xadj)); 2164 for (idx = 0, p = pStart; p < pEnd; p++) { 2165 if (nroots > 0) { 2166 if (cellNum[p] < 0) continue; 2167 } 2168 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2169 } 2170 xadj[n] = size; 2171 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2172 /* Clean up */ 2173 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2174 PetscCall(PetscSectionDestroy(§ion)); 2175 PetscCall(PetscFree(adj)); 2176 graph->xadj = xadj; 2177 graph->adjncy = adjncy; 2178 } else { 2179 Mat A; 2180 PetscBool isseqaij, flg_row; 2181 2182 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2183 if (!A->rmap->N || !A->cmap->N) { 2184 PetscCall(PCBDDCGraphDestroy(&graph)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2188 if (!isseqaij && filter) { 2189 PetscBool isseqdense; 2190 2191 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2192 if (!isseqdense) { 2193 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2194 } else { /* TODO: rectangular case and LDA */ 2195 PetscScalar *array; 2196 PetscReal chop = 1.e-6; 2197 2198 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2199 PetscCall(MatDenseGetArray(B, &array)); 2200 PetscCall(MatGetSize(B, &n, NULL)); 2201 for (i = 0; i < n; i++) { 2202 PetscInt j; 2203 for (j = i + 1; j < n; j++) { 2204 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2205 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2206 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2207 } 2208 } 2209 PetscCall(MatDenseRestoreArray(B, &array)); 2210 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2211 } 2212 } else { 2213 PetscCall(PetscObjectReference((PetscObject)A)); 2214 B = A; 2215 } 2216 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2217 2218 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2219 if (filter) { 2220 PetscScalar *data; 2221 PetscInt j, cum; 2222 2223 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2224 PetscCall(MatSeqAIJGetArray(B, &data)); 2225 cum = 0; 2226 for (i = 0; i < n; i++) { 2227 PetscInt t; 2228 2229 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2230 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2231 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2232 } 2233 t = xadj_filtered[i]; 2234 xadj_filtered[i] = cum; 2235 cum += t; 2236 } 2237 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2238 graph->xadj = xadj_filtered; 2239 graph->adjncy = adjncy_filtered; 2240 } else { 2241 graph->xadj = xadj; 2242 graph->adjncy = adjncy; 2243 } 2244 } 2245 /* compute local connected components using PCBDDCGraph */ 2246 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2247 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2248 PetscCall(ISDestroy(&is_dummy)); 2249 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2250 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2251 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2252 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2253 2254 /* partial clean up */ 2255 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2256 if (B) { 2257 PetscBool flg_row; 2258 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2259 PetscCall(MatDestroy(&B)); 2260 } 2261 if (isplex) { 2262 PetscCall(PetscFree(xadj)); 2263 PetscCall(PetscFree(adjncy)); 2264 } 2265 2266 /* get back data */ 2267 if (isplex) { 2268 if (ncc) *ncc = graph->ncc; 2269 if (cc || primalv) { 2270 Mat A; 2271 PetscBT btv, btvt, btvc; 2272 PetscSection subSection; 2273 PetscInt *ids, cum, cump, *cids, *pids; 2274 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2275 2276 PetscCall(DMGetDimension(dm, &dim)); 2277 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2278 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2279 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2280 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2281 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2282 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2283 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2284 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2285 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2286 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2287 2288 /* First see if we find corners for the subdomains, i.e. a vertex 2289 shared by at least dim subdomain boundary faces. This does not 2290 cover all the possible cases with simplices but it is enough 2291 for tensor cells */ 2292 if (vStart != fStart && dim <= 3) { 2293 for (PetscInt c = cStart; c < cEnd; c++) { 2294 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2295 const PetscInt *faces; 2296 2297 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2298 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2299 PetscCall(DMPlexGetCone(dm, c, &faces)); 2300 for (PetscInt f = 0; f < nf; f++) { 2301 PetscInt nc, ff; 2302 2303 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2304 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2305 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2306 } 2307 if (cnt >= mcnt) { 2308 PetscInt size, *closure = NULL; 2309 2310 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2311 for (PetscInt k = 0; k < 2 * size; k += 2) { 2312 PetscInt v = closure[k]; 2313 if (v >= vStart && v < vEnd) { 2314 PetscInt vsize, *vclosure = NULL; 2315 2316 cnt = 0; 2317 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2318 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2319 PetscInt f = vclosure[vk]; 2320 if (f >= fStart && f < fEnd) { 2321 PetscInt nc, ff; 2322 PetscBool valid = PETSC_FALSE; 2323 2324 for (PetscInt fk = 0; fk < nf; fk++) 2325 if (f == cfaces[fk]) valid = PETSC_TRUE; 2326 if (!valid) continue; 2327 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2328 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2329 if (nc == 1 && f == ff) cnt++; 2330 } 2331 } 2332 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2333 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2334 } 2335 } 2336 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2337 } 2338 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2339 } 2340 } 2341 2342 cids[0] = 0; 2343 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2344 PetscInt j; 2345 2346 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2347 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2348 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2349 2350 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2351 for (k = 0; k < 2 * size; k += 2) { 2352 PetscInt s, pp, p = closure[k], off, dof, cdof; 2353 2354 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2355 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2356 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2357 for (s = 0; s < dof - cdof; s++) { 2358 if (PetscBTLookupSet(btvt, off + s)) continue; 2359 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2360 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2361 else pids[cump++] = off + s; /* cross-vertex */ 2362 } 2363 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2364 if (pp != p) { 2365 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2366 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2367 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2368 for (s = 0; s < dof - cdof; s++) { 2369 if (PetscBTLookupSet(btvt, off + s)) continue; 2370 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2371 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2372 else pids[cump++] = off + s; /* cross-vertex */ 2373 } 2374 } 2375 } 2376 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2377 } 2378 cids[i + 1] = cum; 2379 /* mark dofs as already assigned */ 2380 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2381 } 2382 if (cc) { 2383 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2384 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2385 *cc = cc_n; 2386 } 2387 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2388 PetscCall(PetscFree3(ids, cids, pids)); 2389 PetscCall(PetscBTDestroy(&btv)); 2390 PetscCall(PetscBTDestroy(&btvt)); 2391 PetscCall(PetscBTDestroy(&btvc)); 2392 PetscCall(DMDestroy(&dm)); 2393 } 2394 } else { 2395 if (ncc) *ncc = graph->ncc; 2396 if (cc) { 2397 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2398 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2399 *cc = cc_n; 2400 } 2401 } 2402 /* clean up graph */ 2403 graph->xadj = NULL; 2404 graph->adjncy = NULL; 2405 PetscCall(PCBDDCGraphDestroy(&graph)); 2406 PetscFunctionReturn(PETSC_SUCCESS); 2407 } 2408 2409 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2410 { 2411 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2412 PC_IS *pcis = (PC_IS *)pc->data; 2413 IS dirIS = NULL; 2414 PetscInt i; 2415 2416 PetscFunctionBegin; 2417 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2418 if (zerodiag) { 2419 Mat A; 2420 Vec vec3_N; 2421 PetscScalar *vals; 2422 const PetscInt *idxs; 2423 PetscInt nz, *count; 2424 2425 /* p0 */ 2426 PetscCall(VecSet(pcis->vec1_N, 0.)); 2427 PetscCall(PetscMalloc1(pcis->n, &vals)); 2428 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2429 PetscCall(ISGetIndices(zerodiag, &idxs)); 2430 for (i = 0; i < nz; i++) vals[i] = 1.; 2431 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2432 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2433 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2434 /* v_I */ 2435 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2436 for (i = 0; i < nz; i++) vals[i] = 0.; 2437 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2438 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2439 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2440 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2441 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2442 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2443 if (dirIS) { 2444 PetscInt n; 2445 2446 PetscCall(ISGetLocalSize(dirIS, &n)); 2447 PetscCall(ISGetIndices(dirIS, &idxs)); 2448 for (i = 0; i < n; i++) vals[i] = 0.; 2449 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2450 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2451 } 2452 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2453 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2454 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2455 PetscCall(VecSet(vec3_N, 0.)); 2456 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2457 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2458 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2459 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2460 PetscCall(PetscFree(vals)); 2461 PetscCall(VecDestroy(&vec3_N)); 2462 2463 /* there should not be any pressure dofs lying on the interface */ 2464 PetscCall(PetscCalloc1(pcis->n, &count)); 2465 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2466 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2467 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2468 PetscCall(ISGetIndices(zerodiag, &idxs)); 2469 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2470 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2471 PetscCall(PetscFree(count)); 2472 } 2473 PetscCall(ISDestroy(&dirIS)); 2474 2475 /* check PCBDDCBenignGetOrSetP0 */ 2476 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2477 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2478 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2479 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2480 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2481 for (i = 0; i < pcbddc->benign_n; i++) { 2482 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2483 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2484 } 2485 PetscFunctionReturn(PETSC_SUCCESS); 2486 } 2487 2488 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2489 { 2490 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2491 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2492 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2493 PetscInt nz, n, benign_n, bsp = 1; 2494 PetscInt *interior_dofs, n_interior_dofs, nneu; 2495 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2496 2497 PetscFunctionBegin; 2498 if (reuse) goto project_b0; 2499 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2500 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2501 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2502 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2503 has_null_pressures = PETSC_TRUE; 2504 have_null = PETSC_TRUE; 2505 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2506 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2507 Checks if all the pressure dofs in each subdomain have a zero diagonal 2508 If not, a change of basis on pressures is not needed 2509 since the local Schur complements are already SPD 2510 */ 2511 if (pcbddc->n_ISForDofsLocal) { 2512 IS iP = NULL; 2513 PetscInt p, *pp; 2514 PetscBool flg; 2515 2516 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2517 n = pcbddc->n_ISForDofsLocal; 2518 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2519 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2520 PetscOptionsEnd(); 2521 if (!flg) { 2522 n = 1; 2523 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2524 } 2525 2526 bsp = 0; 2527 for (p = 0; p < n; p++) { 2528 PetscInt bs; 2529 2530 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2531 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2532 bsp += bs; 2533 } 2534 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2535 bsp = 0; 2536 for (p = 0; p < n; p++) { 2537 const PetscInt *idxs; 2538 PetscInt b, bs, npl, *bidxs; 2539 2540 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2541 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2542 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2543 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2544 for (b = 0; b < bs; b++) { 2545 PetscInt i; 2546 2547 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2548 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2549 bsp++; 2550 } 2551 PetscCall(PetscFree(bidxs)); 2552 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2553 } 2554 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2555 2556 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2557 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2558 if (iP) { 2559 IS newpressures; 2560 2561 PetscCall(ISDifference(pressures, iP, &newpressures)); 2562 PetscCall(ISDestroy(&pressures)); 2563 pressures = newpressures; 2564 } 2565 PetscCall(ISSorted(pressures, &sorted)); 2566 if (!sorted) PetscCall(ISSort(pressures)); 2567 PetscCall(PetscFree(pp)); 2568 } 2569 2570 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2571 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2572 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2573 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2574 PetscCall(ISSorted(zerodiag, &sorted)); 2575 if (!sorted) PetscCall(ISSort(zerodiag)); 2576 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2577 zerodiag_save = zerodiag; 2578 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2579 if (!nz) { 2580 if (n) have_null = PETSC_FALSE; 2581 has_null_pressures = PETSC_FALSE; 2582 PetscCall(ISDestroy(&zerodiag)); 2583 } 2584 recompute_zerodiag = PETSC_FALSE; 2585 2586 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2587 zerodiag_subs = NULL; 2588 benign_n = 0; 2589 n_interior_dofs = 0; 2590 interior_dofs = NULL; 2591 nneu = 0; 2592 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2593 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2594 if (checkb) { /* need to compute interior nodes */ 2595 PetscInt n, i, j; 2596 PetscInt n_neigh, *neigh, *n_shared, **shared; 2597 PetscInt *iwork; 2598 2599 PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n)); 2600 PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2601 PetscCall(PetscCalloc1(n, &iwork)); 2602 PetscCall(PetscMalloc1(n, &interior_dofs)); 2603 for (i = 1; i < n_neigh; i++) 2604 for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1; 2605 for (i = 0; i < n; i++) 2606 if (!iwork[i]) interior_dofs[n_interior_dofs++] = i; 2607 PetscCall(PetscFree(iwork)); 2608 PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared)); 2609 } 2610 if (has_null_pressures) { 2611 IS *subs; 2612 PetscInt nsubs, i, j, nl; 2613 const PetscInt *idxs; 2614 PetscScalar *array; 2615 Vec *work; 2616 2617 subs = pcbddc->local_subs; 2618 nsubs = pcbddc->n_local_subs; 2619 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2620 if (checkb) { 2621 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2622 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2623 PetscCall(ISGetIndices(zerodiag, &idxs)); 2624 /* work[0] = 1_p */ 2625 PetscCall(VecSet(work[0], 0.)); 2626 PetscCall(VecGetArray(work[0], &array)); 2627 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2628 PetscCall(VecRestoreArray(work[0], &array)); 2629 /* work[0] = 1_v */ 2630 PetscCall(VecSet(work[1], 1.)); 2631 PetscCall(VecGetArray(work[1], &array)); 2632 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2633 PetscCall(VecRestoreArray(work[1], &array)); 2634 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2635 } 2636 2637 if (nsubs > 1 || bsp > 1) { 2638 IS *is; 2639 PetscInt b, totb; 2640 2641 totb = bsp; 2642 is = bsp > 1 ? bzerodiag : &zerodiag; 2643 nsubs = PetscMax(nsubs, 1); 2644 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2645 for (b = 0; b < totb; b++) { 2646 for (i = 0; i < nsubs; i++) { 2647 ISLocalToGlobalMapping l2g; 2648 IS t_zerodiag_subs; 2649 PetscInt nl; 2650 2651 if (subs) { 2652 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2653 } else { 2654 IS tis; 2655 2656 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2657 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2658 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2659 PetscCall(ISDestroy(&tis)); 2660 } 2661 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2662 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2663 if (nl) { 2664 PetscBool valid = PETSC_TRUE; 2665 2666 if (checkb) { 2667 PetscCall(VecSet(matis->x, 0)); 2668 PetscCall(ISGetLocalSize(subs[i], &nl)); 2669 PetscCall(ISGetIndices(subs[i], &idxs)); 2670 PetscCall(VecGetArray(matis->x, &array)); 2671 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2672 PetscCall(VecRestoreArray(matis->x, &array)); 2673 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2674 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2675 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2676 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2677 PetscCall(VecGetArray(matis->y, &array)); 2678 for (j = 0; j < n_interior_dofs; j++) { 2679 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2680 valid = PETSC_FALSE; 2681 break; 2682 } 2683 } 2684 PetscCall(VecRestoreArray(matis->y, &array)); 2685 } 2686 if (valid && nneu) { 2687 const PetscInt *idxs; 2688 PetscInt nzb; 2689 2690 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2691 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2692 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2693 if (nzb) valid = PETSC_FALSE; 2694 } 2695 if (valid && pressures) { 2696 IS t_pressure_subs, tmp; 2697 PetscInt i1, i2; 2698 2699 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2700 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2701 PetscCall(ISGetLocalSize(tmp, &i1)); 2702 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2703 if (i2 != i1) valid = PETSC_FALSE; 2704 PetscCall(ISDestroy(&t_pressure_subs)); 2705 PetscCall(ISDestroy(&tmp)); 2706 } 2707 if (valid) { 2708 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2709 benign_n++; 2710 } else recompute_zerodiag = PETSC_TRUE; 2711 } 2712 PetscCall(ISDestroy(&t_zerodiag_subs)); 2713 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2714 } 2715 } 2716 } else { /* there's just one subdomain (or zero if they have not been detected */ 2717 PetscBool valid = PETSC_TRUE; 2718 2719 if (nneu) valid = PETSC_FALSE; 2720 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2721 if (valid && checkb) { 2722 PetscCall(MatMult(matis->A, work[0], matis->x)); 2723 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2724 PetscCall(VecGetArray(matis->x, &array)); 2725 for (j = 0; j < n_interior_dofs; j++) { 2726 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2727 valid = PETSC_FALSE; 2728 break; 2729 } 2730 } 2731 PetscCall(VecRestoreArray(matis->x, &array)); 2732 } 2733 if (valid) { 2734 benign_n = 1; 2735 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2736 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2737 zerodiag_subs[0] = zerodiag; 2738 } 2739 } 2740 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2741 } 2742 PetscCall(PetscFree(interior_dofs)); 2743 2744 if (!benign_n) { 2745 PetscInt n; 2746 2747 PetscCall(ISDestroy(&zerodiag)); 2748 recompute_zerodiag = PETSC_FALSE; 2749 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2750 if (n) have_null = PETSC_FALSE; 2751 } 2752 2753 /* final check for null pressures */ 2754 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2755 2756 if (recompute_zerodiag) { 2757 PetscCall(ISDestroy(&zerodiag)); 2758 if (benign_n == 1) { 2759 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2760 zerodiag = zerodiag_subs[0]; 2761 } else { 2762 PetscInt i, nzn, *new_idxs; 2763 2764 nzn = 0; 2765 for (i = 0; i < benign_n; i++) { 2766 PetscInt ns; 2767 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2768 nzn += ns; 2769 } 2770 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2771 nzn = 0; 2772 for (i = 0; i < benign_n; i++) { 2773 PetscInt ns, *idxs; 2774 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2775 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2776 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2777 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2778 nzn += ns; 2779 } 2780 PetscCall(PetscSortInt(nzn, new_idxs)); 2781 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2782 } 2783 have_null = PETSC_FALSE; 2784 } 2785 2786 /* determines if the coarse solver will be singular or not */ 2787 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2788 2789 /* Prepare matrix to compute no-net-flux */ 2790 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2791 Mat A, loc_divudotp; 2792 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2793 IS row, col, isused = NULL; 2794 PetscInt M, N, n, st, n_isused; 2795 2796 if (pressures) { 2797 isused = pressures; 2798 } else { 2799 isused = zerodiag_save; 2800 } 2801 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2802 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2803 PetscCall(MatGetLocalSize(A, &n, NULL)); 2804 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2805 n_isused = 0; 2806 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2807 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2808 st = st - n_isused; 2809 if (n) { 2810 const PetscInt *gidxs; 2811 2812 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2813 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2814 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2815 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2816 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2817 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2818 } else { 2819 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2820 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2821 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2822 } 2823 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2824 PetscCall(ISGetSize(row, &M)); 2825 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2826 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2827 PetscCall(ISDestroy(&row)); 2828 PetscCall(ISDestroy(&col)); 2829 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2830 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2831 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2832 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2833 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2834 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2835 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2836 PetscCall(MatDestroy(&loc_divudotp)); 2837 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2838 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2839 } 2840 PetscCall(ISDestroy(&zerodiag_save)); 2841 PetscCall(ISDestroy(&pressures)); 2842 if (bzerodiag) { 2843 PetscInt i; 2844 2845 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 2846 PetscCall(PetscFree(bzerodiag)); 2847 } 2848 pcbddc->benign_n = benign_n; 2849 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2850 2851 /* determines if the problem has subdomains with 0 pressure block */ 2852 have_null = (PetscBool)(!!pcbddc->benign_n); 2853 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2854 2855 project_b0: 2856 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2857 /* change of basis and p0 dofs */ 2858 if (pcbddc->benign_n) { 2859 PetscInt i, s, *nnz; 2860 2861 /* local change of basis for pressures */ 2862 PetscCall(MatDestroy(&pcbddc->benign_change)); 2863 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2864 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2865 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2866 PetscCall(PetscMalloc1(n, &nnz)); 2867 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2868 for (i = 0; i < pcbddc->benign_n; i++) { 2869 const PetscInt *idxs; 2870 PetscInt nzs, j; 2871 2872 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2873 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2874 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2875 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2876 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2877 } 2878 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2879 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2880 PetscCall(PetscFree(nnz)); 2881 /* set identity by default */ 2882 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 2883 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 2884 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 2885 /* set change on pressures */ 2886 for (s = 0; s < pcbddc->benign_n; s++) { 2887 PetscScalar *array; 2888 const PetscInt *idxs; 2889 PetscInt nzs; 2890 2891 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 2892 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2893 for (i = 0; i < nzs - 1; i++) { 2894 PetscScalar vals[2]; 2895 PetscInt cols[2]; 2896 2897 cols[0] = idxs[i]; 2898 cols[1] = idxs[nzs - 1]; 2899 vals[0] = 1.; 2900 vals[1] = 1.; 2901 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 2902 } 2903 PetscCall(PetscMalloc1(nzs, &array)); 2904 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 2905 array[nzs - 1] = 1.; 2906 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 2907 /* store local idxs for p0 */ 2908 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 2909 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 2910 PetscCall(PetscFree(array)); 2911 } 2912 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2913 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 2914 2915 /* project if needed */ 2916 if (pcbddc->benign_change_explicit) { 2917 Mat M; 2918 2919 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 2920 PetscCall(MatDestroy(&pcbddc->local_mat)); 2921 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 2922 PetscCall(MatDestroy(&M)); 2923 } 2924 /* store global idxs for p0 */ 2925 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 2926 } 2927 *zerodiaglocal = zerodiag; 2928 PetscFunctionReturn(PETSC_SUCCESS); 2929 } 2930 2931 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2932 { 2933 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2934 PetscScalar *array; 2935 2936 PetscFunctionBegin; 2937 if (!pcbddc->benign_sf) { 2938 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 2939 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 2940 } 2941 if (get) { 2942 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 2943 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2944 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 2945 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 2946 } else { 2947 PetscCall(VecGetArray(v, &array)); 2948 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2949 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 2950 PetscCall(VecRestoreArray(v, &array)); 2951 } 2952 PetscFunctionReturn(PETSC_SUCCESS); 2953 } 2954 2955 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2956 { 2957 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2958 2959 PetscFunctionBegin; 2960 /* TODO: add error checking 2961 - avoid nested pop (or push) calls. 2962 - cannot push before pop. 2963 - cannot call this if pcbddc->local_mat is NULL 2964 */ 2965 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 2966 if (pop) { 2967 if (pcbddc->benign_change_explicit) { 2968 IS is_p0; 2969 MatReuse reuse; 2970 2971 /* extract B_0 */ 2972 reuse = MAT_INITIAL_MATRIX; 2973 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 2974 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 2975 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 2976 /* remove rows and cols from local problem */ 2977 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 2978 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 2979 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 2980 PetscCall(ISDestroy(&is_p0)); 2981 } else { 2982 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2983 PetscScalar *vals; 2984 PetscInt i, n, *idxs_ins; 2985 2986 PetscCall(VecGetLocalSize(matis->y, &n)); 2987 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 2988 if (!pcbddc->benign_B0) { 2989 PetscInt *nnz; 2990 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 2991 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 2992 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 2993 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 2994 for (i = 0; i < pcbddc->benign_n; i++) { 2995 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 2996 nnz[i] = n - nnz[i]; 2997 } 2998 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 2999 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3000 PetscCall(PetscFree(nnz)); 3001 } 3002 3003 for (i = 0; i < pcbddc->benign_n; i++) { 3004 PetscScalar *array; 3005 PetscInt *idxs, j, nz, cum; 3006 3007 PetscCall(VecSet(matis->x, 0.)); 3008 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3009 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3010 for (j = 0; j < nz; j++) vals[j] = 1.; 3011 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3012 PetscCall(VecAssemblyBegin(matis->x)); 3013 PetscCall(VecAssemblyEnd(matis->x)); 3014 PetscCall(VecSet(matis->y, 0.)); 3015 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3016 PetscCall(VecGetArray(matis->y, &array)); 3017 cum = 0; 3018 for (j = 0; j < n; j++) { 3019 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3020 vals[cum] = array[j]; 3021 idxs_ins[cum] = j; 3022 cum++; 3023 } 3024 } 3025 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3026 PetscCall(VecRestoreArray(matis->y, &array)); 3027 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3028 } 3029 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3030 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3031 PetscCall(PetscFree2(idxs_ins, vals)); 3032 } 3033 } else { /* push */ 3034 3035 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3036 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3037 PetscScalar *B0_vals; 3038 PetscInt *B0_cols, B0_ncol; 3039 3040 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3041 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3042 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3043 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3044 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3045 } 3046 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3047 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3048 } 3049 PetscFunctionReturn(PETSC_SUCCESS); 3050 } 3051 3052 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3053 { 3054 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3055 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3056 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 3057 PetscBLASInt *B_iwork, *B_ifail; 3058 PetscScalar *work, lwork; 3059 PetscScalar *St, *S, *eigv; 3060 PetscScalar *Sarray, *Starray; 3061 PetscReal *eigs, thresh, lthresh, uthresh; 3062 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3063 PetscBool allocated_S_St, upart; 3064 #if defined(PETSC_USE_COMPLEX) 3065 PetscReal *rwork; 3066 #endif 3067 3068 PetscFunctionBegin; 3069 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3070 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3071 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3072 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3073 sub_schurs->is_posdef); 3074 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3075 3076 if (pcbddc->dbg_flag) { 3077 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3078 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3079 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3080 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3081 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3082 } 3083 3084 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3085 3086 /* max size of subsets */ 3087 mss = 0; 3088 for (i = 0; i < sub_schurs->n_subs; i++) { 3089 PetscInt subset_size; 3090 3091 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3092 mss = PetscMax(mss, subset_size); 3093 } 3094 3095 /* min/max and threshold */ 3096 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3097 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3098 nmax = PetscMax(nmin, nmax); 3099 allocated_S_St = PETSC_FALSE; 3100 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3101 allocated_S_St = PETSC_TRUE; 3102 } 3103 3104 /* allocate lapack workspace */ 3105 cum = cum2 = 0; 3106 maxneigs = 0; 3107 for (i = 0; i < sub_schurs->n_subs; i++) { 3108 PetscInt n, subset_size; 3109 3110 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3111 n = PetscMin(subset_size, nmax); 3112 cum += subset_size; 3113 cum2 += subset_size * n; 3114 maxneigs = PetscMax(maxneigs, n); 3115 } 3116 lwork = 0; 3117 if (mss) { 3118 PetscScalar sdummy = 0.; 3119 PetscBLASInt B_itype = 1; 3120 PetscBLASInt B_N = mss, idummy = 0; 3121 PetscReal rdummy = 0., zero = 0.0; 3122 PetscReal eps = 0.0; /* dlamch? */ 3123 3124 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3125 B_lwork = -1; 3126 /* some implementations may complain about NULL pointers, even if we are querying */ 3127 S = &sdummy; 3128 St = &sdummy; 3129 eigs = &rdummy; 3130 eigv = &sdummy; 3131 B_iwork = &idummy; 3132 B_ifail = &idummy; 3133 #if defined(PETSC_USE_COMPLEX) 3134 rwork = &rdummy; 3135 #endif 3136 thresh = 1.0; 3137 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3138 #if defined(PETSC_USE_COMPLEX) 3139 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3140 #else 3141 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3142 #endif 3143 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3144 PetscCall(PetscFPTrapPop()); 3145 } 3146 3147 nv = 0; 3148 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3149 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3150 } 3151 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3152 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3153 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3154 #if defined(PETSC_USE_COMPLEX) 3155 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3156 #endif 3157 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3158 &pcbddc->adaptive_constraints_data)); 3159 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3160 3161 maxneigs = 0; 3162 cum = cumarray = 0; 3163 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3164 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3165 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3166 const PetscInt *idxs; 3167 3168 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3169 for (cum = 0; cum < nv; cum++) { 3170 pcbddc->adaptive_constraints_n[cum] = 1; 3171 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3172 pcbddc->adaptive_constraints_data[cum] = 1.0; 3173 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3174 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3175 } 3176 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3177 } 3178 3179 if (mss) { /* multilevel */ 3180 if (sub_schurs->gdsw) { 3181 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3182 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3183 } else { 3184 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3185 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3186 } 3187 } 3188 3189 lthresh = pcbddc->adaptive_threshold[0]; 3190 uthresh = pcbddc->adaptive_threshold[1]; 3191 upart = pcbddc->use_deluxe_scaling; 3192 for (i = 0; i < sub_schurs->n_subs; i++) { 3193 const PetscInt *idxs; 3194 PetscReal upper, lower; 3195 PetscInt j, subset_size, eigs_start = 0; 3196 PetscBLASInt B_N; 3197 PetscBool same_data = PETSC_FALSE; 3198 PetscBool scal = PETSC_FALSE; 3199 3200 if (upart) { 3201 upper = PETSC_MAX_REAL; 3202 lower = uthresh; 3203 } else { 3204 if (sub_schurs->gdsw) { 3205 upper = uthresh; 3206 lower = PETSC_MIN_REAL; 3207 } else { 3208 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3209 upper = 1. / uthresh; 3210 lower = 0.; 3211 } 3212 } 3213 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3214 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3215 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3216 /* this is experimental: we assume the dofs have been properly grouped to have 3217 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3218 if (!sub_schurs->is_posdef) { 3219 Mat T; 3220 3221 for (j = 0; j < subset_size; j++) { 3222 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3223 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3224 PetscCall(MatScale(T, -1.0)); 3225 PetscCall(MatDestroy(&T)); 3226 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3227 PetscCall(MatScale(T, -1.0)); 3228 PetscCall(MatDestroy(&T)); 3229 if (sub_schurs->change_primal_sub) { 3230 PetscInt nz, k; 3231 const PetscInt *idxs; 3232 3233 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3234 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3235 for (k = 0; k < nz; k++) { 3236 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3237 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3238 } 3239 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3240 } 3241 scal = PETSC_TRUE; 3242 break; 3243 } 3244 } 3245 } 3246 3247 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3248 if (sub_schurs->is_symmetric) { 3249 PetscInt j, k; 3250 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3251 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3252 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3253 } 3254 for (j = 0; j < subset_size; j++) { 3255 for (k = j; k < subset_size; k++) { 3256 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3257 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3258 } 3259 } 3260 } else { 3261 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3262 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3263 } 3264 } else { 3265 S = Sarray + cumarray; 3266 St = Starray + cumarray; 3267 } 3268 /* see if we can save some work */ 3269 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3270 3271 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3272 B_neigs = 0; 3273 } else { 3274 PetscBLASInt B_itype = 1; 3275 PetscBLASInt B_IL, B_IU; 3276 PetscReal eps = -1.0; /* dlamch? */ 3277 PetscInt nmin_s; 3278 PetscBool compute_range; 3279 3280 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3281 B_neigs = 0; 3282 compute_range = (PetscBool)!same_data; 3283 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3284 3285 if (pcbddc->dbg_flag) { 3286 PetscInt nc = 0; 3287 3288 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3289 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, 3290 sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc)); 3291 } 3292 3293 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3294 if (compute_range) { 3295 /* ask for eigenvalues larger than thresh */ 3296 if (sub_schurs->is_posdef) { 3297 #if defined(PETSC_USE_COMPLEX) 3298 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3299 #else 3300 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3301 #endif 3302 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3303 } else { /* no theory so far, but it works nicely */ 3304 PetscInt recipe = 0, recipe_m = 1; 3305 PetscReal bb[2]; 3306 3307 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3308 switch (recipe) { 3309 case 0: 3310 if (scal) { 3311 bb[0] = PETSC_MIN_REAL; 3312 bb[1] = lthresh; 3313 } else { 3314 bb[0] = uthresh; 3315 bb[1] = PETSC_MAX_REAL; 3316 } 3317 #if defined(PETSC_USE_COMPLEX) 3318 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3319 #else 3320 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3321 #endif 3322 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3323 break; 3324 case 1: 3325 bb[0] = PETSC_MIN_REAL; 3326 bb[1] = lthresh * lthresh; 3327 #if defined(PETSC_USE_COMPLEX) 3328 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3329 #else 3330 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3331 #endif 3332 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3333 if (!scal) { 3334 PetscBLASInt B_neigs2 = 0; 3335 3336 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3337 bb[1] = PETSC_MAX_REAL; 3338 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3339 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3340 #if defined(PETSC_USE_COMPLEX) 3341 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3342 #else 3343 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3344 #endif 3345 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3346 B_neigs += B_neigs2; 3347 } 3348 break; 3349 case 2: 3350 if (scal) { 3351 bb[0] = PETSC_MIN_REAL; 3352 bb[1] = 0; 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3355 #else 3356 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3357 #endif 3358 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3359 } else { 3360 PetscBLASInt B_neigs2 = 0; 3361 PetscBool do_copy = PETSC_FALSE; 3362 3363 lthresh = PetscMax(lthresh, 0.0); 3364 if (lthresh > 0.0) { 3365 bb[0] = PETSC_MIN_REAL; 3366 bb[1] = lthresh * lthresh; 3367 3368 do_copy = PETSC_TRUE; 3369 #if defined(PETSC_USE_COMPLEX) 3370 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3371 #else 3372 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3373 #endif 3374 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3375 } 3376 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3377 bb[1] = PETSC_MAX_REAL; 3378 if (do_copy) { 3379 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3380 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3381 } 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3384 #else 3385 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3386 #endif 3387 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 case 3: 3392 if (scal) { 3393 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3394 } else { 3395 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3396 } 3397 if (!scal) { 3398 bb[0] = uthresh; 3399 bb[1] = PETSC_MAX_REAL; 3400 #if defined(PETSC_USE_COMPLEX) 3401 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3402 #else 3403 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3404 #endif 3405 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3406 } 3407 if (recipe_m > 0 && B_N - B_neigs > 0) { 3408 PetscBLASInt B_neigs2 = 0; 3409 3410 B_IL = 1; 3411 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3412 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3413 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3414 #if defined(PETSC_USE_COMPLEX) 3415 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3416 #else 3417 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3418 #endif 3419 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3420 B_neigs += B_neigs2; 3421 } 3422 break; 3423 case 4: 3424 bb[0] = PETSC_MIN_REAL; 3425 bb[1] = lthresh; 3426 #if defined(PETSC_USE_COMPLEX) 3427 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3428 #else 3429 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3430 #endif 3431 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3432 { 3433 PetscBLASInt B_neigs2 = 0; 3434 3435 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3436 bb[1] = PETSC_MAX_REAL; 3437 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3438 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3439 #if defined(PETSC_USE_COMPLEX) 3440 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3441 #else 3442 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3443 #endif 3444 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3445 B_neigs += B_neigs2; 3446 } 3447 break; 3448 case 5: /* same as before: first compute all eigenvalues, then filter */ 3449 #if defined(PETSC_USE_COMPLEX) 3450 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3451 #else 3452 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3453 #endif 3454 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3455 { 3456 PetscInt e, k, ne; 3457 for (e = 0, ne = 0; e < B_neigs; e++) { 3458 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3459 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3460 eigs[ne] = eigs[e]; 3461 ne++; 3462 } 3463 } 3464 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3465 B_neigs = ne; 3466 } 3467 break; 3468 default: 3469 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3470 } 3471 } 3472 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3473 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3474 B_IL = 1; 3475 #if defined(PETSC_USE_COMPLEX) 3476 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3477 #else 3478 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3479 #endif 3480 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3481 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3482 PetscInt k; 3483 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3484 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3485 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3486 nmin = nmax; 3487 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3488 for (k = 0; k < nmax; k++) { 3489 eigs[k] = 1. / PETSC_SMALL; 3490 eigv[k * (subset_size + 1)] = 1.0; 3491 } 3492 } 3493 PetscCall(PetscFPTrapPop()); 3494 if (B_ierr) { 3495 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3496 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3497 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3498 } 3499 3500 if (B_neigs > nmax) { 3501 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3502 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3503 B_neigs = nmax; 3504 } 3505 3506 nmin_s = PetscMin(nmin, B_N); 3507 if (B_neigs < nmin_s) { 3508 PetscBLASInt B_neigs2 = 0; 3509 3510 if (upart) { 3511 if (scal) { 3512 B_IU = nmin_s; 3513 B_IL = B_neigs + 1; 3514 } else { 3515 B_IL = B_N - nmin_s + 1; 3516 B_IU = B_N - B_neigs; 3517 } 3518 } else { 3519 B_IL = B_neigs + 1; 3520 B_IU = nmin_s; 3521 } 3522 if (pcbddc->dbg_flag) { 3523 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3524 } 3525 if (sub_schurs->is_symmetric) { 3526 PetscInt j, k; 3527 for (j = 0; j < subset_size; j++) { 3528 for (k = j; k < subset_size; k++) { 3529 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3530 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3531 } 3532 } 3533 } else { 3534 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3535 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3536 } 3537 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3538 #if defined(PETSC_USE_COMPLEX) 3539 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3540 #else 3541 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3542 #endif 3543 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3544 PetscCall(PetscFPTrapPop()); 3545 B_neigs += B_neigs2; 3546 } 3547 if (B_ierr) { 3548 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3549 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3550 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3551 } 3552 if (pcbddc->dbg_flag) { 3553 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3554 for (j = 0; j < B_neigs; j++) { 3555 if (!sub_schurs->gdsw) { 3556 if (eigs[j] == 0.0) { 3557 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3558 } else { 3559 if (upart) { 3560 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3561 } else { 3562 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3563 } 3564 } 3565 } else { 3566 double pg = (double)eigs[j + eigs_start]; 3567 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3568 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3569 } 3570 } 3571 } 3572 } 3573 /* change the basis back to the original one */ 3574 if (sub_schurs->change) { 3575 Mat change, phi, phit; 3576 3577 if (pcbddc->dbg_flag > 2) { 3578 PetscInt ii; 3579 for (ii = 0; ii < B_neigs; ii++) { 3580 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3581 for (j = 0; j < B_N; j++) { 3582 #if defined(PETSC_USE_COMPLEX) 3583 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3584 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3585 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3586 #else 3587 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3588 #endif 3589 } 3590 } 3591 } 3592 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3593 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3594 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3595 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3596 PetscCall(MatDestroy(&phit)); 3597 PetscCall(MatDestroy(&phi)); 3598 } 3599 maxneigs = PetscMax(B_neigs, maxneigs); 3600 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3601 if (B_neigs) { 3602 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3603 3604 if (pcbddc->dbg_flag > 1) { 3605 PetscInt ii; 3606 for (ii = 0; ii < B_neigs; ii++) { 3607 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3608 for (j = 0; j < B_N; j++) { 3609 #if defined(PETSC_USE_COMPLEX) 3610 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3611 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3612 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3613 #else 3614 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3615 #endif 3616 } 3617 } 3618 } 3619 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3620 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3621 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3622 cum++; 3623 } 3624 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3625 /* shift for next computation */ 3626 cumarray += subset_size * subset_size; 3627 } 3628 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3629 3630 if (mss) { 3631 if (sub_schurs->gdsw) { 3632 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3633 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3634 } else { 3635 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3636 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3637 /* destroy matrices (junk) */ 3638 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3639 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3640 } 3641 } 3642 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3643 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3644 #if defined(PETSC_USE_COMPLEX) 3645 PetscCall(PetscFree(rwork)); 3646 #endif 3647 if (pcbddc->dbg_flag) { 3648 PetscInt maxneigs_r; 3649 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3650 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3651 } 3652 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3653 PetscFunctionReturn(PETSC_SUCCESS); 3654 } 3655 3656 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3657 { 3658 PetscScalar *coarse_submat_vals; 3659 3660 PetscFunctionBegin; 3661 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3662 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3663 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3664 3665 /* Setup local neumann solver ksp_R */ 3666 /* PCBDDCSetUpLocalScatters should be called first! */ 3667 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3668 3669 /* 3670 Setup local correction and local part of coarse basis. 3671 Gives back the dense local part of the coarse matrix in column major ordering 3672 */ 3673 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals)); 3674 3675 /* Compute total number of coarse nodes and setup coarse solver */ 3676 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals)); 3677 3678 /* free */ 3679 PetscCall(PetscFree(coarse_submat_vals)); 3680 PetscFunctionReturn(PETSC_SUCCESS); 3681 } 3682 3683 PetscErrorCode PCBDDCResetCustomization(PC pc) 3684 { 3685 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3686 3687 PetscFunctionBegin; 3688 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3689 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3690 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3691 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3692 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3693 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3694 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3695 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3696 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3697 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3698 PetscFunctionReturn(PETSC_SUCCESS); 3699 } 3700 3701 PetscErrorCode PCBDDCResetTopography(PC pc) 3702 { 3703 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3704 PetscInt i; 3705 3706 PetscFunctionBegin; 3707 PetscCall(MatDestroy(&pcbddc->nedcG)); 3708 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3709 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3710 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3711 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3712 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3713 PetscCall(VecDestroy(&pcbddc->work_change)); 3714 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3715 PetscCall(MatDestroy(&pcbddc->divudotp)); 3716 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3717 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3718 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3719 pcbddc->n_local_subs = 0; 3720 PetscCall(PetscFree(pcbddc->local_subs)); 3721 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3722 pcbddc->graphanalyzed = PETSC_FALSE; 3723 pcbddc->recompute_topography = PETSC_TRUE; 3724 pcbddc->corner_selected = PETSC_FALSE; 3725 PetscFunctionReturn(PETSC_SUCCESS); 3726 } 3727 3728 PetscErrorCode PCBDDCResetSolvers(PC pc) 3729 { 3730 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3731 3732 PetscFunctionBegin; 3733 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3734 if (pcbddc->coarse_phi_B) { 3735 PetscScalar *array; 3736 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array)); 3737 PetscCall(PetscFree(array)); 3738 } 3739 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3740 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3741 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3742 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3743 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3744 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3745 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3746 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3747 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3748 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3749 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3750 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3751 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3752 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3753 PetscCall(KSPReset(pcbddc->ksp_D)); 3754 PetscCall(KSPReset(pcbddc->ksp_R)); 3755 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3756 PetscCall(MatDestroy(&pcbddc->local_mat)); 3757 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3758 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3759 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3760 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3761 PetscCall(MatDestroy(&pcbddc->benign_change)); 3762 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3763 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3764 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3765 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3766 if (pcbddc->benign_zerodiag_subs) { 3767 PetscInt i; 3768 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3769 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3770 } 3771 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3772 PetscFunctionReturn(PETSC_SUCCESS); 3773 } 3774 3775 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3776 { 3777 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3778 PC_IS *pcis = (PC_IS *)pc->data; 3779 VecType impVecType; 3780 PetscInt n_constraints, n_R, old_size; 3781 3782 PetscFunctionBegin; 3783 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3784 n_R = pcis->n - pcbddc->n_vertices; 3785 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3786 /* local work vectors (try to avoid unneeded work)*/ 3787 /* R nodes */ 3788 old_size = -1; 3789 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3790 if (n_R != old_size) { 3791 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3792 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3793 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3794 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3795 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3796 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3797 } 3798 /* local primal dofs */ 3799 old_size = -1; 3800 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3801 if (pcbddc->local_primal_size != old_size) { 3802 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3803 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3804 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3805 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3806 } 3807 /* local explicit constraints */ 3808 old_size = -1; 3809 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3810 if (n_constraints && n_constraints != old_size) { 3811 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3812 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3813 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3814 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3815 } 3816 PetscFunctionReturn(PETSC_SUCCESS); 3817 } 3818 3819 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3820 { 3821 /* pointers to pcis and pcbddc */ 3822 PC_IS *pcis = (PC_IS *)pc->data; 3823 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3824 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3825 /* submatrices of local problem */ 3826 Mat A_RV, A_VR, A_VV, local_auxmat2_R; 3827 /* submatrices of local coarse problem */ 3828 Mat S_VV, S_CV, S_VC, S_CC; 3829 /* working matrices */ 3830 Mat C_CR; 3831 /* additional working stuff */ 3832 PC pc_R; 3833 Mat F, Brhs = NULL; 3834 Vec dummy_vec; 3835 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 3836 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3837 PetscScalar *work; 3838 PetscInt *idx_V_B; 3839 PetscInt lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I; 3840 PetscInt i, n_R, n_D, n_B; 3841 PetscScalar one = 1.0, m_one = -1.0; 3842 3843 PetscFunctionBegin; 3844 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 3845 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3846 3847 /* Set Non-overlapping dimensions */ 3848 n_vertices = pcbddc->n_vertices; 3849 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3850 n_B = pcis->n_B; 3851 n_D = pcis->n - n_B; 3852 n_R = pcis->n - n_vertices; 3853 3854 /* vertices in boundary numbering */ 3855 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 3856 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 3857 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 3858 3859 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3860 PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals)); 3861 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV)); 3862 PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size)); 3863 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, PetscSafePointerPlusOffset(coarse_submat_vals, n_vertices), &S_CV)); 3864 PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size)); 3865 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, PetscSafePointerPlusOffset(coarse_submat_vals, pcbddc->local_primal_size * n_vertices), &S_VC)); 3866 PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size)); 3867 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, PetscSafePointerPlusOffset(coarse_submat_vals, (pcbddc->local_primal_size + 1) * n_vertices), &S_CC)); 3868 PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size)); 3869 3870 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3871 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 3872 PetscCall(PCSetUp(pc_R)); 3873 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 3874 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 3875 lda_rhs = n_R; 3876 need_benign_correction = PETSC_FALSE; 3877 if (isLU || isCHOL) { 3878 PetscCall(PCFactorGetMatrix(pc_R, &F)); 3879 } else if (sub_schurs && sub_schurs->reuse_solver) { 3880 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3881 MatFactorType type; 3882 3883 F = reuse_solver->F; 3884 PetscCall(MatGetFactorType(F, &type)); 3885 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3886 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 3887 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 3888 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3889 } else F = NULL; 3890 3891 /* determine if we can use a sparse right-hand side */ 3892 sparserhs = PETSC_FALSE; 3893 if (F) { 3894 MatSolverType solver; 3895 3896 PetscCall(MatFactorGetSolverType(F, &solver)); 3897 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 3898 } 3899 3900 /* allocate workspace */ 3901 n = 0; 3902 if (n_constraints) n += lda_rhs * n_constraints; 3903 if (n_vertices) { 3904 n = PetscMax(2 * lda_rhs * n_vertices, n); 3905 n = PetscMax((lda_rhs + n_B) * n_vertices, n); 3906 } 3907 if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n); 3908 PetscCall(PetscMalloc1(n, &work)); 3909 3910 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3911 dummy_vec = NULL; 3912 if (need_benign_correction && lda_rhs != n_R && F) { 3913 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 3914 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 3915 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 3916 } 3917 3918 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3919 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3920 3921 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3922 if (n_constraints) { 3923 Mat M3, C_B; 3924 IS is_aux; 3925 3926 /* Extract constraints on R nodes: C_{CR} */ 3927 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux)); 3928 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 3929 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 3930 3931 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3932 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3933 if (!sparserhs) { 3934 PetscCall(PetscArrayzero(work, lda_rhs * n_constraints)); 3935 for (i = 0; i < n_constraints; i++) { 3936 const PetscScalar *row_cmat_values; 3937 const PetscInt *row_cmat_indices; 3938 PetscInt size_of_constraint, j; 3939 3940 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3941 for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j]; 3942 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 3943 } 3944 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs)); 3945 } else { 3946 Mat tC_CR; 3947 3948 PetscCall(MatScale(C_CR, -1.0)); 3949 if (lda_rhs != n_R) { 3950 PetscScalar *aa; 3951 PetscInt r, *ii, *jj; 3952 PetscBool done; 3953 3954 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3955 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 3956 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 3957 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 3958 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 3959 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 3960 } else { 3961 PetscCall(PetscObjectReference((PetscObject)C_CR)); 3962 tC_CR = C_CR; 3963 } 3964 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 3965 PetscCall(MatDestroy(&tC_CR)); 3966 } 3967 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R)); 3968 if (F) { 3969 if (need_benign_correction) { 3970 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3971 3972 /* rhs is already zero on interior dofs, no need to change the rhs */ 3973 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 3974 } 3975 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 3976 if (need_benign_correction) { 3977 PetscScalar *marr; 3978 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3979 3980 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 3981 if (lda_rhs != n_R) { 3982 for (i = 0; i < n_constraints; i++) { 3983 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 3984 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 3985 PetscCall(VecResetArray(dummy_vec)); 3986 } 3987 } else { 3988 for (i = 0; i < n_constraints; i++) { 3989 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 3990 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 3991 PetscCall(VecResetArray(pcbddc->vec1_R)); 3992 } 3993 } 3994 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 3995 } 3996 } else { 3997 PetscScalar *marr; 3998 3999 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4000 for (i = 0; i < n_constraints; i++) { 4001 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4002 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4003 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4004 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4005 PetscCall(VecResetArray(pcbddc->vec1_R)); 4006 PetscCall(VecResetArray(pcbddc->vec2_R)); 4007 } 4008 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4009 } 4010 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4011 PetscCall(MatDestroy(&Brhs)); 4012 if (!pcbddc->switch_static) { 4013 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2)); 4014 for (i = 0; i < n_constraints; i++) { 4015 Vec r, b; 4016 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4017 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4018 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4019 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4020 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4021 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4022 } 4023 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 4024 } else { 4025 if (lda_rhs != n_R) { 4026 IS dummy; 4027 4028 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy)); 4029 PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4030 PetscCall(ISDestroy(&dummy)); 4031 } else { 4032 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4033 pcbddc->local_auxmat2 = local_auxmat2_R; 4034 } 4035 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3)); 4036 } 4037 PetscCall(ISDestroy(&is_aux)); 4038 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4039 PetscCall(MatScale(M3, m_one)); 4040 if (isCHOL) { 4041 PetscCall(MatCholeskyFactor(M3, NULL, NULL)); 4042 } else { 4043 PetscCall(MatLUFactor(M3, NULL, NULL, NULL)); 4044 } 4045 PetscCall(MatSeqDenseInvertFactors_Private(M3)); 4046 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4047 PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 4048 PetscCall(MatDestroy(&C_B)); 4049 PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4050 PetscCall(MatDestroy(&M3)); 4051 } 4052 4053 /* Get submatrices from subdomain matrix */ 4054 if (n_vertices) { 4055 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4056 PetscBool oldpin; 4057 #endif 4058 PetscBool isaij; 4059 IS is_aux; 4060 4061 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4062 IS tis; 4063 4064 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4065 PetscCall(ISSort(tis)); 4066 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4067 PetscCall(ISDestroy(&tis)); 4068 } else { 4069 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4070 } 4071 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4072 oldpin = pcbddc->local_mat->boundtocpu; 4073 #endif 4074 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4075 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4076 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4077 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij)); 4078 if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4079 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4080 } 4081 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4082 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4083 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4084 #endif 4085 PetscCall(ISDestroy(&is_aux)); 4086 } 4087 4088 /* Matrix of coarse basis functions (local) */ 4089 if (pcbddc->coarse_phi_B) { 4090 PetscInt on_B, on_primal, on_D = n_D; 4091 if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL)); 4092 PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal)); 4093 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4094 PetscScalar *marray; 4095 4096 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray)); 4097 PetscCall(PetscFree(marray)); 4098 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4099 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4100 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4101 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4102 } 4103 } 4104 4105 if (!pcbddc->coarse_phi_B) { 4106 PetscScalar *marr; 4107 4108 /* memory size */ 4109 n = n_B * pcbddc->local_primal_size; 4110 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size; 4111 if (!pcbddc->symmetric_primal) n *= 2; 4112 PetscCall(PetscCalloc1(n, &marr)); 4113 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B)); 4114 marr = PetscSafePointerPlusOffset(marr, n_B * pcbddc->local_primal_size); 4115 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4116 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D)); 4117 marr += n_D * pcbddc->local_primal_size; 4118 } 4119 if (!pcbddc->symmetric_primal) { 4120 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B)); 4121 marr += n_B * pcbddc->local_primal_size; 4122 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D)); 4123 } else { 4124 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 4125 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4126 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4127 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 4128 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4129 } 4130 } 4131 } 4132 4133 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4134 p0_lidx_I = NULL; 4135 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4136 const PetscInt *idxs; 4137 4138 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4139 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4140 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4141 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4142 } 4143 4144 /* vertices */ 4145 if (n_vertices) { 4146 PetscBool restoreavr = PETSC_FALSE; 4147 4148 PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV)); 4149 4150 if (n_R) { 4151 Mat A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */ 4152 PetscBLASInt B_N, B_one = 1; 4153 const PetscScalar *x; 4154 PetscScalar *y; 4155 4156 PetscCall(MatScale(A_RV, m_one)); 4157 if (need_benign_correction) { 4158 ISLocalToGlobalMapping RtoN; 4159 IS is_p0; 4160 PetscInt *idxs_p0, n; 4161 4162 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4163 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4164 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4165 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4166 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4167 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4168 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4169 PetscCall(ISDestroy(&is_p0)); 4170 } 4171 4172 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV)); 4173 if (!sparserhs || need_benign_correction) { 4174 if (lda_rhs == n_R) { 4175 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4176 } else { 4177 PetscScalar *av, *array; 4178 const PetscInt *xadj, *adjncy; 4179 PetscInt n; 4180 PetscBool flg_row; 4181 4182 array = work + lda_rhs * n_vertices; 4183 PetscCall(PetscArrayzero(array, lda_rhs * n_vertices)); 4184 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4185 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4186 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4187 for (i = 0; i < n; i++) { 4188 PetscInt j; 4189 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j]; 4190 } 4191 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4192 PetscCall(MatDestroy(&A_RV)); 4193 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV)); 4194 } 4195 if (need_benign_correction) { 4196 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4197 PetscScalar *marr; 4198 4199 PetscCall(MatDenseGetArray(A_RV, &marr)); 4200 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4201 4202 | 0 0 0 | (V) 4203 L = | 0 0 -1 | (P-p0) 4204 | 0 0 -1 | (p0) 4205 4206 */ 4207 for (i = 0; i < reuse_solver->benign_n; i++) { 4208 const PetscScalar *vals; 4209 const PetscInt *idxs, *idxs_zero; 4210 PetscInt n, j, nz; 4211 4212 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4213 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4214 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4215 for (j = 0; j < n; j++) { 4216 PetscScalar val = vals[j]; 4217 PetscInt k, col = idxs[j]; 4218 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4219 } 4220 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4221 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4222 } 4223 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4224 } 4225 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4226 Brhs = A_RV; 4227 } else { 4228 Mat tA_RVT, A_RVT; 4229 4230 if (!pcbddc->symmetric_primal) { 4231 /* A_RV already scaled by -1 */ 4232 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4233 } else { 4234 restoreavr = PETSC_TRUE; 4235 PetscCall(MatScale(A_VR, -1.0)); 4236 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4237 A_RVT = A_VR; 4238 } 4239 if (lda_rhs != n_R) { 4240 PetscScalar *aa; 4241 PetscInt r, *ii, *jj; 4242 PetscBool done; 4243 4244 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4245 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4246 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4247 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4248 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4249 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4250 } else { 4251 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4252 tA_RVT = A_RVT; 4253 } 4254 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4255 PetscCall(MatDestroy(&tA_RVT)); 4256 PetscCall(MatDestroy(&A_RVT)); 4257 } 4258 if (F) { 4259 /* need to correct the rhs */ 4260 if (need_benign_correction) { 4261 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4262 PetscScalar *marr; 4263 4264 PetscCall(MatDenseGetArray(Brhs, &marr)); 4265 if (lda_rhs != n_R) { 4266 for (i = 0; i < n_vertices; i++) { 4267 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4268 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4269 PetscCall(VecResetArray(dummy_vec)); 4270 } 4271 } else { 4272 for (i = 0; i < n_vertices; i++) { 4273 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4274 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4275 PetscCall(VecResetArray(pcbddc->vec1_R)); 4276 } 4277 } 4278 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4279 } 4280 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4281 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4282 /* need to correct the solution */ 4283 if (need_benign_correction) { 4284 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4285 PetscScalar *marr; 4286 4287 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4288 if (lda_rhs != n_R) { 4289 for (i = 0; i < n_vertices; i++) { 4290 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4291 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4292 PetscCall(VecResetArray(dummy_vec)); 4293 } 4294 } else { 4295 for (i = 0; i < n_vertices; i++) { 4296 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4297 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4298 PetscCall(VecResetArray(pcbddc->vec1_R)); 4299 } 4300 } 4301 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4302 } 4303 } else { 4304 PetscCall(MatDenseGetArray(Brhs, &y)); 4305 for (i = 0; i < n_vertices; i++) { 4306 PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs)); 4307 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs)); 4308 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4309 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4310 PetscCall(VecResetArray(pcbddc->vec1_R)); 4311 PetscCall(VecResetArray(pcbddc->vec2_R)); 4312 } 4313 PetscCall(MatDenseRestoreArray(Brhs, &y)); 4314 } 4315 PetscCall(MatDestroy(&A_RV)); 4316 PetscCall(MatDestroy(&Brhs)); 4317 /* S_VV and S_CV */ 4318 if (n_constraints) { 4319 Mat B; 4320 4321 PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices)); 4322 for (i = 0; i < n_vertices; i++) { 4323 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs)); 4324 PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B)); 4325 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4326 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 4327 PetscCall(VecResetArray(pcis->vec1_B)); 4328 PetscCall(VecResetArray(pcbddc->vec1_R)); 4329 } 4330 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B)); 4331 /* Reuse dense S_C = pcbddc->local_auxmat1 * B */ 4332 PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV)); 4333 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4334 PetscCall(MatProductSetFromOptions(S_CV)); 4335 PetscCall(MatProductSymbolic(S_CV)); 4336 PetscCall(MatProductNumeric(S_CV)); 4337 PetscCall(MatProductClear(S_CV)); 4338 4339 PetscCall(MatDestroy(&B)); 4340 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B)); 4341 /* Reuse B = local_auxmat2_R * S_CV */ 4342 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B)); 4343 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4344 PetscCall(MatProductSetFromOptions(B)); 4345 PetscCall(MatProductSymbolic(B)); 4346 PetscCall(MatProductNumeric(B)); 4347 4348 PetscCall(MatScale(S_CV, m_one)); 4349 PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N)); 4350 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one)); 4351 PetscCall(MatDestroy(&B)); 4352 } 4353 if (lda_rhs != n_R) { 4354 PetscCall(MatDestroy(&A_RRmA_RV)); 4355 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV)); 4356 PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs)); 4357 } 4358 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt)); 4359 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4360 if (need_benign_correction) { 4361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4362 PetscScalar *marr, *sums; 4363 4364 PetscCall(PetscMalloc1(n_vertices, &sums)); 4365 PetscCall(MatDenseGetArray(S_VVt, &marr)); 4366 for (i = 0; i < reuse_solver->benign_n; i++) { 4367 const PetscScalar *vals; 4368 const PetscInt *idxs, *idxs_zero; 4369 PetscInt n, j, nz; 4370 4371 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4372 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4373 for (j = 0; j < n_vertices; j++) { 4374 PetscInt k; 4375 sums[j] = 0.; 4376 for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs]; 4377 } 4378 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4379 for (j = 0; j < n; j++) { 4380 PetscScalar val = vals[j]; 4381 PetscInt k; 4382 for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k]; 4383 } 4384 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4385 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4386 } 4387 PetscCall(PetscFree(sums)); 4388 PetscCall(MatDenseRestoreArray(S_VVt, &marr)); 4389 PetscCall(MatDestroy(&A_RV_bcorr)); 4390 } 4391 PetscCall(MatDestroy(&A_RRmA_RV)); 4392 PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N)); 4393 PetscCall(MatDenseGetArrayRead(A_VV, &x)); 4394 PetscCall(MatDenseGetArray(S_VVt, &y)); 4395 PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one)); 4396 PetscCall(MatDenseRestoreArrayRead(A_VV, &x)); 4397 PetscCall(MatDenseRestoreArray(S_VVt, &y)); 4398 PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN)); 4399 PetscCall(MatDestroy(&S_VVt)); 4400 } else { 4401 PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN)); 4402 } 4403 PetscCall(MatDestroy(&A_VV)); 4404 4405 /* coarse basis functions */ 4406 for (i = 0; i < n_vertices; i++) { 4407 Vec v; 4408 PetscScalar one = 1.0, zero = 0.0; 4409 4410 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4411 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v)); 4412 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4413 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4414 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4415 PetscMPIInt rank; 4416 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank)); 4417 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4418 } 4419 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4420 PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */ 4421 PetscCall(VecAssemblyEnd(v)); 4422 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v)); 4423 4424 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4425 PetscInt j; 4426 4427 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v)); 4428 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4429 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4430 if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */ 4431 PetscMPIInt rank; 4432 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank)); 4433 PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix"); 4434 } 4435 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4436 PetscCall(VecAssemblyBegin(v)); 4437 PetscCall(VecAssemblyEnd(v)); 4438 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v)); 4439 } 4440 PetscCall(VecResetArray(pcbddc->vec1_R)); 4441 } 4442 /* if n_R == 0 the object is not destroyed */ 4443 PetscCall(MatDestroy(&A_RV)); 4444 } 4445 PetscCall(VecDestroy(&dummy_vec)); 4446 4447 if (n_constraints) { 4448 Mat B; 4449 4450 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B)); 4451 PetscCall(MatScale(S_CC, m_one)); 4452 PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B)); 4453 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4454 PetscCall(MatProductSetFromOptions(B)); 4455 PetscCall(MatProductSymbolic(B)); 4456 PetscCall(MatProductNumeric(B)); 4457 4458 PetscCall(MatScale(S_CC, m_one)); 4459 if (n_vertices) { 4460 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4461 PetscCall(MatTransposeSetPrecursor(S_CV, S_VC)); 4462 PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC)); 4463 } else { 4464 Mat S_VCt; 4465 4466 if (lda_rhs != n_R) { 4467 PetscCall(MatDestroy(&B)); 4468 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B)); 4469 PetscCall(MatDenseSetLDA(B, lda_rhs)); 4470 } 4471 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt)); 4472 PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN)); 4473 PetscCall(MatDestroy(&S_VCt)); 4474 } 4475 } 4476 PetscCall(MatDestroy(&B)); 4477 /* coarse basis functions */ 4478 for (i = 0; i < n_constraints; i++) { 4479 Vec v; 4480 4481 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i)); 4482 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4483 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4484 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4485 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v)); 4486 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4487 PetscInt j; 4488 PetscScalar zero = 0.0; 4489 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4490 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4491 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4492 for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES)); 4493 PetscCall(VecAssemblyBegin(v)); 4494 PetscCall(VecAssemblyEnd(v)); 4495 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v)); 4496 } 4497 PetscCall(VecResetArray(pcbddc->vec1_R)); 4498 } 4499 } 4500 if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R)); 4501 PetscCall(PetscFree(p0_lidx_I)); 4502 4503 /* coarse matrix entries relative to B_0 */ 4504 if (pcbddc->benign_n) { 4505 Mat B0_B, B0_BPHI; 4506 IS is_dummy; 4507 const PetscScalar *data; 4508 PetscInt j; 4509 4510 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4511 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4512 PetscCall(ISDestroy(&is_dummy)); 4513 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4514 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4515 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4516 for (j = 0; j < pcbddc->benign_n; j++) { 4517 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4518 for (i = 0; i < pcbddc->local_primal_size; i++) { 4519 coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j]; 4520 coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j]; 4521 } 4522 } 4523 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4524 PetscCall(MatDestroy(&B0_B)); 4525 PetscCall(MatDestroy(&B0_BPHI)); 4526 } 4527 4528 /* compute other basis functions for non-symmetric problems */ 4529 if (!pcbddc->symmetric_primal) { 4530 Mat B_V = NULL, B_C = NULL; 4531 PetscScalar *marray; 4532 4533 if (n_constraints) { 4534 Mat S_CCT, C_CRT; 4535 4536 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4537 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4538 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4539 PetscCall(MatDestroy(&S_CCT)); 4540 if (n_vertices) { 4541 Mat S_VCT; 4542 4543 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4544 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4545 PetscCall(MatDestroy(&S_VCT)); 4546 } 4547 PetscCall(MatDestroy(&C_CRT)); 4548 } else { 4549 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4550 } 4551 if (n_vertices && n_R) { 4552 PetscScalar *av, *marray; 4553 const PetscInt *xadj, *adjncy; 4554 PetscInt n; 4555 PetscBool flg_row; 4556 4557 /* B_V = B_V - A_VR^T */ 4558 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4559 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4560 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4561 PetscCall(MatDenseGetArray(B_V, &marray)); 4562 for (i = 0; i < n; i++) { 4563 PetscInt j; 4564 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 4565 } 4566 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4567 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4568 PetscCall(MatDestroy(&A_VR)); 4569 } 4570 4571 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4572 if (n_vertices) { 4573 PetscCall(MatDenseGetArray(B_V, &marray)); 4574 for (i = 0; i < n_vertices; i++) { 4575 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 4576 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4577 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4578 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4579 PetscCall(VecResetArray(pcbddc->vec1_R)); 4580 PetscCall(VecResetArray(pcbddc->vec2_R)); 4581 } 4582 PetscCall(MatDenseRestoreArray(B_V, &marray)); 4583 } 4584 if (B_C) { 4585 PetscCall(MatDenseGetArray(B_C, &marray)); 4586 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 4587 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 4588 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 4589 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4590 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4591 PetscCall(VecResetArray(pcbddc->vec1_R)); 4592 PetscCall(VecResetArray(pcbddc->vec2_R)); 4593 } 4594 PetscCall(MatDenseRestoreArray(B_C, &marray)); 4595 } 4596 /* coarse basis functions */ 4597 for (i = 0; i < pcbddc->local_primal_size; i++) { 4598 Vec v; 4599 4600 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 4601 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 4602 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4603 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4604 if (i < n_vertices) { 4605 PetscScalar one = 1.0; 4606 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 4607 PetscCall(VecAssemblyBegin(v)); 4608 PetscCall(VecAssemblyEnd(v)); 4609 } 4610 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 4611 4612 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4613 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 4614 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4615 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 4616 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 4617 } 4618 PetscCall(VecResetArray(pcbddc->vec1_R)); 4619 } 4620 PetscCall(MatDestroy(&B_V)); 4621 PetscCall(MatDestroy(&B_C)); 4622 } 4623 4624 /* free memory */ 4625 PetscCall(PetscFree(idx_V_B)); 4626 PetscCall(MatDestroy(&S_VV)); 4627 PetscCall(MatDestroy(&S_CV)); 4628 PetscCall(MatDestroy(&S_VC)); 4629 PetscCall(MatDestroy(&S_CC)); 4630 PetscCall(PetscFree(work)); 4631 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 4632 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 4633 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4634 4635 /* Checking coarse_sub_mat and coarse basis functions */ 4636 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4637 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4638 if (pcbddc->dbg_flag) { 4639 Mat coarse_sub_mat; 4640 Mat AUXMAT, TM1, TM2, TM3, TM4; 4641 Mat coarse_phi_D, coarse_phi_B; 4642 Mat coarse_psi_D, coarse_psi_B; 4643 Mat A_II, A_BB, A_IB, A_BI; 4644 Mat C_B, CPHI; 4645 IS is_dummy; 4646 Vec mones; 4647 MatType checkmattype = MATSEQAIJ; 4648 PetscReal real_value; 4649 4650 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4651 Mat A; 4652 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 4653 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 4654 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 4655 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 4656 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 4657 PetscCall(MatDestroy(&A)); 4658 } else { 4659 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 4660 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 4661 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 4662 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 4663 } 4664 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 4665 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 4666 if (!pcbddc->symmetric_primal) { 4667 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 4668 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 4669 } 4670 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat)); 4671 4672 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 4673 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 4674 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4675 if (!pcbddc->symmetric_primal) { 4676 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4677 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4678 PetscCall(MatDestroy(&AUXMAT)); 4679 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4680 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4681 PetscCall(MatDestroy(&AUXMAT)); 4682 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4683 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4684 PetscCall(MatDestroy(&AUXMAT)); 4685 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4686 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4687 PetscCall(MatDestroy(&AUXMAT)); 4688 } else { 4689 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 4690 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 4691 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4692 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 4693 PetscCall(MatDestroy(&AUXMAT)); 4694 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 4695 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 4696 PetscCall(MatDestroy(&AUXMAT)); 4697 } 4698 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 4699 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 4700 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 4701 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 4702 if (pcbddc->benign_n) { 4703 Mat B0_B, B0_BPHI; 4704 const PetscScalar *data2; 4705 PetscScalar *data; 4706 PetscInt j; 4707 4708 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4709 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4710 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4711 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4712 PetscCall(MatDenseGetArray(TM1, &data)); 4713 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 4714 for (j = 0; j < pcbddc->benign_n; j++) { 4715 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4716 for (i = 0; i < pcbddc->local_primal_size; i++) { 4717 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 4718 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 4719 } 4720 } 4721 PetscCall(MatDenseRestoreArray(TM1, &data)); 4722 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 4723 PetscCall(MatDestroy(&B0_B)); 4724 PetscCall(ISDestroy(&is_dummy)); 4725 PetscCall(MatDestroy(&B0_BPHI)); 4726 } 4727 #if 0 4728 { 4729 PetscViewer viewer; 4730 char filename[256]; 4731 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 4732 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 4733 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 4734 PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed")); 4735 PetscCall(MatView(coarse_sub_mat,viewer)); 4736 PetscCall(PetscObjectSetName((PetscObject)TM1,"projected")); 4737 PetscCall(MatView(TM1,viewer)); 4738 if (pcbddc->coarse_phi_B) { 4739 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 4740 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 4741 } 4742 if (pcbddc->coarse_phi_D) { 4743 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 4744 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 4745 } 4746 if (pcbddc->coarse_psi_B) { 4747 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 4748 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 4749 } 4750 if (pcbddc->coarse_psi_D) { 4751 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 4752 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 4753 } 4754 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 4755 PetscCall(MatView(pcbddc->local_mat,viewer)); 4756 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 4757 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 4758 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 4759 PetscCall(ISView(pcis->is_I_local,viewer)); 4760 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 4761 PetscCall(ISView(pcis->is_B_local,viewer)); 4762 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 4763 PetscCall(ISView(pcbddc->is_R_local,viewer)); 4764 PetscCall(PetscOptionsRestoreViewer(&viewer)); 4765 } 4766 #endif 4767 PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN)); 4768 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 4769 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 4770 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4771 4772 /* check constraints */ 4773 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 4774 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4775 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4776 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4777 } else { 4778 PetscScalar *data; 4779 Mat tmat; 4780 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 4781 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 4782 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 4783 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 4784 PetscCall(MatDestroy(&tmat)); 4785 } 4786 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 4787 PetscCall(VecSet(mones, -1.0)); 4788 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4789 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4790 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4791 if (!pcbddc->symmetric_primal) { 4792 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 4793 PetscCall(VecSet(mones, -1.0)); 4794 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 4795 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 4796 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 4797 } 4798 PetscCall(MatDestroy(&C_B)); 4799 PetscCall(MatDestroy(&CPHI)); 4800 PetscCall(ISDestroy(&is_dummy)); 4801 PetscCall(VecDestroy(&mones)); 4802 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 4803 PetscCall(MatDestroy(&A_II)); 4804 PetscCall(MatDestroy(&A_BB)); 4805 PetscCall(MatDestroy(&A_IB)); 4806 PetscCall(MatDestroy(&A_BI)); 4807 PetscCall(MatDestroy(&TM1)); 4808 PetscCall(MatDestroy(&TM2)); 4809 PetscCall(MatDestroy(&TM3)); 4810 PetscCall(MatDestroy(&TM4)); 4811 PetscCall(MatDestroy(&coarse_phi_D)); 4812 PetscCall(MatDestroy(&coarse_phi_B)); 4813 if (!pcbddc->symmetric_primal) { 4814 PetscCall(MatDestroy(&coarse_psi_D)); 4815 PetscCall(MatDestroy(&coarse_psi_B)); 4816 } 4817 PetscCall(MatDestroy(&coarse_sub_mat)); 4818 } 4819 /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */ 4820 { 4821 PetscBool gpu; 4822 4823 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu)); 4824 if (gpu) { 4825 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 4826 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 4827 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 4828 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 4829 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 4830 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 4831 } 4832 } 4833 /* get back data */ 4834 *coarse_submat_vals_n = coarse_submat_vals; 4835 PetscFunctionReturn(PETSC_SUCCESS); 4836 } 4837 4838 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 4839 { 4840 Mat *work_mat; 4841 IS isrow_s, iscol_s; 4842 PetscBool rsorted, csorted; 4843 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 4844 4845 PetscFunctionBegin; 4846 PetscCall(ISSorted(isrow, &rsorted)); 4847 PetscCall(ISSorted(iscol, &csorted)); 4848 PetscCall(ISGetLocalSize(isrow, &rsize)); 4849 PetscCall(ISGetLocalSize(iscol, &csize)); 4850 4851 if (!rsorted) { 4852 const PetscInt *idxs; 4853 PetscInt *idxs_sorted, i; 4854 4855 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 4856 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 4857 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 4858 PetscCall(ISGetIndices(isrow, &idxs)); 4859 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 4860 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4861 PetscCall(ISRestoreIndices(isrow, &idxs)); 4862 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 4863 } else { 4864 PetscCall(PetscObjectReference((PetscObject)isrow)); 4865 isrow_s = isrow; 4866 } 4867 4868 if (!csorted) { 4869 if (isrow == iscol) { 4870 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 4871 iscol_s = isrow_s; 4872 } else { 4873 const PetscInt *idxs; 4874 PetscInt *idxs_sorted, i; 4875 4876 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 4877 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 4878 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 4879 PetscCall(ISGetIndices(iscol, &idxs)); 4880 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 4881 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4882 PetscCall(ISRestoreIndices(iscol, &idxs)); 4883 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 4884 } 4885 } else { 4886 PetscCall(PetscObjectReference((PetscObject)iscol)); 4887 iscol_s = iscol; 4888 } 4889 4890 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 4891 4892 if (!rsorted || !csorted) { 4893 Mat new_mat; 4894 IS is_perm_r, is_perm_c; 4895 4896 if (!rsorted) { 4897 PetscInt *idxs_r, i; 4898 PetscCall(PetscMalloc1(rsize, &idxs_r)); 4899 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 4900 PetscCall(PetscFree(idxs_perm_r)); 4901 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 4902 } else { 4903 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 4904 } 4905 PetscCall(ISSetPermutation(is_perm_r)); 4906 4907 if (!csorted) { 4908 if (isrow_s == iscol_s) { 4909 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 4910 is_perm_c = is_perm_r; 4911 } else { 4912 PetscInt *idxs_c, i; 4913 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 4914 PetscCall(PetscMalloc1(csize, &idxs_c)); 4915 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 4916 PetscCall(PetscFree(idxs_perm_c)); 4917 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 4918 } 4919 } else { 4920 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 4921 } 4922 PetscCall(ISSetPermutation(is_perm_c)); 4923 4924 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 4925 PetscCall(MatDestroy(&work_mat[0])); 4926 work_mat[0] = new_mat; 4927 PetscCall(ISDestroy(&is_perm_r)); 4928 PetscCall(ISDestroy(&is_perm_c)); 4929 } 4930 4931 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 4932 *B = work_mat[0]; 4933 PetscCall(MatDestroyMatrices(1, &work_mat)); 4934 PetscCall(ISDestroy(&isrow_s)); 4935 PetscCall(ISDestroy(&iscol_s)); 4936 PetscFunctionReturn(PETSC_SUCCESS); 4937 } 4938 4939 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4940 { 4941 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 4942 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4943 Mat new_mat, lA; 4944 IS is_local, is_global; 4945 PetscInt local_size; 4946 PetscBool isseqaij, issym, isset; 4947 4948 PetscFunctionBegin; 4949 PetscCall(MatDestroy(&pcbddc->local_mat)); 4950 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 4951 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 4952 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 4953 PetscCall(ISDestroy(&is_local)); 4954 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 4955 PetscCall(ISDestroy(&is_global)); 4956 4957 if (pcbddc->dbg_flag) { 4958 Vec x, x_change; 4959 PetscReal error; 4960 4961 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 4962 PetscCall(VecSetRandom(x, NULL)); 4963 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 4964 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4965 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 4966 PetscCall(MatMult(new_mat, matis->x, matis->y)); 4967 if (!pcbddc->change_interior) { 4968 const PetscScalar *x, *y, *v; 4969 PetscReal lerror = 0.; 4970 PetscInt i; 4971 4972 PetscCall(VecGetArrayRead(matis->x, &x)); 4973 PetscCall(VecGetArrayRead(matis->y, &y)); 4974 PetscCall(VecGetArrayRead(matis->counter, &v)); 4975 for (i = 0; i < local_size; i++) 4976 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 4977 PetscCall(VecRestoreArrayRead(matis->x, &x)); 4978 PetscCall(VecRestoreArrayRead(matis->y, &y)); 4979 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 4980 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 4981 if (error > PETSC_SMALL) { 4982 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4983 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 4984 } else { 4985 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 4986 } 4987 } 4988 } 4989 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4990 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 4991 PetscCall(VecAXPY(x, -1.0, x_change)); 4992 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 4993 if (error > PETSC_SMALL) { 4994 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4995 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 4996 } else { 4997 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 4998 } 4999 } 5000 PetscCall(VecDestroy(&x)); 5001 PetscCall(VecDestroy(&x_change)); 5002 } 5003 5004 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5005 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5006 5007 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5008 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5009 if (isseqaij) { 5010 PetscCall(MatDestroy(&pcbddc->local_mat)); 5011 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5012 if (lA) { 5013 Mat work; 5014 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5015 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5016 PetscCall(MatDestroy(&work)); 5017 } 5018 } else { 5019 Mat work_mat; 5020 5021 PetscCall(MatDestroy(&pcbddc->local_mat)); 5022 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5023 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5024 PetscCall(MatDestroy(&work_mat)); 5025 if (lA) { 5026 Mat work; 5027 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5028 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5029 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5030 PetscCall(MatDestroy(&work)); 5031 } 5032 } 5033 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5034 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5035 PetscCall(MatDestroy(&new_mat)); 5036 PetscFunctionReturn(PETSC_SUCCESS); 5037 } 5038 5039 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5040 { 5041 PC_IS *pcis = (PC_IS *)pc->data; 5042 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5043 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5044 PetscInt *idx_R_local = NULL; 5045 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5046 PetscInt vbs, bs; 5047 PetscBT bitmask = NULL; 5048 5049 PetscFunctionBegin; 5050 /* 5051 No need to setup local scatters if 5052 - primal space is unchanged 5053 AND 5054 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5055 AND 5056 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5057 */ 5058 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5059 /* destroy old objects */ 5060 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5061 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5062 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5063 /* Set Non-overlapping dimensions */ 5064 n_B = pcis->n_B; 5065 n_D = pcis->n - n_B; 5066 n_vertices = pcbddc->n_vertices; 5067 5068 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5069 5070 /* create auxiliary bitmask and allocate workspace */ 5071 if (!sub_schurs || !sub_schurs->reuse_solver) { 5072 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5073 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5074 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5075 5076 for (i = 0, n_R = 0; i < pcis->n; i++) { 5077 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5078 } 5079 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5080 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5081 5082 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5083 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5084 } 5085 5086 /* Block code */ 5087 vbs = 1; 5088 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5089 if (bs > 1 && !(n_vertices % bs)) { 5090 PetscBool is_blocked = PETSC_TRUE; 5091 PetscInt *vary; 5092 if (!sub_schurs || !sub_schurs->reuse_solver) { 5093 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5094 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5095 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5096 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5097 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5098 for (i = 0; i < pcis->n / bs; i++) { 5099 if (vary[i] != 0 && vary[i] != bs) { 5100 is_blocked = PETSC_FALSE; 5101 break; 5102 } 5103 } 5104 PetscCall(PetscFree(vary)); 5105 } else { 5106 /* Verify directly the R set */ 5107 for (i = 0; i < n_R / bs; i++) { 5108 PetscInt j, node = idx_R_local[bs * i]; 5109 for (j = 1; j < bs; j++) { 5110 if (node != idx_R_local[bs * i + j] - j) { 5111 is_blocked = PETSC_FALSE; 5112 break; 5113 } 5114 } 5115 } 5116 } 5117 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5118 vbs = bs; 5119 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5120 } 5121 } 5122 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5123 if (sub_schurs && sub_schurs->reuse_solver) { 5124 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5125 5126 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5127 PetscCall(ISDestroy(&reuse_solver->is_R)); 5128 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5129 reuse_solver->is_R = pcbddc->is_R_local; 5130 } else { 5131 PetscCall(PetscFree(idx_R_local)); 5132 } 5133 5134 /* print some info if requested */ 5135 if (pcbddc->dbg_flag) { 5136 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5137 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5138 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5139 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5140 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5141 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5142 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5143 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5144 } 5145 5146 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5147 if (!sub_schurs || !sub_schurs->reuse_solver) { 5148 IS is_aux1, is_aux2; 5149 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5150 5151 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5152 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5153 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5154 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5155 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5156 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5157 for (i = 0, j = 0; i < n_R; i++) { 5158 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5159 } 5160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5161 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5162 for (i = 0, j = 0; i < n_B; i++) { 5163 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5164 } 5165 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5166 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5167 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5168 PetscCall(ISDestroy(&is_aux1)); 5169 PetscCall(ISDestroy(&is_aux2)); 5170 5171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5172 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5173 for (i = 0, j = 0; i < n_R; i++) { 5174 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5175 } 5176 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5177 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5178 PetscCall(ISDestroy(&is_aux1)); 5179 } 5180 PetscCall(PetscBTDestroy(&bitmask)); 5181 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5182 } else { 5183 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5184 IS tis; 5185 PetscInt schur_size; 5186 5187 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5188 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5189 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5190 PetscCall(ISDestroy(&tis)); 5191 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5192 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5193 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5194 PetscCall(ISDestroy(&tis)); 5195 } 5196 } 5197 PetscFunctionReturn(PETSC_SUCCESS); 5198 } 5199 5200 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5201 { 5202 MatNullSpace NullSpace; 5203 Mat dmat; 5204 const Vec *nullvecs; 5205 Vec v, v2, *nullvecs2; 5206 VecScatter sct = NULL; 5207 PetscContainer c; 5208 PetscScalar *ddata; 5209 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5210 PetscBool nnsp_has_cnst; 5211 5212 PetscFunctionBegin; 5213 if (!is && !B) { /* MATIS */ 5214 Mat_IS *matis = (Mat_IS *)A->data; 5215 5216 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5217 sct = matis->cctx; 5218 PetscCall(PetscObjectReference((PetscObject)sct)); 5219 } else { 5220 PetscCall(MatGetNullSpace(B, &NullSpace)); 5221 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5222 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5223 } 5224 PetscCall(MatGetNullSpace(A, &NullSpace)); 5225 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5226 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5227 5228 PetscCall(MatCreateVecs(A, &v, NULL)); 5229 PetscCall(MatCreateVecs(B, &v2, NULL)); 5230 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5231 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5232 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5233 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5234 PetscCall(VecGetBlockSize(v2, &bs)); 5235 PetscCall(VecGetSize(v2, &N)); 5236 PetscCall(VecGetLocalSize(v2, &n)); 5237 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5238 for (k = 0; k < nnsp_size; k++) { 5239 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5240 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5241 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5242 } 5243 if (nnsp_has_cnst) { 5244 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5245 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5246 } 5247 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5248 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5249 5250 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5251 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5252 PetscCall(PetscContainerSetPointer(c, ddata)); 5253 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5254 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5255 PetscCall(PetscContainerDestroy(&c)); 5256 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5257 PetscCall(MatDestroy(&dmat)); 5258 5259 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5260 PetscCall(PetscFree(nullvecs2)); 5261 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5262 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5263 PetscCall(VecDestroy(&v)); 5264 PetscCall(VecDestroy(&v2)); 5265 PetscCall(VecScatterDestroy(&sct)); 5266 PetscFunctionReturn(PETSC_SUCCESS); 5267 } 5268 5269 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5270 { 5271 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5272 PC_IS *pcis = (PC_IS *)pc->data; 5273 PC pc_temp; 5274 Mat A_RR; 5275 MatNullSpace nnsp; 5276 MatReuse reuse; 5277 PetscScalar m_one = -1.0; 5278 PetscReal value; 5279 PetscInt n_D, n_R; 5280 PetscBool issbaij, opts, isset, issym; 5281 void (*f)(void) = NULL; 5282 char dir_prefix[256], neu_prefix[256], str_level[16]; 5283 size_t len; 5284 5285 PetscFunctionBegin; 5286 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5287 /* approximate solver, propagate NearNullSpace if needed */ 5288 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5289 MatNullSpace gnnsp1, gnnsp2; 5290 PetscBool lhas, ghas; 5291 5292 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5293 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5294 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5295 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5296 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5297 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5298 } 5299 5300 /* compute prefixes */ 5301 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5302 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5303 if (!pcbddc->current_level) { 5304 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5305 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5306 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5307 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5308 } else { 5309 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 5310 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5311 len -= 15; /* remove "pc_bddc_coarse_" */ 5312 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5313 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5314 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5315 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5316 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5317 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5318 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5319 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5320 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5321 } 5322 5323 /* DIRICHLET PROBLEM */ 5324 if (dirichlet) { 5325 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5326 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5327 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5328 if (pcbddc->dbg_flag) { 5329 Mat A_IIn; 5330 5331 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5332 PetscCall(MatDestroy(&pcis->A_II)); 5333 pcis->A_II = A_IIn; 5334 } 5335 } 5336 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5337 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5338 5339 /* Matrix for Dirichlet problem is pcis->A_II */ 5340 n_D = pcis->n - pcis->n_B; 5341 opts = PETSC_FALSE; 5342 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5343 opts = PETSC_TRUE; 5344 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5345 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5346 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5347 /* default */ 5348 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5349 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5350 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5351 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5352 if (issbaij) { 5353 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5354 } else { 5355 PetscCall(PCSetType(pc_temp, PCLU)); 5356 } 5357 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5358 } 5359 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5360 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5361 /* Allow user's customization */ 5362 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5363 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5364 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5365 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5366 } 5367 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5368 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5369 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5370 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5371 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5372 const PetscInt *idxs; 5373 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5374 5375 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5376 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5377 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5378 for (i = 0; i < nl; i++) { 5379 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5380 } 5381 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5382 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5383 PetscCall(PetscFree(scoords)); 5384 } 5385 if (sub_schurs && sub_schurs->reuse_solver) { 5386 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5387 5388 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5389 } 5390 5391 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5392 if (!n_D) { 5393 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5394 PetscCall(PCSetType(pc_temp, PCNONE)); 5395 } 5396 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5397 /* set ksp_D into pcis data */ 5398 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5399 PetscCall(KSPDestroy(&pcis->ksp_D)); 5400 pcis->ksp_D = pcbddc->ksp_D; 5401 } 5402 5403 /* NEUMANN PROBLEM */ 5404 A_RR = NULL; 5405 if (neumann) { 5406 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5407 PetscInt ibs, mbs; 5408 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5409 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5410 5411 reuse_neumann_solver = PETSC_FALSE; 5412 if (sub_schurs && sub_schurs->reuse_solver) { 5413 IS iP; 5414 5415 reuse_neumann_solver = PETSC_TRUE; 5416 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5417 if (iP) reuse_neumann_solver = PETSC_FALSE; 5418 } 5419 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5420 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5421 if (pcbddc->ksp_R) { /* already created ksp */ 5422 PetscInt nn_R; 5423 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5424 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5425 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5426 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5427 PetscCall(KSPReset(pcbddc->ksp_R)); 5428 PetscCall(MatDestroy(&A_RR)); 5429 reuse = MAT_INITIAL_MATRIX; 5430 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5431 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5432 PetscCall(MatDestroy(&A_RR)); 5433 reuse = MAT_INITIAL_MATRIX; 5434 } else { /* safe to reuse the matrix */ 5435 reuse = MAT_REUSE_MATRIX; 5436 } 5437 } 5438 /* last check */ 5439 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5440 PetscCall(MatDestroy(&A_RR)); 5441 reuse = MAT_INITIAL_MATRIX; 5442 } 5443 } else { /* first time, so we need to create the matrix */ 5444 reuse = MAT_INITIAL_MATRIX; 5445 } 5446 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5447 TODO: Get Rid of these conversions */ 5448 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5449 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5450 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5451 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5452 if (matis->A == pcbddc->local_mat) { 5453 PetscCall(MatDestroy(&pcbddc->local_mat)); 5454 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5455 } else { 5456 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5457 } 5458 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 5459 if (matis->A == pcbddc->local_mat) { 5460 PetscCall(MatDestroy(&pcbddc->local_mat)); 5461 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5462 } else { 5463 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5464 } 5465 } 5466 /* extract A_RR */ 5467 if (reuse_neumann_solver) { 5468 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5469 5470 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5471 PetscCall(MatDestroy(&A_RR)); 5472 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5473 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5474 } else { 5475 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5476 } 5477 } else { 5478 PetscCall(MatDestroy(&A_RR)); 5479 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5480 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5481 } 5482 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5483 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5484 } 5485 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5486 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5487 opts = PETSC_FALSE; 5488 if (!pcbddc->ksp_R) { /* create object if not present */ 5489 opts = PETSC_TRUE; 5490 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5491 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 5492 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5493 /* default */ 5494 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5495 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5496 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5497 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5498 if (issbaij) { 5499 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5500 } else { 5501 PetscCall(PCSetType(pc_temp, PCLU)); 5502 } 5503 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5504 } 5505 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5506 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5507 if (opts) { /* Allow user's customization once */ 5508 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5509 } 5510 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5511 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5512 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5513 } 5514 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5515 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5516 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5517 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5518 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5519 const PetscInt *idxs; 5520 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5521 5522 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 5523 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 5524 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5525 for (i = 0; i < nl; i++) { 5526 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5527 } 5528 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 5529 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5530 PetscCall(PetscFree(scoords)); 5531 } 5532 5533 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5534 if (!n_R) { 5535 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5536 PetscCall(PCSetType(pc_temp, PCNONE)); 5537 } 5538 /* Reuse solver if it is present */ 5539 if (reuse_neumann_solver) { 5540 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5541 5542 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 5543 } 5544 PetscCall(KSPSetUp(pcbddc->ksp_R)); 5545 } 5546 5547 if (pcbddc->dbg_flag) { 5548 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5549 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5550 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5551 } 5552 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5553 5554 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5555 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 5556 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 5557 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 5558 /* check Dirichlet and Neumann solvers */ 5559 if (pcbddc->dbg_flag) { 5560 if (dirichlet) { /* Dirichlet */ 5561 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 5562 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 5563 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 5564 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 5565 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 5566 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 5567 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 5568 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5569 } 5570 if (neumann) { /* Neumann */ 5571 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 5572 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 5573 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 5574 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5575 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 5576 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 5577 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 5578 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5579 } 5580 } 5581 /* free Neumann problem's matrix */ 5582 PetscCall(MatDestroy(&A_RR)); 5583 PetscFunctionReturn(PETSC_SUCCESS); 5584 } 5585 5586 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5587 { 5588 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5589 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5590 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 5591 5592 PetscFunctionBegin; 5593 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 5594 if (!pcbddc->switch_static) { 5595 if (applytranspose && pcbddc->local_auxmat1) { 5596 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 5597 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5598 } 5599 if (!reuse_solver) { 5600 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5601 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5602 } else { 5603 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5604 5605 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5606 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 5607 } 5608 } else { 5609 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5610 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5611 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5612 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5613 if (applytranspose && pcbddc->local_auxmat1) { 5614 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 5615 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 5616 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5617 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 5618 } 5619 } 5620 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5621 if (!reuse_solver || pcbddc->switch_static) { 5622 if (applytranspose) { 5623 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5624 } else { 5625 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 5626 } 5627 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 5628 } else { 5629 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5630 5631 if (applytranspose) { 5632 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5633 } else { 5634 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 5635 } 5636 } 5637 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 5638 PetscCall(VecSet(inout_B, 0.)); 5639 if (!pcbddc->switch_static) { 5640 if (!reuse_solver) { 5641 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5642 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5643 } else { 5644 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5645 5646 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5647 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 5648 } 5649 if (!applytranspose && pcbddc->local_auxmat1) { 5650 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5651 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 5652 } 5653 } else { 5654 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5655 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5656 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5657 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5658 if (!applytranspose && pcbddc->local_auxmat1) { 5659 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 5660 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 5661 } 5662 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5663 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 5664 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5665 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 5666 } 5667 PetscFunctionReturn(PETSC_SUCCESS); 5668 } 5669 5670 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5671 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5672 { 5673 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5674 PC_IS *pcis = (PC_IS *)pc->data; 5675 const PetscScalar zero = 0.0; 5676 5677 PetscFunctionBegin; 5678 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5679 if (!pcbddc->benign_apply_coarse_only) { 5680 if (applytranspose) { 5681 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 5682 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5683 } else { 5684 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 5685 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 5686 } 5687 } else { 5688 PetscCall(VecSet(pcbddc->vec1_P, zero)); 5689 } 5690 5691 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5692 if (pcbddc->benign_n) { 5693 PetscScalar *array; 5694 PetscInt j; 5695 5696 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5697 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 5698 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5699 } 5700 5701 /* start communications from local primal nodes to rhs of coarse solver */ 5702 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 5703 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 5704 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 5705 5706 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5707 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5708 if (pcbddc->coarse_ksp) { 5709 Mat coarse_mat; 5710 Vec rhs, sol; 5711 MatNullSpace nullsp; 5712 PetscBool isbddc = PETSC_FALSE; 5713 5714 if (pcbddc->benign_have_null) { 5715 PC coarse_pc; 5716 5717 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5718 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 5719 /* we need to propagate to coarser levels the need for a possible benign correction */ 5720 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5721 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 5722 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5723 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5724 } 5725 } 5726 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 5727 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 5728 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 5729 if (applytranspose) { 5730 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 5731 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 5732 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5733 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 5734 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5735 } else { 5736 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 5737 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5738 PC coarse_pc; 5739 5740 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 5741 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5742 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 5743 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 5744 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 5745 } else { 5746 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 5747 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 5748 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 5749 } 5750 } 5751 /* we don't need the benign correction at coarser levels anymore */ 5752 if (pcbddc->benign_have_null && isbddc) { 5753 PC coarse_pc; 5754 PC_BDDC *coarsepcbddc; 5755 5756 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 5757 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 5758 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5759 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5760 } 5761 } 5762 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 5763 5764 /* Local solution on R nodes */ 5765 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 5766 /* communications from coarse sol to local primal nodes */ 5767 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 5768 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 5769 5770 /* Sum contributions from the two levels */ 5771 if (!pcbddc->benign_apply_coarse_only) { 5772 if (applytranspose) { 5773 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5774 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5775 } else { 5776 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 5777 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 5778 } 5779 /* store p0 */ 5780 if (pcbddc->benign_n) { 5781 PetscScalar *array; 5782 PetscInt j; 5783 5784 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 5785 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 5786 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 5787 } 5788 } else { /* expand the coarse solution */ 5789 if (applytranspose) { 5790 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 5791 } else { 5792 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 5793 } 5794 } 5795 PetscFunctionReturn(PETSC_SUCCESS); 5796 } 5797 5798 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 5799 { 5800 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5801 Vec from, to; 5802 const PetscScalar *array; 5803 5804 PetscFunctionBegin; 5805 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5806 from = pcbddc->coarse_vec; 5807 to = pcbddc->vec1_P; 5808 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5809 Vec tvec; 5810 5811 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5812 PetscCall(VecResetArray(tvec)); 5813 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 5814 PetscCall(VecGetArrayRead(tvec, &array)); 5815 PetscCall(VecPlaceArray(from, array)); 5816 PetscCall(VecRestoreArrayRead(tvec, &array)); 5817 } 5818 } else { /* from local to global -> put data in coarse right hand side */ 5819 from = pcbddc->vec1_P; 5820 to = pcbddc->coarse_vec; 5821 } 5822 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5823 PetscFunctionReturn(PETSC_SUCCESS); 5824 } 5825 5826 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5827 { 5828 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5829 Vec from, to; 5830 const PetscScalar *array; 5831 5832 PetscFunctionBegin; 5833 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5834 from = pcbddc->coarse_vec; 5835 to = pcbddc->vec1_P; 5836 } else { /* from local to global -> put data in coarse right hand side */ 5837 from = pcbddc->vec1_P; 5838 to = pcbddc->coarse_vec; 5839 } 5840 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 5841 if (smode == SCATTER_FORWARD) { 5842 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5843 Vec tvec; 5844 5845 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 5846 PetscCall(VecGetArrayRead(to, &array)); 5847 PetscCall(VecPlaceArray(tvec, array)); 5848 PetscCall(VecRestoreArrayRead(to, &array)); 5849 } 5850 } else { 5851 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5852 PetscCall(VecResetArray(from)); 5853 } 5854 } 5855 PetscFunctionReturn(PETSC_SUCCESS); 5856 } 5857 5858 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5859 { 5860 PC_IS *pcis = (PC_IS *)pc->data; 5861 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5862 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5863 /* one and zero */ 5864 PetscScalar one = 1.0, zero = 0.0; 5865 /* space to store constraints and their local indices */ 5866 PetscScalar *constraints_data; 5867 PetscInt *constraints_idxs, *constraints_idxs_B; 5868 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 5869 PetscInt *constraints_n; 5870 /* iterators */ 5871 PetscInt i, j, k, total_counts, total_counts_cc, cum; 5872 /* BLAS integers */ 5873 PetscBLASInt lwork, lierr; 5874 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 5875 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 5876 /* reuse */ 5877 PetscInt olocal_primal_size, olocal_primal_size_cc; 5878 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 5879 /* change of basis */ 5880 PetscBool qr_needed; 5881 PetscBT change_basis, qr_needed_idx; 5882 /* auxiliary stuff */ 5883 PetscInt *nnz, *is_indices; 5884 PetscInt ncc; 5885 /* some quantities */ 5886 PetscInt n_vertices, total_primal_vertices, valid_constraints; 5887 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 5888 PetscReal tol; /* tolerance for retaining eigenmodes */ 5889 5890 PetscFunctionBegin; 5891 tol = PetscSqrtReal(PETSC_SMALL); 5892 /* Destroy Mat objects computed previously */ 5893 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 5894 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 5895 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 5896 /* save info on constraints from previous setup (if any) */ 5897 olocal_primal_size = pcbddc->local_primal_size; 5898 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5899 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 5900 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 5901 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 5902 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 5903 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 5904 5905 if (!pcbddc->adaptive_selection) { 5906 IS ISForVertices, *ISForFaces, *ISForEdges; 5907 MatNullSpace nearnullsp; 5908 const Vec *nearnullvecs; 5909 Vec *localnearnullsp; 5910 PetscScalar *array; 5911 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 5912 PetscBool nnsp_has_cnst; 5913 /* LAPACK working arrays for SVD or POD */ 5914 PetscBool skip_lapack, boolforchange; 5915 PetscScalar *work; 5916 PetscReal *singular_vals; 5917 #if defined(PETSC_USE_COMPLEX) 5918 PetscReal *rwork; 5919 #endif 5920 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 5921 PetscBLASInt dummy_int = 1; 5922 PetscScalar dummy_scalar = 1.; 5923 PetscBool use_pod = PETSC_FALSE; 5924 5925 /* MKL SVD with same input gives different results on different processes! */ 5926 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 5927 use_pod = PETSC_TRUE; 5928 #endif 5929 /* Get index sets for faces, edges and vertices from graph */ 5930 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 5931 o_nf = n_ISForFaces; 5932 o_ne = n_ISForEdges; 5933 n_vertices = 0; 5934 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 5935 /* print some info */ 5936 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5937 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 5938 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 5939 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5940 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 5941 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 5942 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 5943 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 5944 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5945 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 5946 } 5947 5948 if (!pcbddc->use_vertices) n_vertices = 0; 5949 if (!pcbddc->use_edges) n_ISForEdges = 0; 5950 if (!pcbddc->use_faces) n_ISForFaces = 0; 5951 5952 /* check if near null space is attached to global mat */ 5953 if (pcbddc->use_nnsp) { 5954 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 5955 } else nearnullsp = NULL; 5956 5957 if (nearnullsp) { 5958 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 5959 /* remove any stored info */ 5960 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 5961 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 5962 /* store information for BDDC solver reuse */ 5963 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 5964 pcbddc->onearnullspace = nearnullsp; 5965 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 5966 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 5967 } else { /* if near null space is not provided BDDC uses constants by default */ 5968 nnsp_size = 0; 5969 nnsp_has_cnst = PETSC_TRUE; 5970 } 5971 /* get max number of constraints on a single cc */ 5972 max_constraints = nnsp_size; 5973 if (nnsp_has_cnst) max_constraints++; 5974 5975 /* 5976 Evaluate maximum storage size needed by the procedure 5977 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5978 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5979 There can be multiple constraints per connected component 5980 */ 5981 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 5982 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 5983 5984 total_counts = n_ISForFaces + n_ISForEdges; 5985 total_counts *= max_constraints; 5986 total_counts += n_vertices; 5987 PetscCall(PetscBTCreate(total_counts, &change_basis)); 5988 5989 total_counts = 0; 5990 max_size_of_constraint = 0; 5991 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 5992 IS used_is; 5993 if (i < n_ISForEdges) { 5994 used_is = ISForEdges[i]; 5995 } else { 5996 used_is = ISForFaces[i - n_ISForEdges]; 5997 } 5998 PetscCall(ISGetSize(used_is, &j)); 5999 total_counts += j; 6000 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6001 } 6002 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6003 6004 /* get local part of global near null space vectors */ 6005 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6006 for (k = 0; k < nnsp_size; k++) { 6007 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6008 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6009 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6010 } 6011 6012 /* whether or not to skip lapack calls */ 6013 skip_lapack = PETSC_TRUE; 6014 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6015 6016 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6017 if (!skip_lapack) { 6018 PetscScalar temp_work; 6019 6020 if (use_pod) { 6021 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6022 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6023 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6024 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6025 #if defined(PETSC_USE_COMPLEX) 6026 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6027 #endif 6028 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6029 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6030 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6031 lwork = -1; 6032 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6033 #if !defined(PETSC_USE_COMPLEX) 6034 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6035 #else 6036 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6037 #endif 6038 PetscCall(PetscFPTrapPop()); 6039 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 6040 } else { 6041 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6042 /* SVD */ 6043 PetscInt max_n, min_n; 6044 max_n = max_size_of_constraint; 6045 min_n = max_constraints; 6046 if (max_size_of_constraint < max_constraints) { 6047 min_n = max_size_of_constraint; 6048 max_n = max_constraints; 6049 } 6050 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6051 #if defined(PETSC_USE_COMPLEX) 6052 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6053 #endif 6054 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6055 lwork = -1; 6056 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6057 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6058 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6059 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6060 #if !defined(PETSC_USE_COMPLEX) 6061 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6062 #else 6063 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6064 #endif 6065 PetscCall(PetscFPTrapPop()); 6066 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6067 #else 6068 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6069 #endif /* on missing GESVD */ 6070 } 6071 /* Allocate optimal workspace */ 6072 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6073 PetscCall(PetscMalloc1(lwork, &work)); 6074 } 6075 /* Now we can loop on constraining sets */ 6076 total_counts = 0; 6077 constraints_idxs_ptr[0] = 0; 6078 constraints_data_ptr[0] = 0; 6079 /* vertices */ 6080 if (n_vertices) { 6081 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6082 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6083 for (i = 0; i < n_vertices; i++) { 6084 constraints_n[total_counts] = 1; 6085 constraints_data[total_counts] = 1.0; 6086 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6087 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6088 total_counts++; 6089 } 6090 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6091 } 6092 6093 /* edges and faces */ 6094 total_counts_cc = total_counts; 6095 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6096 IS used_is; 6097 PetscBool idxs_copied = PETSC_FALSE; 6098 6099 if (ncc < n_ISForEdges) { 6100 used_is = ISForEdges[ncc]; 6101 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6102 } else { 6103 used_is = ISForFaces[ncc - n_ISForEdges]; 6104 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6105 } 6106 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6107 6108 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6109 if (!size_of_constraint) continue; 6110 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6111 /* change of basis should not be performed on local periodic nodes */ 6112 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6113 if (nnsp_has_cnst) { 6114 PetscScalar quad_value; 6115 6116 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6117 idxs_copied = PETSC_TRUE; 6118 6119 if (!pcbddc->use_nnsp_true) { 6120 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6121 } else { 6122 quad_value = 1.0; 6123 } 6124 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6125 temp_constraints++; 6126 total_counts++; 6127 } 6128 for (k = 0; k < nnsp_size; k++) { 6129 PetscReal real_value; 6130 PetscScalar *ptr_to_data; 6131 6132 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6133 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6134 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6135 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6136 /* check if array is null on the connected component */ 6137 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6138 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6139 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6140 temp_constraints++; 6141 total_counts++; 6142 if (!idxs_copied) { 6143 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6144 idxs_copied = PETSC_TRUE; 6145 } 6146 } 6147 } 6148 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6149 valid_constraints = temp_constraints; 6150 if (!pcbddc->use_nnsp_true && temp_constraints) { 6151 if (temp_constraints == 1) { /* just normalize the constraint */ 6152 PetscScalar norm, *ptr_to_data; 6153 6154 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6155 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6156 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6157 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6158 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6159 } else { /* perform SVD */ 6160 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6161 6162 if (use_pod) { 6163 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6164 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6165 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6166 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6167 from that computed using LAPACKgesvd 6168 -> This is due to a different computation of eigenvectors in LAPACKheev 6169 -> The quality of the POD-computed basis will be the same */ 6170 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6171 /* Store upper triangular part of correlation matrix */ 6172 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6173 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6174 for (j = 0; j < temp_constraints; j++) { 6175 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6176 } 6177 /* compute eigenvalues and eigenvectors of correlation matrix */ 6178 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6179 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6180 #if !defined(PETSC_USE_COMPLEX) 6181 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6182 #else 6183 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6184 #endif 6185 PetscCall(PetscFPTrapPop()); 6186 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6187 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6188 j = 0; 6189 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6190 total_counts = total_counts - j; 6191 valid_constraints = temp_constraints - j; 6192 /* scale and copy POD basis into used quadrature memory */ 6193 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6194 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6195 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6196 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6197 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6198 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6199 if (j < temp_constraints) { 6200 PetscInt ii; 6201 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6202 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6203 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6204 PetscCall(PetscFPTrapPop()); 6205 for (k = 0; k < temp_constraints - j; k++) { 6206 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6207 } 6208 } 6209 } else { 6210 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6211 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6212 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6213 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6214 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6215 #if !defined(PETSC_USE_COMPLEX) 6216 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6217 #else 6218 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6219 #endif 6220 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6221 PetscCall(PetscFPTrapPop()); 6222 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6223 k = temp_constraints; 6224 if (k > size_of_constraint) k = size_of_constraint; 6225 j = 0; 6226 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6227 valid_constraints = k - j; 6228 total_counts = total_counts - temp_constraints + valid_constraints; 6229 #else 6230 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6231 #endif /* on missing GESVD */ 6232 } 6233 } 6234 } 6235 /* update pointers information */ 6236 if (valid_constraints) { 6237 constraints_n[total_counts_cc] = valid_constraints; 6238 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6239 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6240 /* set change_of_basis flag */ 6241 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6242 total_counts_cc++; 6243 } 6244 } 6245 /* free workspace */ 6246 if (!skip_lapack) { 6247 PetscCall(PetscFree(work)); 6248 #if defined(PETSC_USE_COMPLEX) 6249 PetscCall(PetscFree(rwork)); 6250 #endif 6251 PetscCall(PetscFree(singular_vals)); 6252 PetscCall(PetscFree(correlation_mat)); 6253 PetscCall(PetscFree(temp_basis)); 6254 } 6255 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6256 PetscCall(PetscFree(localnearnullsp)); 6257 /* free index sets of faces, edges and vertices */ 6258 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6259 } else { 6260 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6261 6262 total_counts = 0; 6263 n_vertices = 0; 6264 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6265 max_constraints = 0; 6266 total_counts_cc = 0; 6267 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6268 total_counts += pcbddc->adaptive_constraints_n[i]; 6269 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6270 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6271 } 6272 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6273 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6274 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6275 constraints_data = pcbddc->adaptive_constraints_data; 6276 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6277 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6278 total_counts_cc = 0; 6279 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6280 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6281 } 6282 6283 max_size_of_constraint = 0; 6284 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6285 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6286 /* Change of basis */ 6287 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6288 if (pcbddc->use_change_of_basis) { 6289 for (i = 0; i < sub_schurs->n_subs; i++) { 6290 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6291 } 6292 } 6293 } 6294 pcbddc->local_primal_size = total_counts; 6295 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6296 6297 /* map constraints_idxs in boundary numbering */ 6298 if (pcbddc->use_change_of_basis) { 6299 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6300 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6301 } 6302 6303 /* Create constraint matrix */ 6304 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6305 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6306 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6307 6308 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6309 /* determine if a QR strategy is needed for change of basis */ 6310 qr_needed = pcbddc->use_qr_single; 6311 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6312 total_primal_vertices = 0; 6313 pcbddc->local_primal_size_cc = 0; 6314 for (i = 0; i < total_counts_cc; i++) { 6315 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6316 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6317 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6318 pcbddc->local_primal_size_cc += 1; 6319 } else if (PetscBTLookup(change_basis, i)) { 6320 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6321 pcbddc->local_primal_size_cc += constraints_n[i]; 6322 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6323 PetscCall(PetscBTSet(qr_needed_idx, i)); 6324 qr_needed = PETSC_TRUE; 6325 } 6326 } else { 6327 pcbddc->local_primal_size_cc += 1; 6328 } 6329 } 6330 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6331 pcbddc->n_vertices = total_primal_vertices; 6332 /* permute indices in order to have a sorted set of vertices */ 6333 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6334 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6335 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6336 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6337 6338 /* nonzero structure of constraint matrix */ 6339 /* and get reference dof for local constraints */ 6340 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6341 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6342 6343 j = total_primal_vertices; 6344 total_counts = total_primal_vertices; 6345 cum = total_primal_vertices; 6346 for (i = n_vertices; i < total_counts_cc; i++) { 6347 if (!PetscBTLookup(change_basis, i)) { 6348 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6349 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6350 cum++; 6351 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6352 for (k = 0; k < constraints_n[i]; k++) { 6353 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6354 nnz[j + k] = size_of_constraint; 6355 } 6356 j += constraints_n[i]; 6357 } 6358 } 6359 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6360 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6361 PetscCall(PetscFree(nnz)); 6362 6363 /* set values in constraint matrix */ 6364 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6365 total_counts = total_primal_vertices; 6366 for (i = n_vertices; i < total_counts_cc; i++) { 6367 if (!PetscBTLookup(change_basis, i)) { 6368 PetscInt *cols; 6369 6370 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6371 cols = constraints_idxs + constraints_idxs_ptr[i]; 6372 for (k = 0; k < constraints_n[i]; k++) { 6373 PetscInt row = total_counts + k; 6374 PetscScalar *vals; 6375 6376 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6377 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6378 } 6379 total_counts += constraints_n[i]; 6380 } 6381 } 6382 /* assembling */ 6383 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6384 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6385 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6386 6387 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6388 if (pcbddc->use_change_of_basis) { 6389 /* dual and primal dofs on a single cc */ 6390 PetscInt dual_dofs, primal_dofs; 6391 /* working stuff for GEQRF */ 6392 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6393 PetscBLASInt lqr_work; 6394 /* working stuff for UNGQR */ 6395 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6396 PetscBLASInt lgqr_work; 6397 /* working stuff for TRTRS */ 6398 PetscScalar *trs_rhs = NULL; 6399 PetscBLASInt Blas_NRHS; 6400 /* pointers for values insertion into change of basis matrix */ 6401 PetscInt *start_rows, *start_cols; 6402 PetscScalar *start_vals; 6403 /* working stuff for values insertion */ 6404 PetscBT is_primal; 6405 PetscInt *aux_primal_numbering_B; 6406 /* matrix sizes */ 6407 PetscInt global_size, local_size; 6408 /* temporary change of basis */ 6409 Mat localChangeOfBasisMatrix; 6410 /* extra space for debugging */ 6411 PetscScalar *dbg_work = NULL; 6412 6413 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6414 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6415 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6416 /* nonzeros for local mat */ 6417 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6418 if (!pcbddc->benign_change || pcbddc->fake_change) { 6419 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6420 } else { 6421 const PetscInt *ii; 6422 PetscInt n; 6423 PetscBool flg_row; 6424 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6425 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6426 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6427 } 6428 for (i = n_vertices; i < total_counts_cc; i++) { 6429 if (PetscBTLookup(change_basis, i)) { 6430 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6431 if (PetscBTLookup(qr_needed_idx, i)) { 6432 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6433 } else { 6434 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6435 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6436 } 6437 } 6438 } 6439 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6440 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6441 PetscCall(PetscFree(nnz)); 6442 /* Set interior change in the matrix */ 6443 if (!pcbddc->benign_change || pcbddc->fake_change) { 6444 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6445 } else { 6446 const PetscInt *ii, *jj; 6447 PetscScalar *aa; 6448 PetscInt n; 6449 PetscBool flg_row; 6450 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6451 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6452 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6453 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6454 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6455 } 6456 6457 if (pcbddc->dbg_flag) { 6458 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6459 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6460 } 6461 6462 /* Now we loop on the constraints which need a change of basis */ 6463 /* 6464 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6465 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6466 6467 Basic blocks of change of basis matrix T computed: 6468 6469 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6470 6471 | 1 0 ... 0 s_1/S | 6472 | 0 1 ... 0 s_2/S | 6473 | ... | 6474 | 0 ... 1 s_{n-1}/S | 6475 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6476 6477 with S = \sum_{i=1}^n s_i^2 6478 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6479 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6480 6481 - QR decomposition of constraints otherwise 6482 */ 6483 if (qr_needed && max_size_of_constraint) { 6484 /* space to store Q */ 6485 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6486 /* array to store scaling factors for reflectors */ 6487 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6488 /* first we issue queries for optimal work */ 6489 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6490 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6491 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6492 lqr_work = -1; 6493 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6494 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6495 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6496 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 6497 lgqr_work = -1; 6498 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6499 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6500 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6501 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6502 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6503 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6504 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6505 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6506 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 6507 /* array to store rhs and solution of triangular solver */ 6508 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6509 /* allocating workspace for check */ 6510 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 6511 } 6512 /* array to store whether a node is primal or not */ 6513 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6514 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6515 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6516 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6517 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 6518 PetscCall(PetscFree(aux_primal_numbering_B)); 6519 6520 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6521 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 6522 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 6523 if (PetscBTLookup(change_basis, total_counts)) { 6524 /* get constraint info */ 6525 primal_dofs = constraints_n[total_counts]; 6526 dual_dofs = size_of_constraint - primal_dofs; 6527 6528 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 6529 6530 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 6531 6532 /* copy quadrature constraints for change of basis check */ 6533 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6534 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6535 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6536 6537 /* compute QR decomposition of constraints */ 6538 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6539 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6540 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6541 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6542 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 6543 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 6544 PetscCall(PetscFPTrapPop()); 6545 6546 /* explicitly compute R^-T */ 6547 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 6548 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 6549 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6550 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 6551 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6552 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6553 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6554 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 6555 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 6556 PetscCall(PetscFPTrapPop()); 6557 6558 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 6559 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6560 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6561 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6562 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6563 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6564 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 6565 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 6566 PetscCall(PetscFPTrapPop()); 6567 6568 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6569 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6570 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6571 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6572 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 6573 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 6574 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6575 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 6576 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6577 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6578 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 6579 PetscCall(PetscFPTrapPop()); 6580 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 6581 6582 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6583 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6584 /* insert cols for primal dofs */ 6585 for (j = 0; j < primal_dofs; j++) { 6586 start_vals = &qr_basis[j * size_of_constraint]; 6587 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6588 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6589 } 6590 /* insert cols for dual dofs */ 6591 for (j = 0, k = 0; j < dual_dofs; k++) { 6592 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 6593 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 6594 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6595 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 6596 j++; 6597 } 6598 } 6599 6600 /* check change of basis */ 6601 if (pcbddc->dbg_flag) { 6602 PetscInt ii, jj; 6603 PetscBool valid_qr = PETSC_TRUE; 6604 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 6605 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6606 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 6607 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6608 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 6609 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 6610 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6611 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 6612 PetscCall(PetscFPTrapPop()); 6613 for (jj = 0; jj < size_of_constraint; jj++) { 6614 for (ii = 0; ii < primal_dofs; ii++) { 6615 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6616 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6617 } 6618 } 6619 if (!valid_qr) { 6620 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 6621 for (jj = 0; jj < size_of_constraint; jj++) { 6622 for (ii = 0; ii < primal_dofs; ii++) { 6623 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 6624 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6625 } 6626 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 6627 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 6628 } 6629 } 6630 } 6631 } else { 6632 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 6633 } 6634 } 6635 } else { /* simple transformation block */ 6636 PetscInt row, col; 6637 PetscScalar val, norm; 6638 6639 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6640 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 6641 for (j = 0; j < size_of_constraint; j++) { 6642 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 6643 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 6644 if (!PetscBTLookup(is_primal, row_B)) { 6645 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6646 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 6647 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 6648 } else { 6649 for (k = 0; k < size_of_constraint; k++) { 6650 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 6651 if (row != col) { 6652 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 6653 } else { 6654 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 6655 } 6656 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 6657 } 6658 } 6659 } 6660 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 6661 } 6662 } else { 6663 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 6664 } 6665 } 6666 6667 /* free workspace */ 6668 if (qr_needed) { 6669 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 6670 PetscCall(PetscFree(trs_rhs)); 6671 PetscCall(PetscFree(qr_tau)); 6672 PetscCall(PetscFree(qr_work)); 6673 PetscCall(PetscFree(gqr_work)); 6674 PetscCall(PetscFree(qr_basis)); 6675 } 6676 PetscCall(PetscBTDestroy(&is_primal)); 6677 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6678 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 6679 6680 /* assembling of global change of variable */ 6681 if (!pcbddc->fake_change) { 6682 Mat tmat; 6683 PetscInt bs; 6684 6685 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 6686 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 6687 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 6688 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 6689 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 6690 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 6691 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 6692 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 6693 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 6694 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 6695 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 6696 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 6697 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 6698 PetscCall(MatDestroy(&tmat)); 6699 PetscCall(VecSet(pcis->vec1_global, 0.0)); 6700 PetscCall(VecSet(pcis->vec1_N, 1.0)); 6701 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6702 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 6703 PetscCall(VecReciprocal(pcis->vec1_global)); 6704 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 6705 6706 /* check */ 6707 if (pcbddc->dbg_flag) { 6708 PetscReal error; 6709 Vec x, x_change; 6710 6711 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 6712 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 6713 PetscCall(VecSetRandom(x, NULL)); 6714 PetscCall(VecCopy(x, pcis->vec1_global)); 6715 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6716 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 6717 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 6718 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6719 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 6720 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 6721 PetscCall(VecAXPY(x, -1.0, x_change)); 6722 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 6723 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 6724 PetscCall(VecDestroy(&x)); 6725 PetscCall(VecDestroy(&x_change)); 6726 } 6727 /* adapt sub_schurs computed (if any) */ 6728 if (pcbddc->use_deluxe_scaling) { 6729 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6730 6731 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6732 if (sub_schurs && sub_schurs->S_Ej_all) { 6733 Mat S_new, tmat; 6734 IS is_all_N, is_V_Sall = NULL; 6735 6736 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 6737 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 6738 if (pcbddc->deluxe_zerorows) { 6739 ISLocalToGlobalMapping NtoSall; 6740 IS is_V; 6741 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 6742 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 6743 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 6744 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 6745 PetscCall(ISDestroy(&is_V)); 6746 } 6747 PetscCall(ISDestroy(&is_all_N)); 6748 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6749 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 6750 PetscCall(PetscObjectReference((PetscObject)S_new)); 6751 if (pcbddc->deluxe_zerorows) { 6752 const PetscScalar *array; 6753 const PetscInt *idxs_V, *idxs_all; 6754 PetscInt i, n_V; 6755 6756 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6757 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 6758 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 6759 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 6760 PetscCall(VecGetArrayRead(pcis->D, &array)); 6761 for (i = 0; i < n_V; i++) { 6762 PetscScalar val; 6763 PetscInt idx; 6764 6765 idx = idxs_V[i]; 6766 val = array[idxs_all[idxs_V[i]]]; 6767 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 6768 } 6769 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 6770 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 6771 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 6772 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 6773 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 6774 } 6775 sub_schurs->S_Ej_all = S_new; 6776 PetscCall(MatDestroy(&S_new)); 6777 if (sub_schurs->sum_S_Ej_all) { 6778 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 6779 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 6780 PetscCall(PetscObjectReference((PetscObject)S_new)); 6781 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 6782 sub_schurs->sum_S_Ej_all = S_new; 6783 PetscCall(MatDestroy(&S_new)); 6784 } 6785 PetscCall(ISDestroy(&is_V_Sall)); 6786 PetscCall(MatDestroy(&tmat)); 6787 } 6788 /* destroy any change of basis context in sub_schurs */ 6789 if (sub_schurs && sub_schurs->change) { 6790 PetscInt i; 6791 6792 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 6793 PetscCall(PetscFree(sub_schurs->change)); 6794 } 6795 } 6796 if (pcbddc->switch_static) { /* need to save the local change */ 6797 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6798 } else { 6799 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 6800 } 6801 /* determine if any process has changed the pressures locally */ 6802 pcbddc->change_interior = pcbddc->benign_have_null; 6803 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6804 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6805 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6806 pcbddc->use_qr_single = qr_needed; 6807 } 6808 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6809 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6810 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 6811 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6812 } else { 6813 Mat benign_global = NULL; 6814 if (pcbddc->benign_have_null) { 6815 Mat M; 6816 6817 pcbddc->change_interior = PETSC_TRUE; 6818 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 6819 PetscCall(VecReciprocal(pcis->vec1_N)); 6820 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 6821 if (pcbddc->benign_change) { 6822 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 6823 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 6824 } else { 6825 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 6826 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 6827 } 6828 PetscCall(MatISSetLocalMat(benign_global, M)); 6829 PetscCall(MatDestroy(&M)); 6830 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 6831 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 6832 } 6833 if (pcbddc->user_ChangeOfBasisMatrix) { 6834 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 6835 PetscCall(MatDestroy(&benign_global)); 6836 } else if (pcbddc->benign_have_null) { 6837 pcbddc->ChangeOfBasisMatrix = benign_global; 6838 } 6839 } 6840 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6841 IS is_global; 6842 const PetscInt *gidxs; 6843 6844 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 6845 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 6846 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 6847 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 6848 PetscCall(ISDestroy(&is_global)); 6849 } 6850 } 6851 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 6852 6853 if (!pcbddc->fake_change) { 6854 /* add pressure dofs to set of primal nodes for numbering purposes */ 6855 for (i = 0; i < pcbddc->benign_n; i++) { 6856 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6857 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6858 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6859 pcbddc->local_primal_size_cc++; 6860 pcbddc->local_primal_size++; 6861 } 6862 6863 /* check if a new primal space has been introduced (also take into account benign trick) */ 6864 pcbddc->new_primal_space_local = PETSC_TRUE; 6865 if (olocal_primal_size == pcbddc->local_primal_size) { 6866 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6867 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6868 if (!pcbddc->new_primal_space_local) { 6869 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 6870 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6871 } 6872 } 6873 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6874 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 6875 } 6876 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 6877 6878 /* flush dbg viewer */ 6879 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6880 6881 /* free workspace */ 6882 PetscCall(PetscBTDestroy(&qr_needed_idx)); 6883 PetscCall(PetscBTDestroy(&change_basis)); 6884 if (!pcbddc->adaptive_selection) { 6885 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 6886 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 6887 } else { 6888 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 6889 PetscCall(PetscFree(constraints_n)); 6890 PetscCall(PetscFree(constraints_idxs_B)); 6891 } 6892 PetscFunctionReturn(PETSC_SUCCESS); 6893 } 6894 6895 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6896 { 6897 ISLocalToGlobalMapping map; 6898 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6899 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6900 PetscInt i, N; 6901 PetscBool rcsr = PETSC_FALSE; 6902 6903 PetscFunctionBegin; 6904 if (pcbddc->recompute_topography) { 6905 pcbddc->graphanalyzed = PETSC_FALSE; 6906 /* Reset previously computed graph */ 6907 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 6908 /* Init local Graph struct */ 6909 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 6910 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 6911 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 6912 6913 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 6914 /* Check validity of the csr graph passed in by the user */ 6915 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 6916 pcbddc->mat_graph->nvtxs); 6917 6918 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6919 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6920 PetscInt *xadj, *adjncy; 6921 PetscInt nvtxs; 6922 PetscBool flg_row = PETSC_FALSE; 6923 6924 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6925 if (flg_row) { 6926 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 6927 pcbddc->computed_rowadj = PETSC_TRUE; 6928 } 6929 PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 6930 rcsr = PETSC_TRUE; 6931 } 6932 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6933 6934 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6935 PetscReal *lcoords; 6936 PetscInt n; 6937 MPI_Datatype dimrealtype; 6938 6939 /* TODO: support for blocked */ 6940 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 6941 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6942 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 6943 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 6944 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 6945 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6946 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 6947 PetscCallMPI(MPI_Type_free(&dimrealtype)); 6948 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 6949 6950 pcbddc->mat_graph->coords = lcoords; 6951 pcbddc->mat_graph->cloc = PETSC_TRUE; 6952 pcbddc->mat_graph->cnloc = n; 6953 } 6954 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 6955 pcbddc->mat_graph->nvtxs); 6956 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 6957 6958 /* Setup of Graph */ 6959 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6960 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 6961 6962 /* attach info on disconnected subdomains if present */ 6963 if (pcbddc->n_local_subs) { 6964 PetscInt *local_subs, n, totn; 6965 6966 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 6967 PetscCall(PetscMalloc1(n, &local_subs)); 6968 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 6969 for (i = 0; i < pcbddc->n_local_subs; i++) { 6970 const PetscInt *idxs; 6971 PetscInt nl, j; 6972 6973 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 6974 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 6975 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 6976 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 6977 } 6978 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 6979 pcbddc->mat_graph->n_local_subs = totn + 1; 6980 pcbddc->mat_graph->local_subs = local_subs; 6981 } 6982 } 6983 6984 if (!pcbddc->graphanalyzed) { 6985 /* Graph's connected components analysis */ 6986 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 6987 pcbddc->graphanalyzed = PETSC_TRUE; 6988 pcbddc->corner_selected = pcbddc->corner_selection; 6989 } 6990 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6991 PetscFunctionReturn(PETSC_SUCCESS); 6992 } 6993 6994 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 6995 { 6996 PetscInt i, j, n; 6997 PetscScalar *alphas; 6998 PetscReal norm, *onorms; 6999 7000 PetscFunctionBegin; 7001 n = *nio; 7002 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7003 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7004 PetscCall(VecNormalize(vecs[0], &norm)); 7005 if (norm < PETSC_SMALL) { 7006 onorms[0] = 0.0; 7007 PetscCall(VecSet(vecs[0], 0.0)); 7008 } else { 7009 onorms[0] = norm; 7010 } 7011 7012 for (i = 1; i < n; i++) { 7013 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7014 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7015 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7016 PetscCall(VecNormalize(vecs[i], &norm)); 7017 if (norm < PETSC_SMALL) { 7018 onorms[i] = 0.0; 7019 PetscCall(VecSet(vecs[i], 0.0)); 7020 } else { 7021 onorms[i] = norm; 7022 } 7023 } 7024 /* push nonzero vectors at the beginning */ 7025 for (i = 0; i < n; i++) { 7026 if (onorms[i] == 0.0) { 7027 for (j = i + 1; j < n; j++) { 7028 if (onorms[j] != 0.0) { 7029 PetscCall(VecCopy(vecs[j], vecs[i])); 7030 onorms[j] = 0.0; 7031 } 7032 } 7033 } 7034 } 7035 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7036 PetscCall(PetscFree2(alphas, onorms)); 7037 PetscFunctionReturn(PETSC_SUCCESS); 7038 } 7039 7040 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7041 { 7042 ISLocalToGlobalMapping mapping; 7043 Mat A; 7044 PetscInt n_neighs, *neighs, *n_shared, **shared; 7045 PetscMPIInt size, rank, color; 7046 PetscInt *xadj, *adjncy; 7047 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7048 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7049 PetscInt void_procs, *procs_candidates = NULL; 7050 PetscInt xadj_count, *count; 7051 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7052 PetscSubcomm psubcomm; 7053 MPI_Comm subcomm; 7054 7055 PetscFunctionBegin; 7056 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7057 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7058 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7059 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7060 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7061 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7062 7063 if (have_void) *have_void = PETSC_FALSE; 7064 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7065 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7066 PetscCall(MatISGetLocalMat(mat, &A)); 7067 PetscCall(MatGetLocalSize(A, &n, NULL)); 7068 im_active = !!n; 7069 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7070 void_procs = size - active_procs; 7071 /* get ranks of non-active processes in mat communicator */ 7072 if (void_procs) { 7073 PetscInt ncand; 7074 7075 if (have_void) *have_void = PETSC_TRUE; 7076 PetscCall(PetscMalloc1(size, &procs_candidates)); 7077 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7078 for (i = 0, ncand = 0; i < size; i++) { 7079 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7080 } 7081 /* force n_subdomains to be not greater that the number of non-active processes */ 7082 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7083 } 7084 7085 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7086 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7087 PetscCall(MatGetSize(mat, &N, NULL)); 7088 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7089 PetscInt issize, isidx, dest; 7090 if (*n_subdomains == 1) dest = 0; 7091 else dest = rank; 7092 if (im_active) { 7093 issize = 1; 7094 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7095 isidx = procs_candidates[dest]; 7096 } else { 7097 isidx = dest; 7098 } 7099 } else { 7100 issize = 0; 7101 isidx = -1; 7102 } 7103 if (*n_subdomains != 1) *n_subdomains = active_procs; 7104 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7105 PetscCall(PetscFree(procs_candidates)); 7106 PetscFunctionReturn(PETSC_SUCCESS); 7107 } 7108 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL)); 7109 PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL)); 7110 threshold = PetscMax(threshold, 2); 7111 7112 /* Get info on mapping */ 7113 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7114 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7115 7116 /* build local CSR graph of subdomains' connectivity */ 7117 PetscCall(PetscMalloc1(2, &xadj)); 7118 xadj[0] = 0; 7119 xadj[1] = PetscMax(n_neighs - 1, 0); 7120 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7121 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7122 PetscCall(PetscCalloc1(n, &count)); 7123 for (i = 1; i < n_neighs; i++) 7124 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7125 7126 xadj_count = 0; 7127 for (i = 1; i < n_neighs; i++) { 7128 for (j = 0; j < n_shared[i]; j++) { 7129 if (count[shared[i][j]] < threshold) { 7130 adjncy[xadj_count] = neighs[i]; 7131 adjncy_wgt[xadj_count] = n_shared[i]; 7132 xadj_count++; 7133 break; 7134 } 7135 } 7136 } 7137 xadj[1] = xadj_count; 7138 PetscCall(PetscFree(count)); 7139 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7140 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7141 7142 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7143 7144 /* Restrict work on active processes only */ 7145 PetscCall(PetscMPIIntCast(im_active, &color)); 7146 if (void_procs) { 7147 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7148 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7149 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7150 subcomm = PetscSubcommChild(psubcomm); 7151 } else { 7152 psubcomm = NULL; 7153 subcomm = PetscObjectComm((PetscObject)mat); 7154 } 7155 7156 v_wgt = NULL; 7157 if (!color) { 7158 PetscCall(PetscFree(xadj)); 7159 PetscCall(PetscFree(adjncy)); 7160 PetscCall(PetscFree(adjncy_wgt)); 7161 } else { 7162 Mat subdomain_adj; 7163 IS new_ranks, new_ranks_contig; 7164 MatPartitioning partitioner; 7165 PetscInt rstart = 0, rend = 0; 7166 PetscInt *is_indices, *oldranks; 7167 PetscMPIInt size; 7168 PetscBool aggregate; 7169 7170 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7171 if (void_procs) { 7172 PetscInt prank = rank; 7173 PetscCall(PetscMalloc1(size, &oldranks)); 7174 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7175 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7176 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7177 } else { 7178 oldranks = NULL; 7179 } 7180 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7181 if (aggregate) { /* TODO: all this part could be made more efficient */ 7182 PetscInt lrows, row, ncols, *cols; 7183 PetscMPIInt nrank; 7184 PetscScalar *vals; 7185 7186 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7187 lrows = 0; 7188 if (nrank < redprocs) { 7189 lrows = size / redprocs; 7190 if (nrank < size % redprocs) lrows++; 7191 } 7192 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7193 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7194 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7195 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7196 row = nrank; 7197 ncols = xadj[1] - xadj[0]; 7198 cols = adjncy; 7199 PetscCall(PetscMalloc1(ncols, &vals)); 7200 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7201 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7202 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7203 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7204 PetscCall(PetscFree(xadj)); 7205 PetscCall(PetscFree(adjncy)); 7206 PetscCall(PetscFree(adjncy_wgt)); 7207 PetscCall(PetscFree(vals)); 7208 if (use_vwgt) { 7209 Vec v; 7210 const PetscScalar *array; 7211 PetscInt nl; 7212 7213 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7214 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7215 PetscCall(VecAssemblyBegin(v)); 7216 PetscCall(VecAssemblyEnd(v)); 7217 PetscCall(VecGetLocalSize(v, &nl)); 7218 PetscCall(VecGetArrayRead(v, &array)); 7219 PetscCall(PetscMalloc1(nl, &v_wgt)); 7220 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7221 PetscCall(VecRestoreArrayRead(v, &array)); 7222 PetscCall(VecDestroy(&v)); 7223 } 7224 } else { 7225 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7226 if (use_vwgt) { 7227 PetscCall(PetscMalloc1(1, &v_wgt)); 7228 v_wgt[0] = n; 7229 } 7230 } 7231 /* PetscCall(MatView(subdomain_adj,0)); */ 7232 7233 /* Partition */ 7234 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7235 #if defined(PETSC_HAVE_PTSCOTCH) 7236 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7237 #elif defined(PETSC_HAVE_PARMETIS) 7238 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7239 #else 7240 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7241 #endif 7242 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7243 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7244 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7245 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7246 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7247 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7248 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7249 7250 /* renumber new_ranks to avoid "holes" in new set of processors */ 7251 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7252 PetscCall(ISDestroy(&new_ranks)); 7253 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7254 if (!aggregate) { 7255 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7256 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7257 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7258 } else if (oldranks) { 7259 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7260 } else { 7261 ranks_send_to_idx[0] = is_indices[0]; 7262 } 7263 } else { 7264 PetscInt idx = 0; 7265 PetscMPIInt tag; 7266 MPI_Request *reqs; 7267 7268 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7269 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7270 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7271 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7272 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7273 PetscCall(PetscFree(reqs)); 7274 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7275 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7276 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7277 } else if (oldranks) { 7278 ranks_send_to_idx[0] = oldranks[idx]; 7279 } else { 7280 ranks_send_to_idx[0] = idx; 7281 } 7282 } 7283 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7284 /* clean up */ 7285 PetscCall(PetscFree(oldranks)); 7286 PetscCall(ISDestroy(&new_ranks_contig)); 7287 PetscCall(MatDestroy(&subdomain_adj)); 7288 PetscCall(MatPartitioningDestroy(&partitioner)); 7289 } 7290 PetscCall(PetscSubcommDestroy(&psubcomm)); 7291 PetscCall(PetscFree(procs_candidates)); 7292 7293 /* assemble parallel IS for sends */ 7294 i = 1; 7295 if (!color) i = 0; 7296 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7297 PetscFunctionReturn(PETSC_SUCCESS); 7298 } 7299 7300 typedef enum { 7301 MATDENSE_PRIVATE = 0, 7302 MATAIJ_PRIVATE, 7303 MATBAIJ_PRIVATE, 7304 MATSBAIJ_PRIVATE 7305 } MatTypePrivate; 7306 7307 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7308 { 7309 Mat local_mat; 7310 IS is_sends_internal; 7311 PetscInt rows, cols, new_local_rows; 7312 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7313 PetscBool ismatis, isdense, newisdense, destroy_mat; 7314 ISLocalToGlobalMapping l2gmap; 7315 PetscInt *l2gmap_indices; 7316 const PetscInt *is_indices; 7317 MatType new_local_type; 7318 /* buffers */ 7319 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7320 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7321 PetscInt *recv_buffer_idxs_local; 7322 PetscScalar *ptr_vals, *recv_buffer_vals; 7323 const PetscScalar *send_buffer_vals; 7324 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7325 /* MPI */ 7326 MPI_Comm comm, comm_n; 7327 PetscSubcomm subcomm; 7328 PetscMPIInt n_sends, n_recvs, size; 7329 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7330 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7331 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7332 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7333 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7334 7335 PetscFunctionBegin; 7336 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7337 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7338 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7339 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7340 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7341 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7342 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7343 PetscValidLogicalCollectiveInt(mat, nis, 8); 7344 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7345 if (nvecs) { 7346 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7347 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7348 } 7349 /* further checks */ 7350 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7351 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7352 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7353 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7354 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7355 if (reuse && *mat_n) { 7356 PetscInt mrows, mcols, mnrows, mncols; 7357 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7358 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7359 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7360 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7361 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7362 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7363 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7364 } 7365 PetscCall(MatGetBlockSize(local_mat, &bs)); 7366 PetscValidLogicalCollectiveInt(mat, bs, 1); 7367 7368 /* prepare IS for sending if not provided */ 7369 if (!is_sends) { 7370 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7371 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7372 } else { 7373 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7374 is_sends_internal = is_sends; 7375 } 7376 7377 /* get comm */ 7378 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7379 7380 /* compute number of sends */ 7381 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7382 PetscCall(PetscMPIIntCast(i, &n_sends)); 7383 7384 /* compute number of receives */ 7385 PetscCallMPI(MPI_Comm_size(comm, &size)); 7386 PetscCall(PetscMalloc1(size, &iflags)); 7387 PetscCall(PetscArrayzero(iflags, size)); 7388 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7389 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7390 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7391 PetscCall(PetscFree(iflags)); 7392 7393 /* restrict comm if requested */ 7394 subcomm = NULL; 7395 destroy_mat = PETSC_FALSE; 7396 if (restrict_comm) { 7397 PetscMPIInt color, subcommsize; 7398 7399 color = 0; 7400 if (restrict_full) { 7401 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7402 } else { 7403 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7404 } 7405 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7406 subcommsize = size - subcommsize; 7407 /* check if reuse has been requested */ 7408 if (reuse) { 7409 if (*mat_n) { 7410 PetscMPIInt subcommsize2; 7411 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7412 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7413 comm_n = PetscObjectComm((PetscObject)*mat_n); 7414 } else { 7415 comm_n = PETSC_COMM_SELF; 7416 } 7417 } else { /* MAT_INITIAL_MATRIX */ 7418 PetscMPIInt rank; 7419 7420 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7421 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7422 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7423 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7424 comm_n = PetscSubcommChild(subcomm); 7425 } 7426 /* flag to destroy *mat_n if not significative */ 7427 if (color) destroy_mat = PETSC_TRUE; 7428 } else { 7429 comm_n = comm; 7430 } 7431 7432 /* prepare send/receive buffers */ 7433 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7434 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7435 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7436 PetscCall(PetscArrayzero(ilengths_vals, size)); 7437 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 7438 7439 /* Get data from local matrices */ 7440 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7441 /* TODO: See below some guidelines on how to prepare the local buffers */ 7442 /* 7443 send_buffer_vals should contain the raw values of the local matrix 7444 send_buffer_idxs should contain: 7445 - MatType_PRIVATE type 7446 - PetscInt size_of_l2gmap 7447 - PetscInt global_row_indices[size_of_l2gmap] 7448 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7449 */ 7450 { 7451 ISLocalToGlobalMapping mapping; 7452 7453 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7454 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7455 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7456 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7457 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7458 send_buffer_idxs[1] = i; 7459 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7460 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7461 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7462 PetscCall(PetscMPIIntCast(i, &len)); 7463 for (i = 0; i < n_sends; i++) { 7464 ilengths_vals[is_indices[i]] = len * len; 7465 ilengths_idxs[is_indices[i]] = len + 2; 7466 } 7467 } 7468 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7469 /* additional is (if any) */ 7470 if (nis) { 7471 PetscMPIInt psum; 7472 PetscInt j; 7473 for (j = 0, psum = 0; j < nis; j++) { 7474 PetscInt plen; 7475 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7476 PetscCall(PetscMPIIntCast(plen, &len)); 7477 psum += len + 1; /* indices + length */ 7478 } 7479 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7480 for (j = 0, psum = 0; j < nis; j++) { 7481 PetscInt plen; 7482 const PetscInt *is_array_idxs; 7483 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7484 send_buffer_idxs_is[psum] = plen; 7485 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7486 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7487 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7488 psum += plen + 1; /* indices + length */ 7489 } 7490 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 7491 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7492 } 7493 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7494 7495 buf_size_idxs = 0; 7496 buf_size_vals = 0; 7497 buf_size_idxs_is = 0; 7498 buf_size_vecs = 0; 7499 for (i = 0; i < n_recvs; i++) { 7500 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7501 buf_size_vals += (PetscInt)olengths_vals[i]; 7502 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7503 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7504 } 7505 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 7506 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 7507 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 7508 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 7509 7510 /* get new tags for clean communications */ 7511 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 7512 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 7513 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 7514 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 7515 7516 /* allocate for requests */ 7517 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 7518 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 7519 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 7520 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 7521 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 7522 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 7523 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 7524 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 7525 7526 /* communications */ 7527 ptr_idxs = recv_buffer_idxs; 7528 ptr_vals = recv_buffer_vals; 7529 ptr_idxs_is = recv_buffer_idxs_is; 7530 ptr_vecs = recv_buffer_vecs; 7531 for (i = 0; i < n_recvs; i++) { 7532 source_dest = onodes[i]; 7533 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 7534 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 7535 ptr_idxs += olengths_idxs[i]; 7536 ptr_vals += olengths_vals[i]; 7537 if (nis) { 7538 source_dest = onodes_is[i]; 7539 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 7540 ptr_idxs_is += olengths_idxs_is[i]; 7541 } 7542 if (nvecs) { 7543 source_dest = onodes[i]; 7544 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 7545 ptr_vecs += olengths_idxs[i] - 2; 7546 } 7547 } 7548 for (i = 0; i < n_sends; i++) { 7549 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 7550 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 7551 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 7552 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 7553 if (nvecs) { 7554 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7555 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 7556 } 7557 } 7558 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 7559 PetscCall(ISDestroy(&is_sends_internal)); 7560 7561 /* assemble new l2g map */ 7562 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 7563 ptr_idxs = recv_buffer_idxs; 7564 new_local_rows = 0; 7565 for (i = 0; i < n_recvs; i++) { 7566 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7567 ptr_idxs += olengths_idxs[i]; 7568 } 7569 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 7570 ptr_idxs = recv_buffer_idxs; 7571 new_local_rows = 0; 7572 for (i = 0; i < n_recvs; i++) { 7573 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 7574 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 7575 ptr_idxs += olengths_idxs[i]; 7576 } 7577 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 7578 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 7579 PetscCall(PetscFree(l2gmap_indices)); 7580 7581 /* infer new local matrix type from received local matrices type */ 7582 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7583 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7584 if (n_recvs) { 7585 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7586 ptr_idxs = recv_buffer_idxs; 7587 for (i = 0; i < n_recvs; i++) { 7588 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7589 new_local_type_private = MATAIJ_PRIVATE; 7590 break; 7591 } 7592 ptr_idxs += olengths_idxs[i]; 7593 } 7594 switch (new_local_type_private) { 7595 case MATDENSE_PRIVATE: 7596 new_local_type = MATSEQAIJ; 7597 bs = 1; 7598 break; 7599 case MATAIJ_PRIVATE: 7600 new_local_type = MATSEQAIJ; 7601 bs = 1; 7602 break; 7603 case MATBAIJ_PRIVATE: 7604 new_local_type = MATSEQBAIJ; 7605 break; 7606 case MATSBAIJ_PRIVATE: 7607 new_local_type = MATSEQSBAIJ; 7608 break; 7609 default: 7610 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 7611 } 7612 } else { /* by default, new_local_type is seqaij */ 7613 new_local_type = MATSEQAIJ; 7614 bs = 1; 7615 } 7616 7617 /* create MATIS object if needed */ 7618 if (!reuse) { 7619 PetscCall(MatGetSize(mat, &rows, &cols)); 7620 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7621 } else { 7622 /* it also destroys the local matrices */ 7623 if (*mat_n) { 7624 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 7625 } else { /* this is a fake object */ 7626 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 7627 } 7628 } 7629 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 7630 PetscCall(MatSetType(local_mat, new_local_type)); 7631 7632 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 7633 7634 /* Global to local map of received indices */ 7635 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 7636 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 7637 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 7638 7639 /* restore attributes -> type of incoming data and its size */ 7640 buf_size_idxs = 0; 7641 for (i = 0; i < n_recvs; i++) { 7642 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7643 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 7644 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7645 } 7646 PetscCall(PetscFree(recv_buffer_idxs)); 7647 7648 /* set preallocation */ 7649 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 7650 if (!newisdense) { 7651 PetscInt *new_local_nnz = NULL; 7652 7653 ptr_idxs = recv_buffer_idxs_local; 7654 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 7655 for (i = 0; i < n_recvs; i++) { 7656 PetscInt j; 7657 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7658 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 7659 } else { 7660 /* TODO */ 7661 } 7662 ptr_idxs += olengths_idxs[i]; 7663 } 7664 if (new_local_nnz) { 7665 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 7666 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 7667 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 7668 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7669 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 7670 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 7671 } else { 7672 PetscCall(MatSetUp(local_mat)); 7673 } 7674 PetscCall(PetscFree(new_local_nnz)); 7675 } else { 7676 PetscCall(MatSetUp(local_mat)); 7677 } 7678 7679 /* set values */ 7680 ptr_vals = recv_buffer_vals; 7681 ptr_idxs = recv_buffer_idxs_local; 7682 for (i = 0; i < n_recvs; i++) { 7683 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7684 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 7685 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 7686 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 7687 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 7688 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 7689 } else { 7690 /* TODO */ 7691 } 7692 ptr_idxs += olengths_idxs[i]; 7693 ptr_vals += olengths_vals[i]; 7694 } 7695 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 7696 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 7697 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 7698 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 7699 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 7700 PetscCall(PetscFree(recv_buffer_vals)); 7701 7702 #if 0 7703 if (!restrict_comm) { /* check */ 7704 Vec lvec,rvec; 7705 PetscReal infty_error; 7706 7707 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 7708 PetscCall(VecSetRandom(rvec,NULL)); 7709 PetscCall(MatMult(mat,rvec,lvec)); 7710 PetscCall(VecScale(lvec,-1.0)); 7711 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 7712 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 7713 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 7714 PetscCall(VecDestroy(&rvec)); 7715 PetscCall(VecDestroy(&lvec)); 7716 } 7717 #endif 7718 7719 /* assemble new additional is (if any) */ 7720 if (nis) { 7721 PetscInt **temp_idxs, *count_is, j, psum; 7722 7723 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 7724 PetscCall(PetscCalloc1(nis, &count_is)); 7725 ptr_idxs = recv_buffer_idxs_is; 7726 psum = 0; 7727 for (i = 0; i < n_recvs; i++) { 7728 for (j = 0; j < nis; j++) { 7729 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7730 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7731 psum += plen; 7732 ptr_idxs += plen + 1; /* shift pointer to received data */ 7733 } 7734 } 7735 PetscCall(PetscMalloc1(nis, &temp_idxs)); 7736 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 7737 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 7738 PetscCall(PetscArrayzero(count_is, nis)); 7739 ptr_idxs = recv_buffer_idxs_is; 7740 for (i = 0; i < n_recvs; i++) { 7741 for (j = 0; j < nis; j++) { 7742 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7743 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 7744 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7745 ptr_idxs += plen + 1; /* shift pointer to received data */ 7746 } 7747 } 7748 for (i = 0; i < nis; i++) { 7749 PetscCall(ISDestroy(&isarray[i])); 7750 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 7751 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 7752 } 7753 PetscCall(PetscFree(count_is)); 7754 PetscCall(PetscFree(temp_idxs[0])); 7755 PetscCall(PetscFree(temp_idxs)); 7756 } 7757 /* free workspace */ 7758 PetscCall(PetscFree(recv_buffer_idxs_is)); 7759 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 7760 PetscCall(PetscFree(send_buffer_idxs)); 7761 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 7762 if (isdense) { 7763 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7764 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 7765 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7766 } else { 7767 /* PetscCall(PetscFree(send_buffer_vals)); */ 7768 } 7769 if (nis) { 7770 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 7771 PetscCall(PetscFree(send_buffer_idxs_is)); 7772 } 7773 7774 if (nvecs) { 7775 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 7776 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 7777 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7778 PetscCall(VecDestroy(&nnsp_vec[0])); 7779 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 7780 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 7781 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 7782 /* set values */ 7783 ptr_vals = recv_buffer_vecs; 7784 ptr_idxs = recv_buffer_idxs_local; 7785 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 7786 for (i = 0; i < n_recvs; i++) { 7787 PetscInt j; 7788 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 7789 ptr_idxs += olengths_idxs[i]; 7790 ptr_vals += olengths_idxs[i] - 2; 7791 } 7792 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 7793 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 7794 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 7795 } 7796 7797 PetscCall(PetscFree(recv_buffer_vecs)); 7798 PetscCall(PetscFree(recv_buffer_idxs_local)); 7799 PetscCall(PetscFree(recv_req_idxs)); 7800 PetscCall(PetscFree(recv_req_vals)); 7801 PetscCall(PetscFree(recv_req_vecs)); 7802 PetscCall(PetscFree(recv_req_idxs_is)); 7803 PetscCall(PetscFree(send_req_idxs)); 7804 PetscCall(PetscFree(send_req_vals)); 7805 PetscCall(PetscFree(send_req_vecs)); 7806 PetscCall(PetscFree(send_req_idxs_is)); 7807 PetscCall(PetscFree(ilengths_vals)); 7808 PetscCall(PetscFree(ilengths_idxs)); 7809 PetscCall(PetscFree(olengths_vals)); 7810 PetscCall(PetscFree(olengths_idxs)); 7811 PetscCall(PetscFree(onodes)); 7812 if (nis) { 7813 PetscCall(PetscFree(ilengths_idxs_is)); 7814 PetscCall(PetscFree(olengths_idxs_is)); 7815 PetscCall(PetscFree(onodes_is)); 7816 } 7817 PetscCall(PetscSubcommDestroy(&subcomm)); 7818 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 7819 PetscCall(MatDestroy(mat_n)); 7820 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 7821 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7822 PetscCall(VecDestroy(&nnsp_vec[0])); 7823 } 7824 *mat_n = NULL; 7825 } 7826 PetscFunctionReturn(PETSC_SUCCESS); 7827 } 7828 7829 /* temporary hack into ksp private data structure */ 7830 #include <petsc/private/kspimpl.h> 7831 7832 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals) 7833 { 7834 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7835 PC_IS *pcis = (PC_IS *)pc->data; 7836 Mat coarse_mat, coarse_mat_is, coarse_submat_dense; 7837 Mat coarsedivudotp = NULL; 7838 Mat coarseG, t_coarse_mat_is; 7839 MatNullSpace CoarseNullSpace = NULL; 7840 ISLocalToGlobalMapping coarse_islg; 7841 IS coarse_is, *isarray, corners; 7842 PetscInt i, im_active = -1, active_procs = -1; 7843 PetscInt nis, nisdofs, nisneu, nisvert; 7844 PetscInt coarse_eqs_per_proc; 7845 PC pc_temp; 7846 PCType coarse_pc_type; 7847 KSPType coarse_ksp_type; 7848 PetscBool multilevel_requested, multilevel_allowed; 7849 PetscBool coarse_reuse; 7850 PetscInt ncoarse, nedcfield; 7851 PetscBool compute_vecs = PETSC_FALSE; 7852 PetscScalar *array; 7853 MatReuse coarse_mat_reuse; 7854 PetscBool restr, full_restr, have_void; 7855 PetscMPIInt size; 7856 7857 PetscFunctionBegin; 7858 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 7859 /* Assign global numbering to coarse dofs */ 7860 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7861 PetscInt ocoarse_size; 7862 compute_vecs = PETSC_TRUE; 7863 7864 pcbddc->new_primal_space = PETSC_TRUE; 7865 ocoarse_size = pcbddc->coarse_size; 7866 PetscCall(PetscFree(pcbddc->global_primal_indices)); 7867 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 7868 /* see if we can avoid some work */ 7869 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7870 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7871 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7872 PetscCall(KSPReset(pcbddc->coarse_ksp)); 7873 coarse_reuse = PETSC_FALSE; 7874 } else { /* we can safely reuse already computed coarse matrix */ 7875 coarse_reuse = PETSC_TRUE; 7876 } 7877 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7878 coarse_reuse = PETSC_FALSE; 7879 } 7880 /* reset any subassembling information */ 7881 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 7882 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7883 coarse_reuse = PETSC_TRUE; 7884 } 7885 if (coarse_reuse && pcbddc->coarse_ksp) { 7886 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 7887 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 7888 coarse_mat_reuse = MAT_REUSE_MATRIX; 7889 } else { 7890 coarse_mat = NULL; 7891 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7892 } 7893 7894 /* creates temporary l2gmap and IS for coarse indexes */ 7895 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 7896 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 7897 7898 /* creates temporary MATIS object for coarse matrix */ 7899 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense)); 7900 PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is)); 7901 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense)); 7902 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7903 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 7904 PetscCall(MatDestroy(&coarse_submat_dense)); 7905 7906 /* count "active" (i.e. with positive local size) and "void" processes */ 7907 im_active = !!pcis->n; 7908 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7909 7910 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7911 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 7912 /* full_restr : just use the receivers from the subassembling pattern */ 7913 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 7914 coarse_mat_is = NULL; 7915 multilevel_allowed = PETSC_FALSE; 7916 multilevel_requested = PETSC_FALSE; 7917 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 7918 if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size; 7919 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7920 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7921 if (multilevel_requested) { 7922 ncoarse = active_procs / pcbddc->coarsening_ratio; 7923 restr = PETSC_FALSE; 7924 full_restr = PETSC_FALSE; 7925 } else { 7926 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 7927 restr = PETSC_TRUE; 7928 full_restr = PETSC_TRUE; 7929 } 7930 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7931 ncoarse = PetscMax(1, ncoarse); 7932 if (!pcbddc->coarse_subassembling) { 7933 if (pcbddc->coarsening_ratio > 1) { 7934 if (multilevel_requested) { 7935 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7936 } else { 7937 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 7938 } 7939 } else { 7940 PetscMPIInt rank; 7941 7942 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 7943 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7944 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 7945 } 7946 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7947 PetscInt psum; 7948 if (pcbddc->coarse_ksp) psum = 1; 7949 else psum = 0; 7950 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 7951 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7952 } 7953 /* determine if we can go multilevel */ 7954 if (multilevel_requested) { 7955 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7956 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7957 } 7958 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7959 7960 /* dump subassembling pattern */ 7961 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 7962 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7963 nedcfield = -1; 7964 corners = NULL; 7965 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 7966 PetscInt *tidxs, *tidxs2, nout, tsize, i; 7967 const PetscInt *idxs; 7968 ISLocalToGlobalMapping tmap; 7969 7970 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7971 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 7972 /* allocate space for temporary storage */ 7973 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 7974 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 7975 /* allocate for IS array */ 7976 nisdofs = pcbddc->n_ISForDofsLocal; 7977 if (pcbddc->nedclocal) { 7978 if (pcbddc->nedfield > -1) { 7979 nedcfield = pcbddc->nedfield; 7980 } else { 7981 nedcfield = 0; 7982 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 7983 nisdofs = 1; 7984 } 7985 } 7986 nisneu = !!pcbddc->NeumannBoundariesLocal; 7987 nisvert = 0; /* nisvert is not used */ 7988 nis = nisdofs + nisneu + nisvert; 7989 PetscCall(PetscMalloc1(nis, &isarray)); 7990 /* dofs splitting */ 7991 for (i = 0; i < nisdofs; i++) { 7992 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 7993 if (nedcfield != i) { 7994 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 7995 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7996 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 7997 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 7998 } else { 7999 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8000 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8001 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8002 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8003 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8004 } 8005 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8006 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8007 /* PetscCall(ISView(isarray[i],0)); */ 8008 } 8009 /* neumann boundaries */ 8010 if (pcbddc->NeumannBoundariesLocal) { 8011 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8012 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8013 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8014 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8015 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8016 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8017 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8018 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8019 } 8020 /* coordinates */ 8021 if (pcbddc->corner_selected) { 8022 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8023 PetscCall(ISGetLocalSize(corners, &tsize)); 8024 PetscCall(ISGetIndices(corners, &idxs)); 8025 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8026 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8027 PetscCall(ISRestoreIndices(corners, &idxs)); 8028 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8029 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8030 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8031 } 8032 PetscCall(PetscFree(tidxs)); 8033 PetscCall(PetscFree(tidxs2)); 8034 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8035 } else { 8036 nis = 0; 8037 nisdofs = 0; 8038 nisneu = 0; 8039 nisvert = 0; 8040 isarray = NULL; 8041 } 8042 /* destroy no longer needed map */ 8043 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8044 8045 /* subassemble */ 8046 if (multilevel_allowed) { 8047 Vec vp[1]; 8048 PetscInt nvecs = 0; 8049 PetscBool reuse, reuser; 8050 8051 if (coarse_mat) reuse = PETSC_TRUE; 8052 else reuse = PETSC_FALSE; 8053 PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8054 vp[0] = NULL; 8055 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8056 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8057 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8058 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8059 nvecs = 1; 8060 8061 if (pcbddc->divudotp) { 8062 Mat B, loc_divudotp; 8063 Vec v, p; 8064 IS dummy; 8065 PetscInt np; 8066 8067 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8068 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8069 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8070 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8071 PetscCall(MatCreateVecs(B, &v, &p)); 8072 PetscCall(VecSet(p, 1.)); 8073 PetscCall(MatMultTranspose(B, p, v)); 8074 PetscCall(VecDestroy(&p)); 8075 PetscCall(MatDestroy(&B)); 8076 PetscCall(VecGetArray(vp[0], &array)); 8077 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8078 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8079 PetscCall(VecResetArray(pcbddc->vec1_P)); 8080 PetscCall(VecRestoreArray(vp[0], &array)); 8081 PetscCall(ISDestroy(&dummy)); 8082 PetscCall(VecDestroy(&v)); 8083 } 8084 } 8085 if (reuser) { 8086 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8087 } else { 8088 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8089 } 8090 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8091 PetscScalar *arraym; 8092 const PetscScalar *arrayv; 8093 PetscInt nl; 8094 PetscCall(VecGetLocalSize(vp[0], &nl)); 8095 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8096 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8097 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8098 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8099 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8100 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8101 PetscCall(VecDestroy(&vp[0])); 8102 } else { 8103 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8104 } 8105 } else { 8106 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8107 } 8108 if (coarse_mat_is || coarse_mat) { 8109 if (!multilevel_allowed) { 8110 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8111 } else { 8112 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8113 if (coarse_mat_is) { 8114 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8115 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8116 coarse_mat = coarse_mat_is; 8117 } 8118 } 8119 } 8120 PetscCall(MatDestroy(&t_coarse_mat_is)); 8121 PetscCall(MatDestroy(&coarse_mat_is)); 8122 8123 /* create local to global scatters for coarse problem */ 8124 if (compute_vecs) { 8125 PetscInt lrows; 8126 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8127 if (coarse_mat) { 8128 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8129 } else { 8130 lrows = 0; 8131 } 8132 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8133 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8134 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8135 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8136 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8137 } 8138 PetscCall(ISDestroy(&coarse_is)); 8139 8140 /* set defaults for coarse KSP and PC */ 8141 if (multilevel_allowed) { 8142 coarse_ksp_type = KSPRICHARDSON; 8143 coarse_pc_type = PCBDDC; 8144 } else { 8145 coarse_ksp_type = KSPPREONLY; 8146 coarse_pc_type = PCREDUNDANT; 8147 } 8148 8149 /* print some info if requested */ 8150 if (pcbddc->dbg_flag) { 8151 if (!multilevel_allowed) { 8152 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8153 if (multilevel_requested) { 8154 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio)); 8155 } else if (pcbddc->max_levels) { 8156 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8157 } 8158 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8159 } 8160 } 8161 8162 /* communicate coarse discrete gradient */ 8163 coarseG = NULL; 8164 if (pcbddc->nedcG && multilevel_allowed) { 8165 MPI_Comm ccomm; 8166 if (coarse_mat) { 8167 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8168 } else { 8169 ccomm = MPI_COMM_NULL; 8170 } 8171 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8172 } 8173 8174 /* create the coarse KSP object only once with defaults */ 8175 if (coarse_mat) { 8176 PetscBool isredundant, isbddc, force, valid; 8177 PetscViewer dbg_viewer = NULL; 8178 PetscBool isset, issym, isher, isspd; 8179 8180 if (pcbddc->dbg_flag) { 8181 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8182 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8183 } 8184 if (!pcbddc->coarse_ksp) { 8185 char prefix[256], str_level[16]; 8186 size_t len; 8187 8188 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8189 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8190 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8191 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8192 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8193 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8194 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8195 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8196 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8197 /* TODO is this logic correct? should check for coarse_mat type */ 8198 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8199 /* prefix */ 8200 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8201 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8202 if (!pcbddc->current_level) { 8203 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8204 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8205 } else { 8206 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8207 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8208 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8209 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8210 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8211 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 8212 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8213 } 8214 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8215 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8216 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8217 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8218 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8219 /* allow user customization */ 8220 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8221 /* get some info after set from options */ 8222 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8223 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8224 force = PETSC_FALSE; 8225 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8226 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8227 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8228 if (multilevel_allowed && !force && !valid) { 8229 isbddc = PETSC_TRUE; 8230 PetscCall(PCSetType(pc_temp, PCBDDC)); 8231 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8232 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8233 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8234 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8235 PetscObjectOptionsBegin((PetscObject)pc_temp); 8236 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8237 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8238 PetscOptionsEnd(); 8239 pc_temp->setfromoptionscalled++; 8240 } 8241 } 8242 } 8243 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8244 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8245 if (nisdofs) { 8246 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8247 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8248 } 8249 if (nisneu) { 8250 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8251 PetscCall(ISDestroy(&isarray[nisdofs])); 8252 } 8253 if (nisvert) { 8254 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8255 PetscCall(ISDestroy(&isarray[nis - 1])); 8256 } 8257 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8258 8259 /* get some info after set from options */ 8260 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8261 8262 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8263 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8264 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8265 force = PETSC_FALSE; 8266 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8267 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8268 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8269 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8270 if (isredundant) { 8271 KSP inner_ksp; 8272 PC inner_pc; 8273 8274 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8275 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8276 } 8277 8278 /* parameters which miss an API */ 8279 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8280 if (isbddc) { 8281 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8282 8283 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8284 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8285 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8286 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8287 if (pcbddc_coarse->benign_saddle_point) { 8288 Mat coarsedivudotp_is; 8289 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8290 IS row, col; 8291 const PetscInt *gidxs; 8292 PetscInt n, st, M, N; 8293 8294 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8295 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8296 st = st - n; 8297 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8298 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8299 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8300 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8301 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8302 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8303 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8304 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8305 PetscCall(ISGetSize(row, &M)); 8306 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8307 PetscCall(ISDestroy(&row)); 8308 PetscCall(ISDestroy(&col)); 8309 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8310 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8311 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8312 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8313 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8314 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8315 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8316 PetscCall(MatDestroy(&coarsedivudotp)); 8317 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8318 PetscCall(MatDestroy(&coarsedivudotp_is)); 8319 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8320 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8321 } 8322 } 8323 8324 /* propagate symmetry info of coarse matrix */ 8325 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8326 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8327 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8328 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8329 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8330 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8331 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8332 8333 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8334 /* set operators */ 8335 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8336 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8337 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8338 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8339 } 8340 PetscCall(MatDestroy(&coarseG)); 8341 PetscCall(PetscFree(isarray)); 8342 #if 0 8343 { 8344 PetscViewer viewer; 8345 char filename[256]; 8346 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8347 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8348 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8349 PetscCall(MatView(coarse_mat,viewer)); 8350 PetscCall(PetscViewerPopFormat(viewer)); 8351 PetscCall(PetscViewerDestroy(&viewer)); 8352 } 8353 #endif 8354 8355 if (corners) { 8356 Vec gv; 8357 IS is; 8358 const PetscInt *idxs; 8359 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8360 PetscScalar *coords; 8361 8362 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8363 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8364 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8365 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8366 PetscCall(VecSetBlockSize(gv, cdim)); 8367 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8368 PetscCall(VecSetType(gv, VECSTANDARD)); 8369 PetscCall(VecSetFromOptions(gv)); 8370 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8371 8372 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8373 PetscCall(ISGetLocalSize(is, &n)); 8374 PetscCall(ISGetIndices(is, &idxs)); 8375 PetscCall(PetscMalloc1(n * cdim, &coords)); 8376 for (i = 0; i < n; i++) { 8377 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8378 } 8379 PetscCall(ISRestoreIndices(is, &idxs)); 8380 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8381 8382 PetscCall(ISGetLocalSize(corners, &n)); 8383 PetscCall(ISGetIndices(corners, &idxs)); 8384 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8385 PetscCall(ISRestoreIndices(corners, &idxs)); 8386 PetscCall(PetscFree(coords)); 8387 PetscCall(VecAssemblyBegin(gv)); 8388 PetscCall(VecAssemblyEnd(gv)); 8389 PetscCall(VecGetArray(gv, &coords)); 8390 if (pcbddc->coarse_ksp) { 8391 PC coarse_pc; 8392 PetscBool isbddc; 8393 8394 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8395 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8396 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8397 PetscReal *realcoords; 8398 8399 PetscCall(VecGetLocalSize(gv, &n)); 8400 #if defined(PETSC_USE_COMPLEX) 8401 PetscCall(PetscMalloc1(n, &realcoords)); 8402 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8403 #else 8404 realcoords = coords; 8405 #endif 8406 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8407 #if defined(PETSC_USE_COMPLEX) 8408 PetscCall(PetscFree(realcoords)); 8409 #endif 8410 } 8411 } 8412 PetscCall(VecRestoreArray(gv, &coords)); 8413 PetscCall(VecDestroy(&gv)); 8414 } 8415 PetscCall(ISDestroy(&corners)); 8416 8417 if (pcbddc->coarse_ksp) { 8418 Vec crhs, csol; 8419 8420 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8421 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8422 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 8423 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 8424 } 8425 PetscCall(MatDestroy(&coarsedivudotp)); 8426 8427 /* compute null space for coarse solver if the benign trick has been requested */ 8428 if (pcbddc->benign_null) { 8429 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8430 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 8431 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8432 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8433 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8434 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8435 if (coarse_mat) { 8436 Vec nullv; 8437 PetscScalar *array, *array2; 8438 PetscInt nl; 8439 8440 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8441 PetscCall(VecGetLocalSize(nullv, &nl)); 8442 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8443 PetscCall(VecGetArray(nullv, &array2)); 8444 PetscCall(PetscArraycpy(array2, array, nl)); 8445 PetscCall(VecRestoreArray(nullv, &array2)); 8446 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8447 PetscCall(VecNormalize(nullv, NULL)); 8448 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8449 PetscCall(VecDestroy(&nullv)); 8450 } 8451 } 8452 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8453 8454 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8455 if (pcbddc->coarse_ksp) { 8456 PetscBool ispreonly; 8457 8458 if (CoarseNullSpace) { 8459 PetscBool isnull; 8460 8461 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8462 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8463 /* TODO: add local nullspaces (if any) */ 8464 } 8465 /* setup coarse ksp */ 8466 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8467 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8468 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8469 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8470 KSP check_ksp; 8471 KSPType check_ksp_type; 8472 PC check_pc; 8473 Vec check_vec, coarse_vec; 8474 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8475 PetscInt its; 8476 PetscBool compute_eigs; 8477 PetscReal *eigs_r, *eigs_c; 8478 PetscInt neigs; 8479 const char *prefix; 8480 8481 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8482 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 8483 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 8484 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 8485 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 8486 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 8487 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 8488 /* prevent from setup unneeded object */ 8489 PetscCall(KSPGetPC(check_ksp, &check_pc)); 8490 PetscCall(PCSetType(check_pc, PCNONE)); 8491 if (ispreonly) { 8492 check_ksp_type = KSPPREONLY; 8493 compute_eigs = PETSC_FALSE; 8494 } else { 8495 check_ksp_type = KSPGMRES; 8496 compute_eigs = PETSC_TRUE; 8497 } 8498 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 8499 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 8500 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 8501 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 8502 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 8503 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 8504 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 8505 PetscCall(KSPSetFromOptions(check_ksp)); 8506 PetscCall(KSPSetUp(check_ksp)); 8507 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 8508 PetscCall(KSPSetPC(check_ksp, check_pc)); 8509 /* create random vec */ 8510 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 8511 PetscCall(VecSetRandom(check_vec, NULL)); 8512 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8513 /* solve coarse problem */ 8514 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 8515 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 8516 /* set eigenvalue estimation if preonly has not been requested */ 8517 if (compute_eigs) { 8518 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 8519 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 8520 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 8521 if (neigs) { 8522 lambda_max = eigs_r[neigs - 1]; 8523 lambda_min = eigs_r[0]; 8524 if (pcbddc->use_coarse_estimates) { 8525 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8526 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 8527 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 8528 } 8529 } 8530 } 8531 } 8532 8533 /* check coarse problem residual error */ 8534 if (pcbddc->dbg_flag) { 8535 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8536 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8537 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 8538 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 8539 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 8540 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 8541 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 8542 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 8543 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 8544 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 8545 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 8546 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 8547 if (compute_eigs) { 8548 PetscReal lambda_max_s, lambda_min_s; 8549 KSPConvergedReason reason; 8550 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 8551 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 8552 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 8553 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 8554 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 8555 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 8556 } 8557 PetscCall(PetscViewerFlush(dbg_viewer)); 8558 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 8559 } 8560 PetscCall(VecDestroy(&check_vec)); 8561 PetscCall(VecDestroy(&coarse_vec)); 8562 PetscCall(KSPDestroy(&check_ksp)); 8563 if (compute_eigs) { 8564 PetscCall(PetscFree(eigs_r)); 8565 PetscCall(PetscFree(eigs_c)); 8566 } 8567 } 8568 } 8569 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 8570 /* print additional info */ 8571 if (pcbddc->dbg_flag) { 8572 /* waits until all processes reaches this point */ 8573 PetscCall(PetscBarrier((PetscObject)pc)); 8574 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 8575 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8576 } 8577 8578 /* free memory */ 8579 PetscCall(MatDestroy(&coarse_mat)); 8580 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8581 PetscFunctionReturn(PETSC_SUCCESS); 8582 } 8583 8584 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 8585 { 8586 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8587 PC_IS *pcis = (PC_IS *)pc->data; 8588 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 8589 IS subset, subset_mult, subset_n; 8590 PetscInt local_size, coarse_size = 0; 8591 PetscInt *local_primal_indices = NULL; 8592 const PetscInt *t_local_primal_indices; 8593 8594 PetscFunctionBegin; 8595 /* Compute global number of coarse dofs */ 8596 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 8597 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 8598 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 8599 PetscCall(ISDestroy(&subset_n)); 8600 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 8601 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 8602 PetscCall(ISDestroy(&subset)); 8603 PetscCall(ISDestroy(&subset_mult)); 8604 PetscCall(ISGetLocalSize(subset_n, &local_size)); 8605 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 8606 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 8607 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 8608 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 8609 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 8610 PetscCall(ISDestroy(&subset_n)); 8611 8612 /* check numbering */ 8613 if (pcbddc->dbg_flag) { 8614 PetscScalar coarsesum, *array, *array2; 8615 PetscInt i; 8616 PetscBool set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE; 8617 8618 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8619 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8620 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n")); 8621 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8622 /* counter */ 8623 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8624 PetscCall(VecSet(pcis->vec1_N, 1.0)); 8625 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8626 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8627 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8628 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD)); 8629 PetscCall(VecSet(pcis->vec1_N, 0.0)); 8630 for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES)); 8631 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 8632 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 8633 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8634 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8635 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8636 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8637 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 8638 PetscCall(VecGetArray(pcis->vec1_N, &array)); 8639 PetscCall(VecGetArray(pcis->vec2_N, &array2)); 8640 for (i = 0; i < pcis->n; i++) { 8641 if (array[i] != 0.0 && array[i] != array2[i]) { 8642 PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi; 8643 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8644 set_error = PETSC_TRUE; 8645 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi)); 8646 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh)); 8647 } 8648 } 8649 PetscCall(VecRestoreArray(pcis->vec2_N, &array2)); 8650 PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8651 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8652 for (i = 0; i < pcis->n; i++) { 8653 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]); 8654 } 8655 PetscCall(VecRestoreArray(pcis->vec1_N, &array)); 8656 PetscCall(VecSet(pcis->vec1_global, 0.0)); 8657 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8658 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 8659 PetscCall(VecSum(pcis->vec1_global, &coarsesum)); 8660 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum))); 8661 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8662 PetscInt *gidxs; 8663 8664 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs)); 8665 PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs)); 8666 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n")); 8667 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8668 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank)); 8669 for (i = 0; i < pcbddc->local_primal_size; i++) { 8670 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i])); 8671 } 8672 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8673 PetscCall(PetscFree(gidxs)); 8674 } 8675 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8676 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8677 PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed"); 8678 } 8679 8680 /* get back data */ 8681 *coarse_size_n = coarse_size; 8682 *local_primal_indices_n = local_primal_indices; 8683 PetscFunctionReturn(PETSC_SUCCESS); 8684 } 8685 8686 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 8687 { 8688 IS localis_t; 8689 PetscInt i, lsize, *idxs, n; 8690 PetscScalar *vals; 8691 8692 PetscFunctionBegin; 8693 /* get indices in local ordering exploiting local to global map */ 8694 PetscCall(ISGetLocalSize(globalis, &lsize)); 8695 PetscCall(PetscMalloc1(lsize, &vals)); 8696 for (i = 0; i < lsize; i++) vals[i] = 1.0; 8697 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 8698 PetscCall(VecSet(gwork, 0.0)); 8699 PetscCall(VecSet(lwork, 0.0)); 8700 if (idxs) { /* multilevel guard */ 8701 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 8702 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 8703 } 8704 PetscCall(VecAssemblyBegin(gwork)); 8705 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 8706 PetscCall(PetscFree(vals)); 8707 PetscCall(VecAssemblyEnd(gwork)); 8708 /* now compute set in local ordering */ 8709 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8710 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 8711 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 8712 PetscCall(VecGetSize(lwork, &n)); 8713 for (i = 0, lsize = 0; i < n; i++) { 8714 if (PetscRealPart(vals[i]) > 0.5) lsize++; 8715 } 8716 PetscCall(PetscMalloc1(lsize, &idxs)); 8717 for (i = 0, lsize = 0; i < n; i++) { 8718 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 8719 } 8720 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 8721 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 8722 *localis = localis_t; 8723 PetscFunctionReturn(PETSC_SUCCESS); 8724 } 8725 8726 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 8727 { 8728 PC_IS *pcis = (PC_IS *)pc->data; 8729 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8730 PC_IS *pcisf; 8731 PC_BDDC *pcbddcf; 8732 PC pcf; 8733 8734 PetscFunctionBegin; 8735 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 8736 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 8737 PetscCall(PCSetType(pcf, PCBDDC)); 8738 8739 pcisf = (PC_IS *)pcf->data; 8740 pcbddcf = (PC_BDDC *)pcf->data; 8741 8742 pcisf->is_B_local = pcis->is_B_local; 8743 pcisf->vec1_N = pcis->vec1_N; 8744 pcisf->BtoNmap = pcis->BtoNmap; 8745 pcisf->n = pcis->n; 8746 pcisf->n_B = pcis->n_B; 8747 8748 PetscCall(PetscFree(pcbddcf->mat_graph)); 8749 PetscCall(PetscFree(pcbddcf->sub_schurs)); 8750 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 8751 pcbddcf->sub_schurs = schurs; 8752 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 8753 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 8754 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 8755 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 8756 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 8757 pcbddcf->use_faces = PETSC_TRUE; 8758 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 8759 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 8760 pcbddcf->use_qr_single = (PetscBool)!constraints; 8761 pcbddcf->fake_change = PETSC_TRUE; 8762 pcbddcf->dbg_flag = pcbddc->dbg_flag; 8763 8764 PetscCall(PCBDDCAdaptiveSelection(pcf)); 8765 PetscCall(PCBDDCConstraintsSetUp(pcf)); 8766 8767 *change = pcbddcf->ConstraintMatrix; 8768 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 8769 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 8770 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 8771 8772 if (schurs) pcbddcf->sub_schurs = NULL; 8773 pcbddcf->ConstraintMatrix = NULL; 8774 pcbddcf->mat_graph = NULL; 8775 pcisf->is_B_local = NULL; 8776 pcisf->vec1_N = NULL; 8777 pcisf->BtoNmap = NULL; 8778 PetscCall(PCDestroy(&pcf)); 8779 PetscFunctionReturn(PETSC_SUCCESS); 8780 } 8781 8782 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8783 { 8784 PC_IS *pcis = (PC_IS *)pc->data; 8785 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8786 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 8787 Mat S_j; 8788 PetscInt *used_xadj, *used_adjncy; 8789 PetscBool free_used_adj; 8790 8791 PetscFunctionBegin; 8792 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8793 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8794 free_used_adj = PETSC_FALSE; 8795 if (pcbddc->sub_schurs_layers == -1) { 8796 used_xadj = NULL; 8797 used_adjncy = NULL; 8798 } else { 8799 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8800 used_xadj = pcbddc->mat_graph->xadj; 8801 used_adjncy = pcbddc->mat_graph->adjncy; 8802 } else if (pcbddc->computed_rowadj) { 8803 used_xadj = pcbddc->mat_graph->xadj; 8804 used_adjncy = pcbddc->mat_graph->adjncy; 8805 } else { 8806 PetscBool flg_row = PETSC_FALSE; 8807 const PetscInt *xadj, *adjncy; 8808 PetscInt nvtxs; 8809 8810 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8811 if (flg_row) { 8812 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 8813 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 8814 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 8815 free_used_adj = PETSC_TRUE; 8816 } else { 8817 pcbddc->sub_schurs_layers = -1; 8818 used_xadj = NULL; 8819 used_adjncy = NULL; 8820 } 8821 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 8822 } 8823 } 8824 8825 /* setup sub_schurs data */ 8826 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 8827 if (!sub_schurs->schur_explicit) { 8828 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8829 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 8830 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 8831 } else { 8832 Mat change = NULL; 8833 Vec scaling = NULL; 8834 IS change_primal = NULL, iP; 8835 PetscInt benign_n; 8836 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8837 PetscBool need_change = PETSC_FALSE; 8838 PetscBool discrete_harmonic = PETSC_FALSE; 8839 8840 if (!pcbddc->use_vertices && reuse_solvers) { 8841 PetscInt n_vertices; 8842 8843 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 8844 reuse_solvers = (PetscBool)!n_vertices; 8845 } 8846 if (!pcbddc->benign_change_explicit) { 8847 benign_n = pcbddc->benign_n; 8848 } else { 8849 benign_n = 0; 8850 } 8851 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8852 We need a global reduction to avoid possible deadlocks. 8853 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8854 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8855 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8856 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8857 need_change = (PetscBool)(!need_change); 8858 } 8859 /* If the user defines additional constraints, we import them here */ 8860 if (need_change) { 8861 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 8862 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 8863 } 8864 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8865 8866 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 8867 if (iP) { 8868 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 8869 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 8870 PetscOptionsEnd(); 8871 } 8872 if (discrete_harmonic) { 8873 Mat A; 8874 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 8875 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 8876 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 8877 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 8878 pcbddc->benign_zerodiag_subs, change, change_primal)); 8879 PetscCall(MatDestroy(&A)); 8880 } else { 8881 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 8882 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 8883 } 8884 PetscCall(MatDestroy(&change)); 8885 PetscCall(ISDestroy(&change_primal)); 8886 } 8887 PetscCall(MatDestroy(&S_j)); 8888 8889 /* free adjacency */ 8890 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 8891 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 8892 PetscFunctionReturn(PETSC_SUCCESS); 8893 } 8894 8895 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8896 { 8897 PC_IS *pcis = (PC_IS *)pc->data; 8898 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8899 PCBDDCGraph graph; 8900 8901 PetscFunctionBegin; 8902 /* attach interface graph for determining subsets */ 8903 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8904 IS verticesIS, verticescomm; 8905 PetscInt vsize, *idxs; 8906 8907 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8908 PetscCall(ISGetSize(verticesIS, &vsize)); 8909 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 8910 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 8911 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 8912 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 8913 PetscCall(PCBDDCGraphCreate(&graph)); 8914 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 8915 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 8916 PetscCall(ISDestroy(&verticescomm)); 8917 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 8918 } else { 8919 graph = pcbddc->mat_graph; 8920 } 8921 /* print some info */ 8922 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8923 IS vertices; 8924 PetscInt nv, nedges, nfaces; 8925 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 8926 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8927 PetscCall(ISGetSize(vertices, &nv)); 8928 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 8929 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 8930 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 8931 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 8932 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 8933 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8934 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 8935 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 8936 } 8937 8938 /* sub_schurs init */ 8939 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 8940 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 8941 8942 /* free graph struct */ 8943 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 8944 PetscFunctionReturn(PETSC_SUCCESS); 8945 } 8946 8947 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8948 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8949 { 8950 Mat At; 8951 IS rows; 8952 PetscInt rst, ren; 8953 PetscLayout rmap; 8954 8955 PetscFunctionBegin; 8956 rst = ren = 0; 8957 if (ccomm != MPI_COMM_NULL) { 8958 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 8959 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 8960 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 8961 PetscCall(PetscLayoutSetUp(rmap)); 8962 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 8963 } 8964 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 8965 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 8966 PetscCall(ISDestroy(&rows)); 8967 8968 if (ccomm != MPI_COMM_NULL) { 8969 Mat_MPIAIJ *a, *b; 8970 IS from, to; 8971 Vec gvec; 8972 PetscInt lsize; 8973 8974 PetscCall(MatCreate(ccomm, B)); 8975 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 8976 PetscCall(MatSetType(*B, MATAIJ)); 8977 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 8978 PetscCall(PetscLayoutSetUp((*B)->cmap)); 8979 a = (Mat_MPIAIJ *)At->data; 8980 b = (Mat_MPIAIJ *)(*B)->data; 8981 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 8982 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 8983 PetscCall(PetscObjectReference((PetscObject)a->A)); 8984 PetscCall(PetscObjectReference((PetscObject)a->B)); 8985 b->A = a->A; 8986 b->B = a->B; 8987 8988 b->donotstash = a->donotstash; 8989 b->roworiented = a->roworiented; 8990 b->rowindices = NULL; 8991 b->rowvalues = NULL; 8992 b->getrowactive = PETSC_FALSE; 8993 8994 (*B)->rmap = rmap; 8995 (*B)->factortype = A->factortype; 8996 (*B)->assembled = PETSC_TRUE; 8997 (*B)->insertmode = NOT_SET_VALUES; 8998 (*B)->preallocated = PETSC_TRUE; 8999 9000 if (a->colmap) { 9001 #if defined(PETSC_USE_CTABLE) 9002 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9003 #else 9004 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9005 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9006 #endif 9007 } else b->colmap = NULL; 9008 if (a->garray) { 9009 PetscInt len; 9010 len = a->B->cmap->n; 9011 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9012 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9013 } else b->garray = NULL; 9014 9015 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9016 b->lvec = a->lvec; 9017 9018 /* cannot use VecScatterCopy */ 9019 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9020 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9021 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9022 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9023 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9024 PetscCall(ISDestroy(&from)); 9025 PetscCall(ISDestroy(&to)); 9026 PetscCall(VecDestroy(&gvec)); 9027 } 9028 PetscCall(MatDestroy(&At)); 9029 PetscFunctionReturn(PETSC_SUCCESS); 9030 } 9031